diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 54da6d717a06..b987dc2c6851 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -72,10 +72,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) /* * Zone interfaces */ - -#define kmem_zone kmem_cache -#define kmem_zone_t struct kmem_cache - static inline struct page * kmem_to_page(void *addr) { diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 005abfd9fd34..d7d875cef07a 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -850,7 +850,7 @@ xfs_ag_shrink_space( if (err2 != -ENOSPC) goto resv_err; - __xfs_bmap_add_free(*tpp, args.fsbno, delta, NULL, true); + __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true); /* * Roll the transaction before trying to re-init the per-ag diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 4c6f9045baca..3f597cad2c33 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag); /* * Perag iteration APIs - * - * XXX: for_each_perag_range() usage really needs an iterator to clean up when - * we terminate at end_agno because we may have taken a reference to the perag - * beyond end_agno. Right now callers have to be careful to catch and clean that - * up themselves. This is not necessary for the callers of for_each_perag() and - * for_each_perag_from() because they terminate at sb_agcount where there are - * no perag structures in tree beyond end_agno. 
*/ -#define for_each_perag_range(mp, next_agno, end_agno, pag) \ - for ((pag) = xfs_perag_get((mp), (next_agno)); \ - (pag) != NULL && (next_agno) <= (end_agno); \ - (next_agno) = (pag)->pag_agno + 1, \ - xfs_perag_put(pag), \ - (pag) = xfs_perag_get((mp), (next_agno))) +static inline struct xfs_perag * +xfs_perag_next( + struct xfs_perag *pag, + xfs_agnumber_t *agno, + xfs_agnumber_t end_agno) +{ + struct xfs_mount *mp = pag->pag_mount; -#define for_each_perag_from(mp, next_agno, pag) \ - for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) + *agno = pag->pag_agno + 1; + xfs_perag_put(pag); + if (*agno > end_agno) + return NULL; + return xfs_perag_get(mp, *agno); +} + +#define for_each_perag_range(mp, agno, end_agno, pag) \ + for ((pag) = xfs_perag_get((mp), (agno)); \ + (pag) != NULL; \ + (pag) = xfs_perag_next((pag), &(agno), (end_agno))) + +#define for_each_perag_from(mp, agno, pag) \ + for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) #define for_each_perag(mp, agno, pag) \ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 2aa2b3484c28..fe94058d4e9e 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -91,7 +91,8 @@ xfs_ag_resv_critical( trace_xfs_ag_resv_critical(pag, type, avail); /* Critically low if less than 10% or max btree height remains. 
*/ - return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS, + return XFS_TEST_ERROR(avail < orig / 10 || + avail < pag->pag_mount->m_agbtree_maxlevels, pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 95157f5a5a6c..353e53b892e6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -27,7 +27,7 @@ #include "xfs_ag_resv.h" #include "xfs_bmap.h" -extern kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_extfree_item_cache; struct workqueue_struct *xfs_alloc_wq; @@ -426,8 +426,8 @@ xfs_alloc_fix_len( */ STATIC int /* error code */ xfs_alloc_fixup_trees( - xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ - xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ + struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */ + struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */ xfs_agblock_t fbno, /* starting block of free extent */ xfs_extlen_t flen, /* length of free extent */ xfs_agblock_t rbno, /* starting block of returned extent */ @@ -488,8 +488,8 @@ xfs_alloc_fixup_trees( struct xfs_btree_block *bnoblock; struct xfs_btree_block *cntblock; - bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); - cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); + bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp); + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp); if (XFS_IS_CORRUPT(mp, bnoblock->bb_numrecs != @@ -1200,8 +1200,8 @@ xfs_alloc_ag_vextent_exact( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ - xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ + struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */ + struct xfs_btree_cur *cnt_cur;/* by count btree cursor */ int error; xfs_agblock_t fbno; /* start block of found extent */ xfs_extlen_t flen; /* length of found extent */ @@ -1512,7 
+1512,7 @@ xfs_alloc_ag_vextent_lastblock( * than minlen. */ if (*len || args->alignment > 1) { - acur->cnt->bc_ptrs[0] = 1; + acur->cnt->bc_levels[0].ptr = 1; do { error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); if (error) @@ -1658,8 +1658,8 @@ xfs_alloc_ag_vextent_size( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_agf *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ - xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ + struct xfs_btree_cur *bno_cur; /* cursor for bno btree */ + struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */ int error; /* error result */ xfs_agblock_t fbno; /* start of found freespace */ xfs_extlen_t flen; /* length of found freespace */ @@ -2190,14 +2190,15 @@ xfs_free_ag_extent( */ /* - * Compute and fill in value of m_ag_maxlevels. + * Compute and fill in value of m_alloc_maxlevels. */ void xfs_alloc_compute_maxlevels( xfs_mount_t *mp) /* file system mount structure */ { - mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, + mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, (mp->m_sb.sb_agblocks + 1) / 2); + ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk()); } /* @@ -2255,14 +2256,14 @@ xfs_alloc_min_freelist( const uint8_t *levels = pag ? pag->pagf_levels : fake_levels; unsigned int min_free; - ASSERT(mp->m_ag_maxlevels > 0); + ASSERT(mp->m_alloc_maxlevels > 0); /* space needed by-bno freespace btree */ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, - mp->m_ag_maxlevels); + mp->m_alloc_maxlevels); /* space needed by-size freespace btree */ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, - mp->m_ag_maxlevels); + mp->m_alloc_maxlevels); /* space needed reverse mapping used space btree */ if (xfs_has_rmapbt(mp)) min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, @@ -2439,7 +2440,7 @@ xfs_agfl_reset( /* * Defer an AGFL block free. 
This is effectively equivalent to - * xfs_bmap_add_free() with some special handling particular to AGFL blocks. + * xfs_free_extent_later() with some special handling particular to AGFL blocks. * * Deferring AGFL frees helps prevent log reservation overruns due to too many * allocation operations in a transaction. AGFL frees are prone to this problem @@ -2458,21 +2459,74 @@ xfs_defer_agfl_block( struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *new; /* new element */ - ASSERT(xfs_bmap_free_item_zone != NULL); + ASSERT(xfs_extfree_item_cache != NULL); ASSERT(oinfo != NULL); - new = kmem_cache_alloc(xfs_bmap_free_item_zone, + new = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; - new->xefi_oinfo = *oinfo; - new->xefi_skip_discard = false; + new->xefi_owner = oinfo->oi_owner; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); } +/* + * Add the extent to the list of extents to be freed at transaction end. + * The list is maintained sorted (by block number). 
+ */ +void +__xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo, + bool skip_discard) +{ + struct xfs_extent_free_item *new; /* new element */ +#ifdef DEBUG + struct xfs_mount *mp = tp->t_mountp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + + ASSERT(bno != NULLFSBLOCK); + ASSERT(len > 0); + ASSERT(len <= MAXEXTLEN); + ASSERT(!isnullstartblock(bno)); + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(agbno < mp->m_sb.sb_agblocks); + ASSERT(len < mp->m_sb.sb_agblocks); + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); +#endif + ASSERT(xfs_extfree_item_cache != NULL); + + new = kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); + new->xefi_startblock = bno; + new->xefi_blockcount = (xfs_extlen_t)len; + if (skip_discard) + new->xefi_flags |= XFS_EFI_SKIP_DISCARD; + if (oinfo) { + ASSERT(oinfo->oi_offset == 0); + + if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) + new->xefi_flags |= XFS_EFI_ATTR_FORK; + if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) + new->xefi_flags |= XFS_EFI_BMBT_BLOCK; + new->xefi_owner = oinfo->oi_owner; + } else { + new->xefi_owner = XFS_RMAP_OWN_NULL; + } + trace_xfs_bmap_free_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, + XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); +} + #ifdef DEBUG /* * Check if an AGF has a free extent record whose length is equal to @@ -2903,13 +2957,16 @@ xfs_agf_verify( if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > mp->m_ag_maxlevels || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > mp->m_ag_maxlevels) + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > + mp->m_alloc_maxlevels || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > + mp->m_alloc_maxlevels) return __this_address; if 
(xfs_has_rmapbt(mp) && (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > mp->m_rmap_maxlevels)) + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > + mp->m_rmap_maxlevels)) return __this_address; if (xfs_has_rmapbt(mp) && @@ -3495,3 +3552,20 @@ xfs_agfl_walk( return 0; } + +int __init +xfs_extfree_intent_init_cache(void) +{ + xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent", + sizeof(struct xfs_extent_free_item), + 0, 0, NULL); + + return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_extfree_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_extfree_item_cache); + xfs_extfree_item_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index df4aefaf0046..1c14a0b1abea 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -98,7 +98,7 @@ unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, struct xfs_perag *pag); /* - * Compute and fill in value of m_ag_maxlevels. + * Compute and fill in value of m_alloc_maxlevels. */ void xfs_alloc_compute_maxlevels( @@ -248,4 +248,40 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } +void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, + xfs_filblks_t len, const struct xfs_owner_info *oinfo, + bool skip_discard); + +/* + * List of extents to be freed "later". + * The list is kept sorted on xefi_startblock. 
+ */ +struct xfs_extent_free_item { + struct list_head xefi_list; + uint64_t xefi_owner; + xfs_fsblock_t xefi_startblock;/* starting fs block number */ + xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + unsigned int xefi_flags; +}; + +#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ +#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ +#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ + +static inline void +xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo) +{ + __xfs_free_extent_later(tp, bno, len, oinfo, false); +} + + +extern struct kmem_cache *xfs_extfree_item_cache; + +int __init xfs_extfree_intent_init_cache(void); +void xfs_extfree_intent_destroy_cache(void); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 6746fd735550..8c9f73cc0bee 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -20,6 +20,7 @@ #include "xfs_trans.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_allocbt_cur_cache; STATIC struct xfs_btree_cur * xfs_allocbt_dup_cursor( @@ -316,7 +317,7 @@ xfs_allocbt_verify( if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[btnum]) return __this_address; - } else if (level >= mp->m_ag_maxlevels) + } else if (level >= mp->m_alloc_maxlevels) return __this_address; return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); @@ -477,12 +478,8 @@ xfs_allocbt_init_common( ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; - cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur = xfs_btree_alloc_cursor(mp, tp, btnum, mp->m_alloc_maxlevels, + xfs_allocbt_cur_cache); cur->bc_ag.abt.active = false; if (btnum == XFS_BTNUM_CNT) { @@ -571,6 +568,17 @@ 
xfs_allocbt_commit_staged_btree( } } +/* Calculate number of records in an alloc btree block. */ +static inline unsigned int +xfs_allocbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_alloc_rec_t); + return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +} + /* * Calculate number of records in an alloc btree block. */ @@ -581,10 +589,26 @@ xfs_allocbt_maxrecs( int leaf) { blocklen -= XFS_ALLOC_BLOCK_LEN(mp); + return xfs_allocbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_alloc_rec_t); - return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +/* Free space btrees are at their largest when every other block is free. */ +#define XFS_MAX_FREESP_RECORDS ((XFS_MAX_AG_BLOCKS + 1) / 2) + +/* Compute the max possible height for free space btrees. */ +unsigned int +xfs_allocbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS); } /* Calculate the freespace btree size for some records. 
*/ @@ -595,3 +619,22 @@ xfs_allocbt_calc_size( { return xfs_btree_calc_size(mp->m_alloc_mnr, len); } + +int __init +xfs_allocbt_init_cur_cache(void) +{ + xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur", + xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_allocbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_allocbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_allocbt_cur_cache); + xfs_allocbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 2f6b816aaf9f..45df893ef6bb 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -60,4 +60,9 @@ extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_allocbt_maxlevels_ondisk(void); + +int __init xfs_allocbt_init_cur_cache(void); +void xfs_allocbt_destroy_cur_cache(void); + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index e1d11e314228..014daa8c542d 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -770,7 +770,7 @@ xfs_attr_fork_remove( ASSERT(ip->i_afp->if_nextents == 0); xfs_idestroy_fork(ip->i_afp); - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); ip->i_afp = NULL; ip->i_forkoff = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b48230f1a361..4dccd4d90622 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -37,8 +37,7 @@ #include "xfs_icache.h" #include "xfs_iomap.h" - -kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_bmap_intent_cache; /* * Miscellaneous helper functions @@ -93,6 +92,7 @@ xfs_bmap_compute_maxlevels( maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; } mp->m_bm_maxlevels[whichfork] = 
level; + ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk()); } unsigned int @@ -239,11 +239,11 @@ xfs_bmap_get_bp( if (!cur) return NULL; - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { - if (!cur->bc_bufs[i]) + for (i = 0; i < cur->bc_maxlevels; i++) { + if (!cur->bc_levels[i].bp) break; - if (xfs_buf_daddr(cur->bc_bufs[i]) == bno) - return cur->bc_bufs[i]; + if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno) + return cur->bc_levels[i].bp; } /* Chase down all the log items to see if the bp is there */ @@ -316,7 +316,7 @@ xfs_check_block( */ STATIC void xfs_bmap_check_leaf_extents( - xfs_btree_cur_t *cur, /* btree cursor or null */ + struct xfs_btree_cur *cur, /* btree cursor or null */ xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { @@ -521,56 +521,6 @@ xfs_bmap_validate_ret( #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0) #endif /* DEBUG */ -/* - * bmap free list manipulation functions - */ - -/* - * Add the extent to the list of extents to be free at transaction end. - * The list is maintained sorted (by block number). 
- */ -void -__xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo, - bool skip_discard) -{ - struct xfs_extent_free_item *new; /* new element */ -#ifdef DEBUG - struct xfs_mount *mp = tp->t_mountp; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); - ASSERT(len <= MAXEXTLEN); - ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif - ASSERT(xfs_bmap_free_item_zone != NULL); - - new = kmem_cache_alloc(xfs_bmap_free_item_zone, - GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = bno; - new->xefi_blockcount = (xfs_extlen_t)len; - if (oinfo) - new->xefi_oinfo = *oinfo; - else - new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; - new->xefi_skip_discard = skip_discard; - trace_xfs_bmap_free_defer(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, - XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); -} - /* * Inode fork format manipulation functions */ @@ -625,12 +575,12 @@ xfs_bmap_btree_to_extents( if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo); + xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); ip->i_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); - if (cur->bc_bufs[0] == cbp) - cur->bc_bufs[0] = NULL; + if (cur->bc_levels[0].bp == cbp) + cur->bc_levels[0].bp = NULL; xfs_iroot_realloc(ip, -1, whichfork); ASSERT(ifp->if_broot == NULL); ifp->if_format = XFS_DINODE_FMT_EXTENTS; @@ -925,7 +875,7 @@ xfs_bmap_add_attrfork_btree( int *flags) /* inode logging flags */ { struct xfs_btree_block *block = 
ip->i_df.if_broot; - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ xfs_mount_t *mp; /* file system mount struct */ int stat; /* newroot status */ @@ -968,7 +918,7 @@ xfs_bmap_add_attrfork_extents( struct xfs_inode *ip, /* incore inode pointer */ int *flags) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* bmap btree cursor */ + struct xfs_btree_cur *cur; /* bmap btree cursor */ int error; /* error return value */ if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <= @@ -1988,11 +1938,11 @@ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ int whichfork, struct xfs_iext_cursor *icur, - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + struct xfs_btree_cur **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ int i; /* temp state */ struct xfs_ifork *ifp; /* inode fork pointer */ @@ -5045,7 +4995,7 @@ xfs_bmap_del_extent_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ struct xfs_iext_cursor *icur, - xfs_btree_cur_t *cur, /* if null, not a btree */ + struct xfs_btree_cur *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ int whichfork, /* data or attr fork */ @@ -5296,7 +5246,7 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - __xfs_bmap_add_free(tp, del->br_startblock, + __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, (bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN); @@ -6189,7 +6139,7 @@ __xfs_bmap_add( bmap->br_blockcount, bmap->br_state); - bi = 
kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS); + bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&bi->bi_list); bi->bi_type = type; bi->bi_owner = ip; @@ -6300,3 +6250,20 @@ xfs_bmap_validate_extent( return __this_address; return NULL; } + +int __init +xfs_bmap_intent_init_cache(void) +{ + xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent", + sizeof(struct xfs_bmap_intent), + 0, 0, NULL); + + return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_bmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_bmap_intent_cache); + xfs_bmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 67641f669918..03d9aaf87413 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -13,8 +13,6 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; -extern kmem_zone_t *xfs_bmap_free_item_zone; - /* * Argument structure for xfs_bmap_alloc. */ @@ -44,19 +42,6 @@ struct xfs_bmalloca { int flags; }; -/* - * List of extents to be free "later". - * The list is kept sorted on xbf_startblock. 
- */ -struct xfs_extent_free_item -{ - xfs_fsblock_t xefi_startblock;/* starting fs block number */ - xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ - bool xefi_skip_discard; - struct list_head xefi_list; - struct xfs_owner_info xefi_oinfo; /* extent owner */ -}; - #define XFS_BMAP_MAX_NMAP 4 /* @@ -189,9 +174,6 @@ unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); -void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, - xfs_filblks_t len, const struct xfs_owner_info *oinfo, - bool skip_discard); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); @@ -239,16 +221,6 @@ int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, int *logflagsp); -static inline void -xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo) -{ - __xfs_bmap_add_free(tp, bno, len, oinfo, false); -} - enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, XFS_BMAP_UNMAP, @@ -257,8 +229,8 @@ enum xfs_bmap_intent_type { struct xfs_bmap_intent { struct list_head bi_list; enum xfs_bmap_intent_type bi_type; - struct xfs_inode *bi_owner; int bi_whichfork; + struct xfs_inode *bi_owner; struct xfs_bmbt_irec bi_bmap; }; @@ -290,4 +262,9 @@ int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, int flags); +extern struct kmem_cache *xfs_bmap_intent_cache; + +int __init xfs_bmap_intent_init_cache(void); +void xfs_bmap_intent_destroy_cache(void); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c 
b/fs/xfs/libxfs/xfs_bmap_btree.c index 72444b8b38a6..453309fc85f2 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -22,6 +22,8 @@ #include "xfs_trace.h" #include "xfs_rmap.h" +static struct kmem_cache *xfs_bmbt_cur_cache; + /* * Convert on-disk form of btree root to in-memory form. */ @@ -286,7 +288,7 @@ xfs_bmbt_free_block( struct xfs_owner_info oinfo; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); ip->i_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -552,13 +554,9 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - - cur->bc_tp = tp; - cur->bc_mp = mp; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP, + mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache); cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; - cur->bc_btnum = XFS_BTNUM_BMAP; - cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); cur->bc_ops = &xfs_bmbt_ops; @@ -575,6 +573,17 @@ xfs_bmbt_init_cursor( return cur; } +/* Calculate number of records in a block mapping btree block. */ +static inline unsigned int +xfs_bmbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_bmbt_rec_t); + return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +} + /* * Calculate number of records in a bmap btree block. */ @@ -585,10 +594,24 @@ xfs_bmbt_maxrecs( int leaf) { blocklen -= XFS_BMBT_BLOCK_LEN(mp); + return xfs_bmbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_bmbt_rec_t); - return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +/* Compute the max possible height for block mapping btrees. 
*/ +unsigned int +xfs_bmbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_bmbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_bmbt_block_maxrecs(blocklen, false) / 2; + + /* One extra level for the inode root. */ + return xfs_btree_compute_maxlevels(minrecs, MAXEXTNUM) + 1; } /* @@ -654,3 +677,22 @@ xfs_bmbt_calc_size( { return xfs_btree_calc_size(mp->m_bmap_dmnr, len); } + +int __init +xfs_bmbt_init_cur_cache(void) +{ + xfs_bmbt_cur_cache = kmem_cache_create("xfs_bmbt_cur", + xfs_btree_cur_sizeof(xfs_bmbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_bmbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_bmbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_bmbt_cur_cache); + xfs_bmbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 729e3bc569be..3e7a40a83835 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -110,4 +110,9 @@ extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp, unsigned long long len); +unsigned int xfs_bmbt_maxlevels_ondisk(void); + +int __init xfs_bmbt_init_cur_cache(void); +void xfs_bmbt_destroy_cur_cache(void); + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 298395481713..b4e19aacb9de 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -22,11 +22,11 @@ #include "xfs_log.h" #include "xfs_btree_staging.h" #include "xfs_ag.h" - -/* - * Cursor allocation zone. - */ -kmem_zone_t *xfs_btree_cur_zone; +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount_btree.h" /* * Btree magic numbers. 
@@ -367,8 +367,8 @@ xfs_btree_del_cursor( * way we won't have initialized all the entries down to 0. */ for (i = 0; i < cur->bc_nlevels; i++) { - if (cur->bc_bufs[i]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); + if (cur->bc_levels[i].bp) + xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp); else if (!error) break; } @@ -379,7 +379,7 @@ xfs_btree_del_cursor( kmem_free(cur->bc_ops); if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) xfs_perag_put(cur->bc_ag.pag); - kmem_cache_free(xfs_btree_cur_zone, cur); + kmem_cache_free(cur->bc_cache, cur); } /* @@ -388,14 +388,14 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur) /* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur) /* output cursor */ { struct xfs_buf *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ - xfs_btree_cur_t *new; /* new cursor value */ + struct xfs_btree_cur *new; /* new cursor value */ xfs_trans_t *tp; /* transaction pointer, can be NULL */ tp = cur->bc_tp; @@ -415,9 +415,9 @@ xfs_btree_dup_cursor( * For each level current, re-get the buffer and copy the ptr value. 
*/ for (i = 0; i < new->bc_nlevels; i++) { - new->bc_ptrs[i] = cur->bc_ptrs[i]; - new->bc_ra[i] = cur->bc_ra[i]; - bp = cur->bc_bufs[i]; + new->bc_levels[i].ptr = cur->bc_levels[i].ptr; + new->bc_levels[i].ra = cur->bc_levels[i].ra; + bp = cur->bc_levels[i].bp; if (bp) { error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, xfs_buf_daddr(bp), mp->m_bsize, @@ -429,7 +429,7 @@ xfs_btree_dup_cursor( return error; } } - new->bc_bufs[i] = bp; + new->bc_levels[i].bp = bp; } *ncur = new; return 0; @@ -681,7 +681,7 @@ xfs_btree_get_block( return xfs_btree_get_iroot(cur); } - *bpp = cur->bc_bufs[level]; + *bpp = cur->bc_levels[level].bp; return XFS_BUF_TO_BLOCK(*bpp); } @@ -691,7 +691,7 @@ xfs_btree_get_block( */ STATIC int /* success=1, failure=0 */ xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -711,7 +711,7 @@ xfs_btree_firstrec( /* * Set the ptr value to 1, that's the first record/key. */ - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; return 1; } @@ -721,7 +721,7 @@ xfs_btree_firstrec( */ STATIC int /* success=1, failure=0 */ xfs_btree_lastrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -741,7 +741,7 @@ xfs_btree_lastrec( /* * Set the ptr value to numrecs, that's the last record/key. 
*/ - cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); + cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs); return 1; } @@ -922,11 +922,11 @@ xfs_btree_readahead( (lev == cur->bc_nlevels - 1)) return 0; - if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) + if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra) return 0; - cur->bc_ra[lev] |= lr; - block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); + cur->bc_levels[lev].ra |= lr; + block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) return xfs_btree_readahead_lblock(cur, lr, block); @@ -985,28 +985,28 @@ xfs_btree_readahead_ptr( */ STATIC void xfs_btree_setbuf( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ struct xfs_buf *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ - if (cur->bc_bufs[lev]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); - cur->bc_bufs[lev] = bp; - cur->bc_ra[lev] = 0; + if (cur->bc_levels[lev].bp) + xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp); + cur->bc_levels[lev].bp = bp; + cur->bc_levels[lev].ra = 0; b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; } else { if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; } } @@ -1548,7 +1548,7 @@ xfs_btree_increment( #endif /* We're done if we remain in the block after the increment. 
*/ - if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) + if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block)) goto out1; /* Fail if we just went off the right edge of the tree. */ @@ -1571,7 +1571,7 @@ xfs_btree_increment( goto error0; #endif - if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) + if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block)) break; /* Read-ahead the right block for the next loop. */ @@ -1598,14 +1598,14 @@ xfs_btree_increment( for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { union xfs_btree_ptr *ptrp; - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); --lev; error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = 1; + cur->bc_levels[lev].ptr = 1; } out1: *stat = 1; @@ -1641,7 +1641,7 @@ xfs_btree_decrement( xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); /* We're done if we remain in the block after the decrement. */ - if (--cur->bc_ptrs[level] > 0) + if (--cur->bc_levels[level].ptr > 0) goto out1; /* Get a pointer to the btree block. */ @@ -1665,7 +1665,7 @@ xfs_btree_decrement( * Stop when we don't go off the left edge of a block. */ for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) + if (--cur->bc_levels[lev].ptr > 0) break; /* Read-ahead the left block for the next loop. 
*/ xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); @@ -1691,13 +1691,13 @@ xfs_btree_decrement( for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { union xfs_btree_ptr *ptrp; - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); --lev; error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); + cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block); } out1: *stat = 1; @@ -1735,7 +1735,7 @@ xfs_btree_lookup_get_block( * * Otherwise throw it away and get a new one. */ - bp = cur->bc_bufs[level]; + bp = cur->bc_levels[level].bp; error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); if (error) return error; @@ -1864,7 +1864,7 @@ xfs_btree_lookup( return -EFSCORRUPTED; } - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE; *stat = 0; return 0; } @@ -1916,7 +1916,7 @@ xfs_btree_lookup( if (error) goto error0; - cur->bc_ptrs[level] = keyno; + cur->bc_levels[level].ptr = keyno; } } @@ -1933,7 +1933,7 @@ xfs_btree_lookup( !xfs_btree_ptr_is_null(cur, &ptr)) { int i; - cur->bc_ptrs[0] = keyno; + cur->bc_levels[0].ptr = keyno; error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; @@ -1944,7 +1944,7 @@ xfs_btree_lookup( } } else if (dir == XFS_LOOKUP_LE && diff > 0) keyno--; - cur->bc_ptrs[0] = keyno; + cur->bc_levels[0].ptr = keyno; /* Return if we succeeded or not. 
*/ if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) @@ -2104,7 +2104,7 @@ __xfs_btree_updkeys( if (error) return error; #endif - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; nlkey = xfs_btree_key_addr(cur, ptr, block); nhkey = xfs_btree_high_key_addr(cur, ptr, block); if (!force_all && @@ -2171,7 +2171,7 @@ xfs_btree_update_keys( if (error) return error; #endif - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; kp = xfs_btree_key_addr(cur, ptr, block); xfs_btree_copy_keys(cur, kp, &key, 1); xfs_btree_log_keys(cur, bp, ptr, ptr); @@ -2205,7 +2205,7 @@ xfs_btree_update( goto error0; #endif /* Get the address of the rec to be updated. */ - ptr = cur->bc_ptrs[0]; + ptr = cur->bc_levels[0].ptr; rp = xfs_btree_rec_addr(cur, ptr, block); /* Fill in the new contents and log them. */ @@ -2280,7 +2280,7 @@ xfs_btree_lshift( * If the cursor entry is the one that would be moved, don't * do it... it's too complicated. */ - if (cur->bc_ptrs[level] <= 1) + if (cur->bc_levels[level].ptr <= 1) goto out0; /* Set up the left neighbor as "left". */ @@ -2414,7 +2414,7 @@ xfs_btree_lshift( goto error0; /* Slide the cursor value left one. */ - cur->bc_ptrs[level]--; + cur->bc_levels[level].ptr--; *stat = 1; return 0; @@ -2476,7 +2476,7 @@ xfs_btree_rshift( * do it... it's too complicated. */ lrecs = xfs_btree_get_numrecs(left); - if (cur->bc_ptrs[level] >= lrecs) + if (cur->bc_levels[level].ptr >= lrecs) goto out0; /* Set up the right neighbor as "right". */ @@ -2664,7 +2664,7 @@ __xfs_btree_split( */ lrecs = xfs_btree_get_numrecs(left); rrecs = lrecs / 2; - if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) + if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1) rrecs++; src_index = (lrecs - rrecs + 1); @@ -2760,9 +2760,9 @@ __xfs_btree_split( * If it's just pointing past the last entry in left, then we'll * insert there, so don't change anything in that case. 
*/ - if (cur->bc_ptrs[level] > lrecs + 1) { + if (cur->bc_levels[level].ptr > lrecs + 1) { xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= lrecs; + cur->bc_levels[level].ptr -= lrecs; } /* * If there are more levels, we'll need another cursor which refers @@ -2772,7 +2772,7 @@ __xfs_btree_split( error = xfs_btree_dup_cursor(cur, curp); if (error) goto error0; - (*curp)->bc_ptrs[level + 1]++; + (*curp)->bc_levels[level + 1].ptr++; } *ptrp = rptr; *stat = 1; @@ -2933,7 +2933,8 @@ xfs_btree_new_iroot( be16_add_cpu(&block->bb_level, 1); xfs_btree_set_numrecs(block, 1); cur->bc_nlevels++; - cur->bc_ptrs[level + 1] = 1; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); + cur->bc_levels[level + 1].ptr = 1; kp = xfs_btree_key_addr(cur, 1, block); ckp = xfs_btree_key_addr(cur, 1, cblock); @@ -3094,8 +3095,9 @@ xfs_btree_new_root( /* Fix up the cursor. */ xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; + cur->bc_levels[cur->bc_nlevels].ptr = nptr; cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); *stat = 1; return 0; error0: @@ -3152,7 +3154,7 @@ xfs_btree_make_block_unfull( return error; if (*stat) { - *oindex = *index = cur->bc_ptrs[level]; + *oindex = *index = cur->bc_levels[level].ptr; return 0; } @@ -3167,7 +3169,7 @@ xfs_btree_make_block_unfull( return error; - *index = cur->bc_ptrs[level]; + *index = cur->bc_levels[level].ptr; return 0; } @@ -3214,7 +3216,7 @@ xfs_btree_insrec( } /* If we're off the left edge, return failure. 
*/ - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; if (ptr == 0) { *stat = 0; return 0; @@ -3557,7 +3559,7 @@ xfs_btree_kill_iroot( if (error) return error; - cur->bc_bufs[level - 1] = NULL; + cur->bc_levels[level - 1].bp = NULL; be16_add_cpu(&block->bb_level, -1); xfs_trans_log_inode(cur->bc_tp, ip, XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork)); @@ -3590,8 +3592,8 @@ xfs_btree_kill_root( if (error) return error; - cur->bc_bufs[level] = NULL; - cur->bc_ra[level] = 0; + cur->bc_levels[level].bp = NULL; + cur->bc_levels[level].ra = 0; cur->bc_nlevels--; return 0; @@ -3650,7 +3652,7 @@ xfs_btree_delrec( tcur = NULL; /* Get the index of the entry being deleted, check for nothing there. */ - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; if (ptr == 0) { *stat = 0; return 0; @@ -3960,7 +3962,7 @@ xfs_btree_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); tcur = NULL; if (level == 0) - cur->bc_ptrs[0]++; + cur->bc_levels[0].ptr++; *stat = 1; return 0; @@ -4097,9 +4099,9 @@ xfs_btree_delrec( * cursor to the left block, and fix up the index. */ if (bp != lbp) { - cur->bc_bufs[level] = lbp; - cur->bc_ptrs[level] += lrecs; - cur->bc_ra[level] = 0; + cur->bc_levels[level].bp = lbp; + cur->bc_levels[level].ptr += lrecs; + cur->bc_levels[level].ra = 0; } /* * If we joined with the right neighbor and there's a level above @@ -4119,16 +4121,16 @@ xfs_btree_delrec( * We can't use decrement because it would change the next level up. */ if (level > 0) - cur->bc_ptrs[level]--; + cur->bc_levels[level].ptr--; /* * We combined blocks, so we have to update the parent keys if the - * btree supports overlapped intervals. However, bc_ptrs[level + 1] - * points to the old block so that the caller knows which record to - * delete. Therefore, the caller must be savvy enough to call updkeys - * for us if we return stat == 2. 
The other exit points from this - * function don't require deletions further up the tree, so they can - * call updkeys directly. + * btree supports overlapped intervals. However, + * bc_levels[level + 1].ptr points to the old block so that the caller + * knows which record to delete. Therefore, the caller must be savvy + * enough to call updkeys for us if we return stat == 2. The other + * exit points from this function don't require deletions further up + * the tree, so they can call updkeys directly. */ /* Return value means the next level up has something to do. */ @@ -4182,7 +4184,7 @@ xfs_btree_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { + if (cur->bc_levels[level].ptr == 0) { error = xfs_btree_decrement(cur, level, &i); if (error) goto error0; @@ -4213,7 +4215,7 @@ xfs_btree_get_rec( int error; /* error return value */ #endif - ptr = cur->bc_ptrs[0]; + ptr = cur->bc_levels[0].ptr; block = xfs_btree_get_block(cur, 0, &bp); #ifdef DEBUG @@ -4512,21 +4514,76 @@ xfs_btree_sblock_verify( } /* - * Calculate the number of btree levels needed to store a given number of - * records in a short-format btree. + * For the given limits on leaf and keyptr records per block, calculate the + * height of the tree needed to index the number of leaf records. 
*/ -uint +unsigned int xfs_btree_compute_maxlevels( - uint *limits, - unsigned long len) + const unsigned int *limits, + unsigned long long records) { - uint level; - unsigned long maxblocks; + unsigned long long level_blocks = howmany_64(records, limits[0]); + unsigned int height = 1; - maxblocks = (len + limits[0] - 1) / limits[0]; - for (level = 1; maxblocks > 1; level++) - maxblocks = (maxblocks + limits[1] - 1) / limits[1]; - return level; + while (level_blocks > 1) { + level_blocks = howmany_64(level_blocks, limits[1]); + height++; + } + + return height; +} + +/* + * For the given limits on leaf and keyptr records per block, calculate the + * number of blocks needed to index the given number of leaf records. + */ +unsigned long long +xfs_btree_calc_size( + const unsigned int *limits, + unsigned long long records) +{ + unsigned long long level_blocks = howmany_64(records, limits[0]); + unsigned long long blocks = level_blocks; + + while (level_blocks > 1) { + level_blocks = howmany_64(level_blocks, limits[1]); + blocks += level_blocks; + } + + return blocks; +} + +/* + * Given a number of available blocks for the btree to consume with records and + * pointers, calculate the height of the tree needed to index all the records + * that space can hold based on the number of pointers each interior node + * holds. + * + * We start by assuming a single level tree consumes a single block, then track + * the number of blocks each node level consumes until we no longer have space + * to store the next node level. At this point, we are indexing all the leaf + * blocks in the space, and there's no more free space to split the tree any + * further. That's our maximum btree height. 
+ */ +unsigned int +xfs_btree_space_to_height( + const unsigned int *limits, + unsigned long long leaf_blocks) +{ + unsigned long long node_blocks = limits[1]; + unsigned long long blocks_left = leaf_blocks - 1; + unsigned int height = 1; + + if (leaf_blocks < 1) + return 0; + + while (node_blocks < blocks_left) { + blocks_left -= node_blocks; + node_blocks *= limits[1]; + height++; + } + + return height; } /* @@ -4661,23 +4718,25 @@ xfs_btree_overlapped_query_range( if (error) goto out; #endif - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; while (level < cur->bc_nlevels) { block = xfs_btree_get_block(cur, level, &bp); /* End of node, pop back towards the root. */ - if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { + if (cur->bc_levels[level].ptr > + be16_to_cpu(block->bb_numrecs)) { pop_up: if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } if (level == 0) { /* Handle a leaf node. */ - recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, + block); cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, @@ -4700,14 +4759,15 @@ pop_up: /* Record is larger than high key; pop. */ goto pop_up; } - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; continue; } /* Handle an internal node. 
*/ - lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); - hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); - pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); + lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); + hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, + block); + pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); @@ -4730,13 +4790,13 @@ pop_up: if (error) goto out; #endif - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; continue; } else if (hdiff < 0) { /* The low key is larger than the upper range; pop. */ goto pop_up; } - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; } out: @@ -4747,13 +4807,14 @@ out: * with a zero-results range query, so release the buffers if we * failed to return any results. */ - if (cur->bc_bufs[0] == NULL) { + if (cur->bc_levels[0].bp == NULL) { for (i = 0; i < cur->bc_nlevels; i++) { - if (cur->bc_bufs[i]) { - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); - cur->bc_bufs[i] = NULL; - cur->bc_ptrs[i] = 0; - cur->bc_ra[i] = 0; + if (cur->bc_levels[i].bp) { + xfs_trans_brelse(cur->bc_tp, + cur->bc_levels[i].bp); + cur->bc_levels[i].bp = NULL; + cur->bc_levels[i].ptr = 0; + cur->bc_levels[i].ra = 0; } } } @@ -4816,29 +4877,6 @@ xfs_btree_query_all( return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); } -/* - * Calculate the number of blocks needed to store a given number of records - * in a short-format (per-AG metadata) btree. 
- */ -unsigned long long -xfs_btree_calc_size( - uint *limits, - unsigned long long len) -{ - int level; - int maxrecs; - unsigned long long rval; - - maxrecs = limits[0]; - for (level = 0, rval = 0; len > 1; level++) { - len += maxrecs - 1; - do_div(len, maxrecs); - maxrecs = limits[1]; - rval += len; - } - return rval; -} - static int xfs_btree_count_blocks_helper( struct xfs_btree_cur *cur, @@ -4915,7 +4953,7 @@ xfs_btree_has_more_records( block = xfs_btree_get_block(cur, 0, &bp); /* There are still records in this block. */ - if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block)) + if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block)) return true; /* There are more record blocks. */ @@ -4924,3 +4962,42 @@ xfs_btree_has_more_records( else return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); } + +/* Set up all the btree cursor caches. */ +int __init +xfs_btree_init_cur_caches(void) +{ + int error; + + error = xfs_allocbt_init_cur_cache(); + if (error) + return error; + error = xfs_inobt_init_cur_cache(); + if (error) + goto err; + error = xfs_bmbt_init_cur_cache(); + if (error) + goto err; + error = xfs_rmapbt_init_cur_cache(); + if (error) + goto err; + error = xfs_refcountbt_init_cur_cache(); + if (error) + goto err; + + return 0; +err: + xfs_btree_destroy_cur_caches(); + return error; +} + +/* Destroy all the btree cursor caches, if they've been allocated. */ +void +xfs_btree_destroy_cur_caches(void) +{ + xfs_allocbt_destroy_cur_cache(); + xfs_inobt_destroy_cur_cache(); + xfs_bmbt_destroy_cur_cache(); + xfs_rmapbt_destroy_cur_cache(); + xfs_refcountbt_destroy_cur_cache(); +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 4eaf8517f850..22d9f411fde6 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -13,8 +13,6 @@ struct xfs_trans; struct xfs_ifork; struct xfs_perag; -extern kmem_zone_t *xfs_btree_cur_zone; - /* * Generic key, ptr and record wrapper structures. 
* @@ -92,8 +90,6 @@ uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); #define XFS_BTREE_STATS_ADD(cur, stat, val) \ XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val) -#define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */ - struct xfs_btree_ops { /* size of the key and record structures */ size_t key_len; @@ -181,18 +177,18 @@ union xfs_btree_irec { /* Per-AG btree information. */ struct xfs_btree_cur_ag { - struct xfs_perag *pag; + struct xfs_perag *pag; union { struct xfs_buf *agbp; struct xbtree_afakeroot *afake; /* for staging cursor */ }; union { struct { - unsigned long nr_ops; /* # record updates */ - int shape_changes; /* # of extent splits */ + unsigned int nr_ops; /* # record updates */ + unsigned int shape_changes; /* # of extent splits */ } refc; struct { - bool active; /* allocation cursor state */ + bool active; /* allocation cursor state */ } abt; }; }; @@ -212,26 +208,35 @@ struct xfs_btree_cur_ino { #define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1) }; +struct xfs_btree_level { + /* buffer pointer */ + struct xfs_buf *bp; + + /* key/record number */ + uint16_t ptr; + + /* readahead info */ +#define XFS_BTCUR_LEFTRA (1 << 0) /* left sibling has been read-ahead */ +#define XFS_BTCUR_RIGHTRA (1 << 1) /* right sibling has been read-ahead */ + uint16_t ra; +}; + /* * Btree cursor structure. * This collects all information needed by the btree code in one place. 
*/ -typedef struct xfs_btree_cur +struct xfs_btree_cur { struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ const struct xfs_btree_ops *bc_ops; - uint bc_flags; /* btree features - below */ + struct kmem_cache *bc_cache; /* cursor cache */ + unsigned int bc_flags; /* btree features - below */ + xfs_btnum_t bc_btnum; /* identifies which btree type */ union xfs_btree_irec bc_rec; /* current insert/search record value */ - struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ - int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ - uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ -#define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */ -#define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */ - uint8_t bc_nlevels; /* number of levels in the tree */ - uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ - xfs_btnum_t bc_btnum; /* identifies which btree type */ - int bc_statoff; /* offset of btre stats array */ + uint8_t bc_nlevels; /* number of levels in the tree */ + uint8_t bc_maxlevels; /* maximum levels for this btree type */ + int bc_statoff; /* offset of btree stats array */ /* * Short btree pointers need an agno to be able to turn the pointers @@ -243,7 +248,21 @@ typedef struct xfs_btree_cur struct xfs_btree_cur_ag bc_ag; struct xfs_btree_cur_ino bc_ino; }; -} xfs_btree_cur_t; + + /* Must be at the end of the struct! */ + struct xfs_btree_level bc_levels[]; +}; + +/* + * Compute the size of a btree cursor that can handle a btree of a given + * height. The bc_levels array handles node and leaf blocks, so its size + * is exactly nlevels. 
+ */ +static inline size_t +xfs_btree_cur_sizeof(unsigned int nlevels) +{ + return struct_size((struct xfs_btree_cur *)NULL, bc_levels, nlevels); +} /* cursor flags */ #define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ @@ -258,7 +277,6 @@ typedef struct xfs_btree_cur */ #define XFS_BTREE_STAGING (1<<5) - #define XFS_BTREE_NOERROR 0 #define XFS_BTREE_ERROR 1 @@ -309,7 +327,7 @@ xfs_btree_check_sptr( */ void xfs_btree_del_cursor( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int error); /* del because of error */ /* @@ -318,8 +336,8 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur);/* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur);/* output cursor */ /* * Compute first and last byte offsets for the fields given. @@ -460,8 +478,12 @@ xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); -uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); -unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); +unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits, + unsigned long long records); +unsigned long long xfs_btree_calc_size(const unsigned int *limits, + unsigned long long records); +unsigned int xfs_btree_space_to_height(const unsigned int *limits, + unsigned long long blocks); /* * Return codes for the query range iterator function are 0 to continue @@ -527,7 +549,7 @@ struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur); /* Does this cursor point to the last block in the given level? 
*/ static inline bool xfs_btree_islastblock( - xfs_btree_cur_t *cur, + struct xfs_btree_cur *cur, int level) { struct xfs_btree_block *block; @@ -558,4 +580,27 @@ void xfs_btree_copy_keys(struct xfs_btree_cur *cur, union xfs_btree_key *dst_key, const union xfs_btree_key *src_key, int numkeys); +static inline struct xfs_btree_cur * +xfs_btree_alloc_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_btnum_t btnum, + uint8_t maxlevels, + struct kmem_cache *cache) +{ + struct xfs_btree_cur *cur; + + cur = kmem_cache_zalloc(cache, GFP_NOFS | __GFP_NOFAIL); + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_btnum = btnum; + cur->bc_maxlevels = maxlevels; + cur->bc_cache = cache; + + return cur; +} + +int __init xfs_btree_init_cur_caches(void); +void xfs_btree_destroy_cur_caches(void); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index ac9e80152b5c..dd75e208b543 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -657,12 +657,12 @@ xfs_btree_bload_compute_geometry( * checking levels 0 and 1 here, so set bc_nlevels such that the btree * code doesn't interpret either as the root level. */ - cur->bc_nlevels = XFS_BTREE_MAXLEVELS - 1; + cur->bc_nlevels = cur->bc_maxlevels - 1; xfs_btree_bload_ensure_slack(cur, &bbl->leaf_slack, 0); xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); bbl->nr_records = nr_this_level = nr_records; - for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { + for (cur->bc_nlevels = 1; cur->bc_nlevels <= cur->bc_maxlevels;) { uint64_t level_blocks; uint64_t dontcare64; unsigned int level = cur->bc_nlevels - 1; @@ -703,6 +703,7 @@ xfs_btree_bload_compute_geometry( * block-based btree level. 
*/ cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, &avg_per_block, &level_blocks, &dontcare64); @@ -718,13 +719,14 @@ xfs_btree_bload_compute_geometry( /* Otherwise, we need another level of btree. */ cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); } nr_blocks += level_blocks; nr_this_level = level_blocks; } - if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) + if (cur->bc_nlevels > cur->bc_maxlevels) return -EOVERFLOW; bbl->btree_height = cur->bc_nlevels; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index c062e2c85178..dd7a2dbce1d1 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -72,7 +72,7 @@ STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *save_blk); -kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ +struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */ /* * Allocate a dir-state structure. @@ -84,7 +84,7 @@ xfs_da_state_alloc( { struct xfs_da_state *state; - state = kmem_cache_zalloc(xfs_da_state_zone, GFP_NOFS | __GFP_NOFAIL); + state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL); state->args = args; state->mp = args->dp->i_mount; return state; @@ -113,7 +113,7 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef DEBUG memset((char *)state, 0, sizeof(*state)); #endif /* DEBUG */ - kmem_cache_free(xfs_da_state_zone, state); + kmem_cache_free(xfs_da_state_cache, state); } static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork) diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index ad5dd324631a..0faf7d9ac241 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -9,7 +9,6 @@ struct xfs_inode; struct xfs_trans; -struct zone; /* * Directory/attribute geometry information. 
There will be one of these for each @@ -227,6 +226,6 @@ void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp, struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from); -extern struct kmem_zone *xfs_da_state_zone; +extern struct kmem_cache *xfs_da_state_cache; #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index eff4a127188e..0805ade2d300 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -18,6 +18,12 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_log.h" +#include "xfs_rmap.h" +#include "xfs_refcount.h" +#include "xfs_bmap.h" +#include "xfs_alloc.h" + +static struct kmem_cache *xfs_defer_pending_cache; /* * Deferred Operations in XFS @@ -232,23 +238,20 @@ xfs_defer_trans_abort( } } -/* Roll a transaction so we can do some deferred op processing. */ -STATIC int -xfs_defer_trans_roll( - struct xfs_trans **tpp) +/* + * Capture resources that the caller said not to release ("held") when the + * transaction commits. Caller is responsible for zero-initializing @dres. 
+ */ +static int +xfs_defer_save_resources( + struct xfs_defer_resources *dres, + struct xfs_trans *tp) { - struct xfs_trans *tp = *tpp; struct xfs_buf_log_item *bli; struct xfs_inode_log_item *ili; struct xfs_log_item *lip; - struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS]; - struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES]; - unsigned int ordered = 0; /* bitmap */ - int bpcount = 0, ipcount = 0; - int i; - int error; - BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS); + BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS); list_for_each_entry(lip, &tp->t_items, li_trans) { switch (lip->li_type) { @@ -256,28 +259,29 @@ xfs_defer_trans_roll( bli = container_of(lip, struct xfs_buf_log_item, bli_item); if (bli->bli_flags & XFS_BLI_HOLD) { - if (bpcount >= XFS_DEFER_OPS_NR_BUFS) { + if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) { ASSERT(0); return -EFSCORRUPTED; } if (bli->bli_flags & XFS_BLI_ORDERED) - ordered |= (1U << bpcount); + dres->dr_ordered |= + (1U << dres->dr_bufs); else xfs_trans_dirty_buf(tp, bli->bli_buf); - bplist[bpcount++] = bli->bli_buf; + dres->dr_bp[dres->dr_bufs++] = bli->bli_buf; } break; case XFS_LI_INODE: ili = container_of(lip, struct xfs_inode_log_item, ili_item); if (ili->ili_lock_flags == 0) { - if (ipcount >= XFS_DEFER_OPS_NR_INODES) { + if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) { ASSERT(0); return -EFSCORRUPTED; } xfs_trans_log_inode(tp, ili->ili_inode, XFS_ILOG_CORE); - iplist[ipcount++] = ili->ili_inode; + dres->dr_ip[dres->dr_inos++] = ili->ili_inode; } break; default: @@ -285,7 +289,43 @@ xfs_defer_trans_roll( } } - trace_xfs_defer_trans_roll(tp, _RET_IP_); + return 0; +} + +/* Attach the held resources to the transaction. */ +static void +xfs_defer_restore_resources( + struct xfs_trans *tp, + struct xfs_defer_resources *dres) +{ + unsigned short i; + + /* Rejoin the joined inodes. 
*/ + for (i = 0; i < dres->dr_inos; i++) + xfs_trans_ijoin(tp, dres->dr_ip[i], 0); + + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < dres->dr_bufs; i++) { + xfs_trans_bjoin(tp, dres->dr_bp[i]); + if (dres->dr_ordered & (1U << i)) + xfs_trans_ordered_buf(tp, dres->dr_bp[i]); + xfs_trans_bhold(tp, dres->dr_bp[i]); + } +} + +/* Roll a transaction so we can do some deferred op processing. */ +STATIC int +xfs_defer_trans_roll( + struct xfs_trans **tpp) +{ + struct xfs_defer_resources dres = { }; + int error; + + error = xfs_defer_save_resources(&dres, *tpp); + if (error) + return error; + + trace_xfs_defer_trans_roll(*tpp, _RET_IP_); /* * Roll the transaction. Rolling always given a new transaction (even @@ -295,22 +335,11 @@ xfs_defer_trans_roll( * happened. */ error = xfs_trans_roll(tpp); - tp = *tpp; - /* Rejoin the joined inodes. */ - for (i = 0; i < ipcount; i++) - xfs_trans_ijoin(tp, iplist[i], 0); - - /* Rejoin the buffers and dirty them so the log moves forward. */ - for (i = 0; i < bpcount; i++) { - xfs_trans_bjoin(tp, bplist[i]); - if (ordered & (1U << i)) - xfs_trans_ordered_buf(tp, bplist[i]); - xfs_trans_bhold(tp, bplist[i]); - } + xfs_defer_restore_resources(*tpp, &dres); if (error) - trace_xfs_defer_trans_roll_error(tp, error); + trace_xfs_defer_trans_roll_error(*tpp, error); return error; } @@ -342,7 +371,7 @@ xfs_defer_cancel_list( ops->cancel_item(pwi); } ASSERT(dfp->dfp_count == 0); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); } } @@ -439,7 +468,7 @@ xfs_defer_finish_one( /* Done with the dfp, free it. 
*/ list_del(&dfp->dfp_list); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); out: if (ops->finish_cleanup) ops->finish_cleanup(tp, state, error); @@ -573,8 +602,8 @@ xfs_defer_add( dfp = NULL; } if (!dfp) { - dfp = kmem_alloc(sizeof(struct xfs_defer_pending), - KM_NOFS); + dfp = kmem_cache_zalloc(xfs_defer_pending_cache, + GFP_NOFS | __GFP_NOFAIL); dfp->dfp_type = type; dfp->dfp_intent = NULL; dfp->dfp_done = NULL; @@ -627,10 +656,11 @@ xfs_defer_move( */ static struct xfs_defer_capture * xfs_defer_ops_capture( - struct xfs_trans *tp, - struct xfs_inode *capture_ip) + struct xfs_trans *tp) { struct xfs_defer_capture *dfc; + unsigned short i; + int error; if (list_empty(&tp->t_dfops)) return NULL; @@ -654,27 +684,48 @@ xfs_defer_ops_capture( /* Preserve the log reservation size. */ dfc->dfc_logres = tp->t_log_res; - /* - * Grab an extra reference to this inode and attach it to the capture - * structure. - */ - if (capture_ip) { - ihold(VFS_I(capture_ip)); - dfc->dfc_capture_ip = capture_ip; + error = xfs_defer_save_resources(&dfc->dfc_held, tp); + if (error) { + /* + * Resource capture should never fail, but if it does, we + * still have to shut down the log and release things + * properly. + */ + xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE); } + /* + * Grab extra references to the inodes and buffers because callers are + * expected to release their held references after we commit the + * transaction. + */ + for (i = 0; i < dfc->dfc_held.dr_inos; i++) { + ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL)); + ihold(VFS_I(dfc->dfc_held.dr_ip[i])); + } + + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) + xfs_buf_hold(dfc->dfc_held.dr_bp[i]); + return dfc; } /* Release all resources that we used to capture deferred ops. 
*/ void -xfs_defer_ops_release( +xfs_defer_ops_capture_free( struct xfs_mount *mp, struct xfs_defer_capture *dfc) { + unsigned short i; + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); - if (dfc->dfc_capture_ip) - xfs_irele(dfc->dfc_capture_ip); + + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) + xfs_buf_relse(dfc->dfc_held.dr_bp[i]); + + for (i = 0; i < dfc->dfc_held.dr_inos; i++) + xfs_irele(dfc->dfc_held.dr_ip[i]); + kmem_free(dfc); } @@ -689,24 +740,21 @@ xfs_defer_ops_release( int xfs_defer_ops_capture_and_commit( struct xfs_trans *tp, - struct xfs_inode *capture_ip, struct list_head *capture_list) { struct xfs_mount *mp = tp->t_mountp; struct xfs_defer_capture *dfc; int error; - ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL)); - /* If we don't capture anything, commit transaction and exit. */ - dfc = xfs_defer_ops_capture(tp, capture_ip); + dfc = xfs_defer_ops_capture(tp); if (!dfc) return xfs_trans_commit(tp); /* Commit the transaction and add the capture structure to the list. */ error = xfs_trans_commit(tp); if (error) { - xfs_defer_ops_release(mp, dfc); + xfs_defer_ops_capture_free(mp, dfc); return error; } @@ -724,17 +772,19 @@ void xfs_defer_ops_continue( struct xfs_defer_capture *dfc, struct xfs_trans *tp, - struct xfs_inode **captured_ipp) + struct xfs_defer_resources *dres) { ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); /* Lock and join the captured inode to the new transaction. 
*/ - if (dfc->dfc_capture_ip) { - xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0); - } - *captured_ipp = dfc->dfc_capture_ip; + if (dfc->dfc_held.dr_inos == 2) + xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL, + dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL); + else if (dfc->dfc_held.dr_inos == 1) + xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL); + xfs_defer_restore_resources(tp, &dfc->dfc_held); + memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources)); /* Move captured dfops chain and state to the transaction. */ list_splice_init(&dfc->dfc_dfops, &tp->t_dfops); @@ -742,3 +792,82 @@ xfs_defer_ops_continue( kmem_free(dfc); } + +/* Release the resources captured and continued during recovery. */ +void +xfs_defer_resources_rele( + struct xfs_defer_resources *dres) +{ + unsigned short i; + + for (i = 0; i < dres->dr_inos; i++) { + xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL); + xfs_irele(dres->dr_ip[i]); + dres->dr_ip[i] = NULL; + } + + for (i = 0; i < dres->dr_bufs; i++) { + xfs_buf_relse(dres->dr_bp[i]); + dres->dr_bp[i] = NULL; + } + + dres->dr_inos = 0; + dres->dr_bufs = 0; + dres->dr_ordered = 0; +} + +static inline int __init +xfs_defer_init_cache(void) +{ + xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending", + sizeof(struct xfs_defer_pending), + 0, 0, NULL); + + return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM; +} + +static inline void +xfs_defer_destroy_cache(void) +{ + kmem_cache_destroy(xfs_defer_pending_cache); + xfs_defer_pending_cache = NULL; +} + +/* Set up caches for deferred work items. 
*/ +int __init +xfs_defer_init_item_caches(void) +{ + int error; + + error = xfs_defer_init_cache(); + if (error) + return error; + error = xfs_rmap_intent_init_cache(); + if (error) + goto err; + error = xfs_refcount_intent_init_cache(); + if (error) + goto err; + error = xfs_bmap_intent_init_cache(); + if (error) + goto err; + error = xfs_extfree_intent_init_cache(); + if (error) + goto err; + + return 0; +err: + xfs_defer_destroy_item_caches(); + return error; +} + +/* Destroy all the deferred work item caches, if they've been allocated. */ +void +xfs_defer_destroy_item_caches(void) +{ + xfs_extfree_intent_destroy_cache(); + xfs_bmap_intent_destroy_cache(); + xfs_refcount_intent_destroy_cache(); + xfs_rmap_intent_destroy_cache(); + xfs_defer_destroy_cache(); +} diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 05472f71fffe..7bb8a31ad65b 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -64,6 +64,30 @@ extern const struct xfs_defer_op_type xfs_rmap_update_defer_type; extern const struct xfs_defer_op_type xfs_extent_free_defer_type; extern const struct xfs_defer_op_type xfs_agfl_free_defer_type; +/* + * Deferred operation item relogging limits. + */ +#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ + +/* Resources that must be held across a transaction roll. */ +struct xfs_defer_resources { + /* held buffers */ + struct xfs_buf *dr_bp[XFS_DEFER_OPS_NR_BUFS]; + + /* inodes with no unlock flags */ + struct xfs_inode *dr_ip[XFS_DEFER_OPS_NR_INODES]; + + /* number of held buffers */ + unsigned short dr_bufs; + + /* bitmap of ordered buffers */ + unsigned short dr_ordered; + + /* number of held inodes */ + unsigned short dr_inos; +}; + /* * This structure enables a dfops user to detach the chain of deferred * operations from a transaction so that they can be continued later. 
@@ -83,11 +107,7 @@ struct xfs_defer_capture { /* Log reservation saved from the transaction. */ unsigned int dfc_logres; - /* - * An inode reference that must be maintained to complete the deferred - * work. - */ - struct xfs_inode *dfc_capture_ip; + struct xfs_defer_resources dfc_held; }; /* @@ -95,9 +115,14 @@ struct xfs_defer_capture { * This doesn't normally happen except log recovery. */ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, - struct xfs_inode *capture_ip, struct list_head *capture_list); + struct list_head *capture_list); void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, - struct xfs_inode **captured_ipp); -void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d); + struct xfs_defer_resources *dres); +void xfs_defer_ops_capture_free(struct xfs_mount *mp, + struct xfs_defer_capture *d); +void xfs_defer_resources_rele(struct xfs_defer_resources *dres); + +int __init xfs_defer_init_item_caches(void); +void xfs_defer_destroy_item_caches(void); #endif /* __XFS_DEFER_H__ */ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index deeb74becabc..15a362e2f5ea 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -22,7 +22,7 @@ xfs_calc_dquots_per_chunk( unsigned int nbblks) /* basic block units */ { ASSERT(nbblks > 0); - return BBTOB(nbblks) / sizeof(xfs_dqblk_t); + return BBTOB(nbblks) / sizeof(struct xfs_dqblk); } /* @@ -127,7 +127,7 @@ xfs_dqblk_repair( * Typically, a repair is only requested by quotacheck. 
*/ ASSERT(id != -1); - memset(dqb, 0, sizeof(xfs_dqblk_t)); + memset(dqb, 0, sizeof(struct xfs_dqblk)); dqb->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); dqb->dd_diskdq.d_version = XFS_DQUOT_VERSION; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 2d7057b7984b..d665c04e69dd 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -184,7 +184,7 @@ typedef struct xfs_sb { * Superblock - on disk version. Must match the in core version above. * Must be padded to 64 bit alignment. */ -typedef struct xfs_dsb { +struct xfs_dsb { __be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ __be32 sb_blocksize; /* logical block size, bytes */ __be64 sb_dblocks; /* number of data blocks */ @@ -263,7 +263,7 @@ typedef struct xfs_dsb { uuid_t sb_meta_uuid; /* metadata file system unique id */ /* must be padded to 64 bit alignment */ -} xfs_dsb_t; +}; /* * Misc. Flags - warning - these will be cleared by xfs_repair unless @@ -780,7 +780,7 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) * padding field for v3 inodes. */ #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ -typedef struct xfs_dinode { +struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ @@ -825,7 +825,7 @@ typedef struct xfs_dinode { uuid_t di_uuid; /* UUID of the filesystem */ /* structure must be padded to 64 bit alignment */ -} xfs_dinode_t; +}; #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) @@ -1215,7 +1215,7 @@ struct xfs_disk_dquot { * This is what goes on disk. This is separated from the xfs_disk_dquot because * carrying the unnecessary padding would be a waste of memory. 
*/ -typedef struct xfs_dqblk { +struct xfs_dqblk { struct xfs_disk_dquot dd_diskdq; /* portion living incore as well */ char dd_fill[4];/* filling for posterity */ @@ -1225,7 +1225,7 @@ typedef struct xfs_dqblk { __be32 dd_crc; /* checksum */ __be64 dd_lsn; /* last modification in log */ uuid_t dd_uuid; /* location information */ -} xfs_dqblk_t; +}; #define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index bde2b4c64dbe..c43877c8a279 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -268,6 +268,8 @@ typedef struct xfs_fsop_resblks { */ #define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ #define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ +#define XFS_MAX_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_BLOCKSIZE) +#define XFS_MAX_CRC_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_CRC_BLOCKSIZE) /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 994ad783d407..b418fe0c0679 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1827,7 +1827,7 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), + xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES); return; @@ -1872,7 +1872,7 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, agbno), + xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, &XFS_RMAP_OINFO_INODES); /* reset range to current bit and carry on... 
*/ @@ -2793,6 +2793,7 @@ xfs_ialloc_setup_geometry( inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, inodes); + ASSERT(igeo->inobt_maxlevels <= xfs_iallocbt_maxlevels_ondisk()); /* * Set the maximum inode count for this filesystem, being careful not diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 27190840c5d8..b2ad2fdc40f5 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -22,6 +22,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_inobt_cur_cache; + STATIC int xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, @@ -432,10 +434,8 @@ xfs_inobt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; + cur = xfs_btree_alloc_cursor(mp, tp, btnum, + M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache); if (btnum == XFS_BTNUM_INO) { cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); cur->bc_ops = &xfs_inobt_ops; @@ -444,8 +444,6 @@ xfs_inobt_init_common( cur->bc_ops = &xfs_finobt_ops; } - cur->bc_blocklog = mp->m_sb.sb_blocklog; - if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -530,6 +528,17 @@ xfs_inobt_commit_staged_btree( } } +/* Calculate number of records in an inode btree block. */ +static inline unsigned int +xfs_inobt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_inobt_rec_t); + return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +} + /* * Calculate number of records in an inobt btree block. 
*/ @@ -540,10 +549,54 @@ xfs_inobt_maxrecs( int leaf) { blocklen -= XFS_INOBT_BLOCK_LEN(mp); + return xfs_inobt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_inobt_rec_t); - return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +/* + * Maximum number of inode btree records per AG. Pretend that we can fill an + * entire AG completely full of inodes except for the AG headers. + */ +#define XFS_MAX_INODE_RECORDS \ + ((XFS_MAX_AG_BYTES - (4 * BBSIZE)) / XFS_DINODE_MIN_SIZE) / \ + XFS_INODES_PER_CHUNK + +/* Compute the max possible height for the inode btree. */ +static inline unsigned int +xfs_inobt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_inobt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_inobt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_INODE_RECORDS); +} + +/* Compute the max possible height for the free inode btree. */ +static inline unsigned int +xfs_finobt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_inobt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_inobt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_INODE_RECORDS); +} + +/* Compute the max possible height for either inode btree. 
*/ +unsigned int +xfs_iallocbt_maxlevels_ondisk(void) +{ + return max(xfs_inobt_maxlevels_ondisk(), + xfs_finobt_maxlevels_ondisk()); } /* @@ -761,3 +814,22 @@ xfs_iallocbt_calc_size( { return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); } + +int __init +xfs_inobt_init_cur_cache(void) +{ + xfs_inobt_cur_cache = kmem_cache_create("xfs_inobt_cur", + xfs_btree_cur_sizeof(xfs_inobt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_inobt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_inobt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_inobt_cur_cache); + xfs_inobt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 8a322d402e61..26451cb76b98 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -75,4 +75,9 @@ int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_iallocbt_maxlevels_ondisk(void); + +int __init xfs_inobt_init_cur_cache(void); +void xfs_inobt_destroy_cur_cache(void); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 3932b4ebf903..cae9708c8587 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -51,9 +51,9 @@ xfs_inode_buf_verify( agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { - int di_ok; - xfs_dinode_t *dip; - xfs_agino_t unlinked_ino; + struct xfs_dinode *dip; + xfs_agino_t unlinked_ino; + int di_ok; dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 1d174909f9bd..9149f4f796fc 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -26,7 +26,7 @@ #include "xfs_types.h" 
#include "xfs_errortag.h" -kmem_zone_t *xfs_ifork_zone; +struct kmem_cache *xfs_ifork_cache; void xfs_init_local_fork( @@ -67,10 +67,10 @@ xfs_init_local_fork( */ STATIC int xfs_iformat_local( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork, - int size) + struct xfs_inode *ip, + struct xfs_dinode *dip, + int whichfork, + int size) { /* * If the size is unreasonable, then something @@ -162,8 +162,8 @@ xfs_iformat_extents( */ STATIC int xfs_iformat_btree( - xfs_inode_t *ip, - xfs_dinode_t *dip, + struct xfs_inode *ip, + struct xfs_dinode *dip, int whichfork) { struct xfs_mount *mp = ip->i_mount; @@ -284,7 +284,7 @@ xfs_ifork_alloc( { struct xfs_ifork *ifp; - ifp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL); + ifp = kmem_cache_zalloc(xfs_ifork_cache, GFP_NOFS | __GFP_NOFAIL); ifp->if_format = format; ifp->if_nextents = nextents; return ifp; @@ -325,7 +325,7 @@ xfs_iformat_attr_fork( } if (error) { - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); ip->i_afp = NULL; } return error; @@ -580,8 +580,8 @@ xfs_iextents_copy( */ void xfs_iflush_fork( - xfs_inode_t *ip, - xfs_dinode_t *dip, + struct xfs_inode *ip, + struct xfs_dinode *dip, struct xfs_inode_log_item *iip, int whichfork) { @@ -676,7 +676,7 @@ xfs_ifork_init_cow( if (ip->i_cowfp) return; - ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_zone, + ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_cache, GFP_NOFS | __GFP_NOFAIL); ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index a6f7897b6887..3d64a3acb0ed 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -221,7 +221,7 @@ static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp, xfs_iext_get_extent((ifp), (ext), (got)); \ xfs_iext_next((ifp), (ext))) -extern struct kmem_zone *xfs_ifork_zone; +extern struct kmem_cache *xfs_ifork_cache; extern void xfs_ifork_init_cow(struct xfs_inode *ip); 
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index e5d767a7fc5d..327ba25e9e17 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -24,6 +24,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +struct kmem_cache *xfs_refcount_intent_cache; + /* Allowable refcount adjustment amounts. */ enum xfs_refc_adjust_op { XFS_REFCOUNT_ADJUST_INCREASE = 1, @@ -916,8 +918,7 @@ xfs_refcount_adjust_extents( struct xfs_btree_cur *cur, xfs_agblock_t *agbno, xfs_extlen_t *aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { struct xfs_refcount_irec ext, tmp; int error; @@ -974,8 +975,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, tmp.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, - tmp.rc_blockcount, oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, + tmp.rc_blockcount, NULL); } (*agbno) += tmp.rc_blockcount; @@ -1019,8 +1020,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, ext.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, - oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, + ext.rc_blockcount, NULL); } skip: @@ -1048,8 +1049,7 @@ xfs_refcount_adjust( xfs_extlen_t aglen, xfs_agblock_t *new_agbno, xfs_extlen_t *new_aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { bool shape_changed; int shape_changes = 0; @@ -1092,8 +1092,7 @@ xfs_refcount_adjust( cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ - error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, - adj, oinfo); + error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); if (error) goto out_error; @@ -1188,12 +1187,12 @@ xfs_refcount_finish_one( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, 
XFS_REFCOUNT_ADJUST_INCREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_INCREASE); *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_DECREASE); *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_ALLOC_COW: @@ -1235,8 +1234,8 @@ __xfs_refcount_add( type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), blockcount); - ri = kmem_alloc(sizeof(struct xfs_refcount_intent), - KM_NOFS); + ri = kmem_cache_alloc(xfs_refcount_intent_cache, + GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_startblock = startblock; @@ -1742,7 +1741,7 @@ xfs_refcount_recover_cow_leftovers( rr->rr_rrec.rc_blockcount); /* Free the block. */ - xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); + xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); error = xfs_trans_commit(tp); if (error) @@ -1782,3 +1781,20 @@ xfs_refcount_has_record( return xfs_btree_has_record(cur, &low, &high, exists); } + +int __init +xfs_refcount_intent_init_cache(void) +{ + xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", + sizeof(struct xfs_refcount_intent), + 0, 0, NULL); + + return xfs_refcount_intent_cache != NULL ? 
0 : -ENOMEM; +} + +void +xfs_refcount_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_refcount_intent_cache); + xfs_refcount_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 02cb3aa405be..9eb01edbd89d 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -32,8 +32,8 @@ enum xfs_refcount_intent_type { struct xfs_refcount_intent { struct list_head ri_list; enum xfs_refcount_intent_type ri_type; - xfs_fsblock_t ri_startblock; xfs_extlen_t ri_blockcount; + xfs_fsblock_t ri_startblock; }; void xfs_refcount_increase_extent(struct xfs_trans *tp, @@ -83,4 +83,9 @@ extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +extern struct kmem_cache *xfs_refcount_intent_cache; + +int __init xfs_refcount_intent_init_cache(void); +void xfs_refcount_intent_destroy_cache(void); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 1ef9b99962ab..d14c1720b0fb 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -21,6 +21,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_refcountbt_cur_cache; + static struct xfs_btree_cur * xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) @@ -322,11 +324,8 @@ xfs_refcountbt_init_common( ASSERT(pag->pag_agno < mp->m_sb.sb_agcount); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = XFS_BTNUM_REFC; - cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_REFC, + mp->m_refc_maxlevels, xfs_refcountbt_cur_cache); cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -396,6 +395,18 @@ xfs_refcountbt_commit_staged_btree( xfs_btree_commit_afakeroot(cur, tp, agbp, 
&xfs_refcountbt_ops); } +/* Calculate number of records in a refcount btree block. */ +static inline unsigned int +xfs_refcountbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct xfs_refcount_rec); + return blocklen / (sizeof(struct xfs_refcount_key) + + sizeof(xfs_refcount_ptr_t)); +} + /* * Calculate the number of records in a refcount btree block. */ @@ -405,11 +416,22 @@ xfs_refcountbt_maxrecs( bool leaf) { blocklen -= XFS_REFCOUNT_BLOCK_LEN; + return xfs_refcountbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(struct xfs_refcount_rec); - return blocklen / (sizeof(struct xfs_refcount_key) + - sizeof(xfs_refcount_ptr_t)); +/* Compute the max possible height of the maximally sized refcount btree. */ +unsigned int +xfs_refcountbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_refcountbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_refcountbt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_CRC_AG_BLOCKS); } /* Compute the maximum height of a refcount btree. */ @@ -417,8 +439,14 @@ void xfs_refcountbt_compute_maxlevels( struct xfs_mount *mp) { + if (!xfs_has_reflink(mp)) { + mp->m_refc_maxlevels = 0; + return; + } + mp->m_refc_maxlevels = xfs_btree_compute_maxlevels( mp->m_refc_mnr, mp->m_sb.sb_agblocks); + ASSERT(mp->m_refc_maxlevels <= xfs_refcountbt_maxlevels_ondisk()); } /* Calculate the refcount btree size for some records. 
*/ @@ -488,3 +516,22 @@ xfs_refcountbt_calc_reserves( return error; } + +int __init +xfs_refcountbt_init_cur_cache(void) +{ + xfs_refcountbt_cur_cache = kmem_cache_create("xfs_refcbt_cur", + xfs_btree_cur_sizeof(xfs_refcountbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_refcountbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_refcountbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_refcountbt_cur_cache); + xfs_refcountbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index bd9ed9e1e41f..d66b37259bed 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -65,4 +65,9 @@ extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, void xfs_refcountbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_refcountbt_maxlevels_ondisk(void); + +int __init xfs_refcountbt_init_cur_cache(void); +void xfs_refcountbt_destroy_cur_cache(void); + #endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index f45929b1b94a..cd322174dbff 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -24,6 +24,8 @@ #include "xfs_inode.h" #include "xfs_ag.h" +struct kmem_cache *xfs_rmap_intent_cache; + /* * Lookup the first record less than or equal to [bno, len, owner, offset] * in the btree given by cur. 
@@ -2485,7 +2487,7 @@ __xfs_rmap_add( bmap->br_blockcount, bmap->br_state); - ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_NOFS); + ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_owner = owner; @@ -2779,3 +2781,20 @@ const struct xfs_owner_info XFS_RMAP_OINFO_REFC = { const struct xfs_owner_info XFS_RMAP_OINFO_COW = { .oi_owner = XFS_RMAP_OWN_COW, }; + +int __init +xfs_rmap_intent_init_cache(void) +{ + xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent", + sizeof(struct xfs_rmap_intent), + 0, 0, NULL); + + return xfs_rmap_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_rmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_rmap_intent_cache); + xfs_rmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index fd67904ed446..b718ebeda372 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -159,8 +159,8 @@ enum xfs_rmap_intent_type { struct xfs_rmap_intent { struct list_head ri_list; enum xfs_rmap_intent_type ri_type; - uint64_t ri_owner; int ri_whichfork; + uint64_t ri_owner; struct xfs_bmbt_irec ri_bmap; }; @@ -215,4 +215,9 @@ extern const struct xfs_owner_info XFS_RMAP_OINFO_INODES; extern const struct xfs_owner_info XFS_RMAP_OINFO_REFC; extern const struct xfs_owner_info XFS_RMAP_OINFO_COW; +extern struct kmem_cache *xfs_rmap_intent_cache; + +int __init xfs_rmap_intent_init_cache(void); +void xfs_rmap_intent_destroy_cache(void); + #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index b7dbbfb3aeed..69e104d0277f 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -22,6 +22,8 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" +static struct kmem_cache *xfs_rmapbt_cur_cache; + /* * Reverse map btree. 
* @@ -451,13 +453,10 @@ xfs_rmapbt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; /* Overlapping btree; 2 keys per pointer. */ - cur->bc_btnum = XFS_BTNUM_RMAP; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, + mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache); cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; - cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); cur->bc_ops = &xfs_rmapbt_ops; @@ -522,6 +521,18 @@ xfs_rmapbt_commit_staged_btree( xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); } +/* Calculate number of records in a reverse mapping btree block. */ +static inline unsigned int +xfs_rmapbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct xfs_rmap_rec); + return blocklen / + (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); +} + /* * Calculate number of records in an rmap btree block. */ @@ -531,11 +542,33 @@ xfs_rmapbt_maxrecs( int leaf) { blocklen -= XFS_RMAP_BLOCK_LEN; + return xfs_rmapbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(struct xfs_rmap_rec); - return blocklen / - (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); +/* Compute the max possible height for reverse mapping btrees. */ +unsigned int +xfs_rmapbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2; + + /* + * Compute the asymptotic maxlevels for an rmapbt on any reflink fs. + * + * On a reflink filesystem, each AG block can have up to 2^32 (per the + * refcount record format) owners, which means that theoretically we + * could face up to 2^64 rmap records. 
However, we're likely to run + * out of blocks in the AG long before that happens, which means that + * we must compute the max height based on what the btree will look + * like if it consumes almost all the blocks in the AG due to maximal + * sharing factor. + */ + return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS); } /* Compute the maximum height of an rmap btree. */ @@ -543,26 +576,36 @@ void xfs_rmapbt_compute_maxlevels( struct xfs_mount *mp) { - /* - * On a non-reflink filesystem, the maximum number of rmap - * records is the number of blocks in the AG, hence the max - * rmapbt height is log_$maxrecs($agblocks). However, with - * reflink each AG block can have up to 2^32 (per the refcount - * record format) owners, which means that theoretically we - * could face up to 2^64 rmap records. - * - * That effectively means that the max rmapbt height must be - * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG - * blocks to feed the rmapbt long before the rmapbt reaches - * maximum height. The reflink code uses ag_resv_critical to - * disallow reflinking when less than 10% of the per-AG metadata - * block reservation since the fallback is a regular file copy. - */ - if (xfs_has_reflink(mp)) - mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; - else + if (!xfs_has_rmapbt(mp)) { + mp->m_rmap_maxlevels = 0; + return; + } + + if (xfs_has_reflink(mp)) { + /* + * Compute the asymptotic maxlevels for an rmap btree on a + * filesystem that supports reflink. + * + * On a reflink filesystem, each AG block can have up to 2^32 + * (per the refcount record format) owners, which means that + * theoretically we could face up to 2^64 rmap records. + * However, we're likely to run out of blocks in the AG long + * before that happens, which means that we must compute the + * max height based on what the btree will look like if it + * consumes almost all the blocks in the AG due to maximal + * sharing factor. 
+ */ + mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr, + mp->m_sb.sb_agblocks); + } else { + /* + * If there's no block sharing, compute the maximum rmapbt + * height assuming one rmap record per AG block. + */ mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels( mp->m_rmap_mnr, mp->m_sb.sb_agblocks); + } + ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk()); } /* Calculate the refcount btree size for some records. */ @@ -633,3 +676,22 @@ xfs_rmapbt_calc_reserves( return error; } + +int __init +xfs_rmapbt_init_cur_cache(void) +{ + xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur", + xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_rmapbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_rmapbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_rmapbt_cur_cache); + xfs_rmapbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index f2eee6572af4..3244715dd111 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -59,4 +59,9 @@ extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); +unsigned int xfs_rmapbt_maxlevels_ondisk(void); + +int __init xfs_rmapbt_init_cur_cache(void); +void xfs_rmapbt_destroy_cur_cache(void); + #endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e58349be78bd..f4e84aa1d50a 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -495,7 +495,7 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) static void __xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from, + struct xfs_dsb *from, bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); @@ -571,7 +571,7 @@ __xfs_sb_from_disk( void xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from) + struct xfs_dsb *from) { 
__xfs_sb_from_disk(to, from, true); } diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 5e300daa2559..6f83d9b306ee 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -70,7 +70,7 @@ xfs_allocfree_log_count( { uint blocks; - blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1); + blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1); if (xfs_has_rmapbt(mp)) blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); if (xfs_has_reflink(mp)) @@ -814,6 +814,19 @@ xfs_trans_resv_calc( struct xfs_mount *mp, struct xfs_trans_resv *resp) { + unsigned int rmap_maxlevels = mp->m_rmap_maxlevels; + + /* + * In the early days of rmap+reflink, we always set the rmap maxlevels + * to 9 even if the AG was small enough that it would never grow to + * that height. Transaction reservation sizes influence the minimum + * log size calculation, which influences the size of the log that mkfs + * creates. Use the old value here to ensure that newly formatted + * small filesystems will mount on older kernels. + */ + if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp)) + mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS; + /* * The following transactions are logged in physical format and * require a permanent reservation on space. @@ -916,4 +929,7 @@ xfs_trans_resv_calc( resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp); resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); + + /* Put everything back the way it was. This goes at the end. */ + mp->m_rmap_maxlevels = rmap_maxlevels; } diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 50332be34388..87b31c69a773 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -17,6 +17,13 @@ /* Adding one rmap could split every level up to the top of the tree. 
*/ #define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels) +/* + * Note that we historically set m_rmap_maxlevels to 9 when reflink is enabled, + * so we must preserve this behavior to avoid changing the transaction space + * reservations and minimum log size calculations for existing filesystems. + */ +#define XFS_OLD_REFLINK_RMAP_MAXLEVELS 9 + /* Blocks we might need to add "b" rmaps to a tree. */ #define XFS_NRMAPADD_SPACE_RES(mp, b)\ (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ @@ -74,7 +81,7 @@ #define XFS_DIOSTRAT_SPACE_RES(mp, v) \ (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) #define XFS_GROWFS_SPACE_RES(mp) \ - (2 * (mp)->m_ag_maxlevels) + (2 * (mp)->m_alloc_maxlevels) #define XFS_GROWFSRT_SPACE_RES(mp,b) \ ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) #define XFS_LINK_SPACE_RES(mp,nl) \ diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index ae3c9f6e2c69..bed798792226 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -555,11 +555,11 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_alloc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_alloc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); if (xfs_has_rmapbt(mp)) { @@ -568,7 +568,7 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_rmap_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); } @@ -578,7 +578,7 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_refcount_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_refc_maxlevels) xchk_block_set_corrupt(sc, 
sc->sa.agf_bp); } @@ -850,6 +850,7 @@ xchk_agi( struct xfs_mount *mp = sc->mp; struct xfs_agi *agi; struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(sc->mp); xfs_agnumber_t agno = sc->sm->sm_agno; xfs_agblock_t agbno; xfs_agblock_t eoag; @@ -880,7 +881,7 @@ xchk_agi( xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > igeo->inobt_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agi_bp); if (xfs_has_finobt(mp)) { @@ -889,7 +890,7 @@ xchk_agi( xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_free_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > igeo->inobt_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agi_bp); } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0f8deee66f15..d7bfed52f4cd 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -122,7 +122,7 @@ xrep_check_btree_root( xfs_agnumber_t agno = sc->sm->sm_agno; return xfs_verify_agbno(mp, agno, fab->root) && - fab->height <= XFS_BTREE_MAXLEVELS; + fab->height <= fab->maxlevels; } /* @@ -339,18 +339,22 @@ xrep_agf( [XREP_AGF_BNOBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_bnobt_buf_ops, + .maxlevels = sc->mp->m_alloc_maxlevels, }, [XREP_AGF_CNTBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_cntbt_buf_ops, + .maxlevels = sc->mp->m_alloc_maxlevels, }, [XREP_AGF_RMAPBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_rmapbt_buf_ops, + .maxlevels = sc->mp->m_rmap_maxlevels, }, [XREP_AGF_REFCOUNTBT] = { .rmap_owner = XFS_RMAP_OWN_REFC, .buf_ops = &xfs_refcountbt_buf_ops, + .maxlevels = sc->mp->m_refc_maxlevels, }, [XREP_AGF_END] = { .buf_ops = NULL, @@ -881,10 +885,12 @@ xrep_agi( [XREP_AGI_INOBT] = { .rmap_owner = XFS_RMAP_OWN_INOBT, .buf_ops = &xfs_inobt_buf_ops, + .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, }, [XREP_AGI_FINOBT] = { .rmap_owner = 
XFS_RMAP_OWN_INOBT, .buf_ops = &xfs_finobt_buf_ops, + .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, }, [XREP_AGI_END] = { .buf_ops = NULL diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index d6d24c866bc4..b89bf9de9b1c 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -222,21 +222,21 @@ out: * 1 2 3 * * Pretend for this example that each leaf block has 100 btree records. For - * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record - * that we saw block 1. Then we observe that bc_ptrs[1] == 1, so we record - * block 4. The list is [1, 4]. + * the first btree record, we'll observe that bc_levels[0].ptr == 1, so we + * record that we saw block 1. Then we observe that bc_levels[1].ptr == 1, so + * we record block 4. The list is [1, 4]. * - * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the - * loop. The list remains [1, 4]. + * For the second btree record, we see that bc_levels[0].ptr == 2, so we exit + * the loop. The list remains [1, 4]. * * For the 101st btree record, we've moved onto leaf block 2. Now - * bc_ptrs[0] == 1 again, so we record that we saw block 2. We see that - * bc_ptrs[1] == 2, so we exit the loop. The list is now [1, 4, 2]. + * bc_levels[0].ptr == 1 again, so we record that we saw block 2. We see that + * bc_levels[1].ptr == 2, so we exit the loop. The list is now [1, 4, 2]. * - * For the 102nd record, bc_ptrs[0] == 2, so we continue. + * For the 102nd record, bc_levels[0].ptr == 2, so we continue. * - * For the 201st record, we've moved on to leaf block 3. bc_ptrs[0] == 1, so - * we add 3 to the list. Now it is [1, 4, 2, 3]. + * For the 201st record, we've moved on to leaf block 3. + * bc_levels[0].ptr == 1, so we add 3 to the list. Now it is [1, 4, 2, 3]. * * For the 300th record we just exit, with the list being [1, 4, 2, 3]. 
*/ @@ -256,7 +256,7 @@ xbitmap_set_btcur_path( int i; int error; - for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) { + for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) { xfs_btree_get_block(cur, i, &bp); if (!bp) continue; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 017da9ceaee9..a4cbbc346f60 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -402,7 +402,7 @@ xchk_bmapbt_rec( * the root since the verifiers don't do that. */ if (xfs_has_crc(bs->cur->bc_mp) && - bs->cur->bc_ptrs[0] == 1) { + bs->cur->bc_levels[0].ptr == 1) { for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { block = xfs_btree_get_block(bs->cur, i, &bp); owner = be64_to_cpu(block->bb_u.l.bb_owner); diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index eccb855dc904..39dd46f038fe 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -136,14 +136,14 @@ xchk_btree_rec( struct xfs_buf *bp; block = xfs_btree_get_block(cur, 0, &bp); - rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block); trace_xchk_btree_rec(bs->sc, cur, 0); /* If this isn't the first record, are they in order? */ - if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) + if (cur->bc_levels[0].ptr > 1 && + !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) xchk_btree_set_corrupt(bs->sc, cur, 0); - bs->firstrec = false; memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); if (cur->bc_nlevels == 1) @@ -152,7 +152,7 @@ xchk_btree_rec( /* Is this at least as large as the parent low key? */ cur->bc_ops->init_key_from_rec(&key, rec); keyblock = xfs_btree_get_block(cur, 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); @@ -161,7 +161,7 @@ xchk_btree_rec( /* Is this no larger than the parent high key? 
*/ cur->bc_ops->init_high_key_from_rec(&hkey, rec); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); } @@ -183,23 +183,22 @@ xchk_btree_key( struct xfs_buf *bp; block = xfs_btree_get_block(cur, level, &bp); - key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); + key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); trace_xchk_btree_key(bs->sc, cur, level); /* If this isn't the first key, are they in order? */ - if (!bs->firstkey[level] && - !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) + if (cur->bc_levels[level].ptr > 1 && + !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1], key)) xchk_btree_set_corrupt(bs->sc, cur, level); - bs->firstkey[level] = false; - memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); + memcpy(&bs->lastkey[level - 1], key, cur->bc_ops->key_len); if (level + 1 >= cur->bc_nlevels) return; /* Is this at least as large as the parent low key? */ keyblock = xfs_btree_get_block(cur, level + 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); @@ -207,8 +206,9 @@ xchk_btree_key( return; /* Is this no larger than the parent high key? */ - key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, + keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); } @@ -291,7 +291,7 @@ xchk_btree_block_check_sibling( /* Compare upper level pointer to sibling pointer. 
*/ pblock = xfs_btree_get_block(ncur, level + 1, &pbp); - pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); + pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock); if (!xchk_btree_ptr_ok(bs, level + 1, pp)) goto out; if (pbp) @@ -596,7 +596,7 @@ xchk_btree_block_keys( /* Obtain the parent's copy of the keys for this block. */ parent_block = xfs_btree_get_block(cur, level + 1, &bp); - parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], + parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) @@ -607,7 +607,7 @@ xchk_btree_block_keys( /* Get high keys */ high_bk = xfs_btree_high_key_from_key(cur, &block_keys); - high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], + high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) @@ -627,35 +627,39 @@ xchk_btree( const struct xfs_owner_info *oinfo, void *private) { - struct xchk_btree bs = { - .cur = cur, - .scrub_rec = scrub_fn, - .oinfo = oinfo, - .firstrec = true, - .private = private, - .sc = sc, - }; union xfs_btree_ptr ptr; + struct xchk_btree *bs; union xfs_btree_ptr *pp; union xfs_btree_rec *recp; struct xfs_btree_block *block; - int level; struct xfs_buf *bp; struct check_owner *co; struct check_owner *n; - int i; + size_t cur_sz; + int level; int error = 0; - /* Initialize scrub state */ - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) - bs.firstkey[i] = true; - INIT_LIST_HEAD(&bs.to_check); - - /* Don't try to check a tree with a height we can't handle. */ - if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { + /* + * Allocate the btree scrub context from the heap, because this + * structure can get rather large. Don't let a caller feed us a + * totally absurd size. 
+ */ + cur_sz = xchk_btree_sizeof(cur->bc_nlevels); + if (cur_sz > PAGE_SIZE) { xchk_btree_set_corrupt(sc, cur, 0); - goto out; + return 0; } + bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL); + if (!bs) + return -ENOMEM; + bs->cur = cur; + bs->scrub_rec = scrub_fn; + bs->oinfo = oinfo; + bs->private = private; + bs->sc = sc; + + /* Initialize scrub state */ + INIT_LIST_HEAD(&bs->to_check); /* * Load the root of the btree. The helper function absorbs @@ -663,79 +667,82 @@ xchk_btree( */ level = cur->bc_nlevels - 1; cur->bc_ops->init_ptr_from_cur(cur, &ptr); - if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) + if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr)) goto out; - error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp); + error = xchk_btree_get_block(bs, level, &ptr, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; while (level < cur->bc_nlevels) { block = xfs_btree_get_block(cur, level, &bp); if (level == 0) { /* End of leaf, pop back towards the root. */ - if (cur->bc_ptrs[level] > + if (cur->bc_levels[level].ptr > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Records in order for scrub? */ - xchk_btree_rec(&bs); + xchk_btree_rec(bs); /* Call out to the record checker. */ - recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); - error = bs.scrub_rec(&bs, recp); + recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, + block); + error = bs->scrub_rec(bs, recp); if (error) break; if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break; - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; continue; } /* End of node, pop back towards the root. 
*/ - if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + if (cur->bc_levels[level].ptr > + be16_to_cpu(block->bb_numrecs)) { + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Keys in order for scrub? */ - xchk_btree_key(&bs, level); + xchk_btree_key(bs, level); /* Drill another level deeper. */ - pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); - if (!xchk_btree_ptr_ok(&bs, level, pp)) { - cur->bc_ptrs[level]++; + pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); + if (!xchk_btree_ptr_ok(bs, level, pp)) { + cur->bc_levels[level].ptr++; continue; } level--; - error = xchk_btree_get_block(&bs, level, pp, &block, &bp); + error = xchk_btree_get_block(bs, level, pp, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; } out: /* Process deferred owner checks on btree blocks. */ - list_for_each_entry_safe(co, n, &bs.to_check, list) { - if (!error && bs.cur) - error = xchk_btree_check_block_owner(&bs, - co->level, co->daddr); + list_for_each_entry_safe(co, n, &bs->to_check, list) { + if (!error && bs->cur) + error = xchk_btree_check_block_owner(bs, co->level, + co->daddr); list_del(&co->list); kmem_free(co); } + kmem_free(bs); return error; } diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index b7d2fc01fbf9..da61a53a0b61 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -39,11 +39,22 @@ struct xchk_btree { /* internal scrub state */ union xfs_btree_rec lastrec; - bool firstrec; - union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; - bool firstkey[XFS_BTREE_MAXLEVELS]; struct list_head to_check; + + /* this element must come last! */ + union xfs_btree_key lastkey[]; }; + +/* + * Calculate the size of a xchk_btree structure. There are nlevels-1 slots for + * keys because we track leaf records separately in lastrec. 
+ */ +static inline size_t +xchk_btree_sizeof(unsigned int nlevels) +{ + return struct_size((struct xchk_btree *)NULL, lastkey, nlevels - 1); +} + int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur, xchk_btree_rec_fn scrub_fn, const struct xfs_owner_info *oinfo, void *private); diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 8a52514bc1ff..b962cfbbd92b 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -473,7 +473,7 @@ xchk_da_btree( xchk_da_btree_rec_fn scrub_fn, void *private) { - struct xchk_da_btree ds = {}; + struct xchk_da_btree *ds; struct xfs_mount *mp = sc->mp; struct xfs_da_state_blk *blks; struct xfs_da_node_entry *key; @@ -486,32 +486,35 @@ xchk_da_btree( return 0; /* Set up initial da state. */ - ds.dargs.dp = sc->ip; - ds.dargs.whichfork = whichfork; - ds.dargs.trans = sc->tp; - ds.dargs.op_flags = XFS_DA_OP_OKNOENT; - ds.state = xfs_da_state_alloc(&ds.dargs); - ds.sc = sc; - ds.private = private; + ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL); + if (!ds) + return -ENOMEM; + ds->dargs.dp = sc->ip; + ds->dargs.whichfork = whichfork; + ds->dargs.trans = sc->tp; + ds->dargs.op_flags = XFS_DA_OP_OKNOENT; + ds->state = xfs_da_state_alloc(&ds->dargs); + ds->sc = sc; + ds->private = private; if (whichfork == XFS_ATTR_FORK) { - ds.dargs.geo = mp->m_attr_geo; - ds.lowest = 0; - ds.highest = 0; + ds->dargs.geo = mp->m_attr_geo; + ds->lowest = 0; + ds->highest = 0; } else { - ds.dargs.geo = mp->m_dir_geo; - ds.lowest = ds.dargs.geo->leafblk; - ds.highest = ds.dargs.geo->freeblk; + ds->dargs.geo = mp->m_dir_geo; + ds->lowest = ds->dargs.geo->leafblk; + ds->highest = ds->dargs.geo->freeblk; } - blkno = ds.lowest; + blkno = ds->lowest; level = 0; /* Find the root of the da tree, if present. 
*/ - blks = ds.state->path.blk; - error = xchk_da_btree_block(&ds, level, blkno); + blks = ds->state->path.blk; + error = xchk_da_btree_block(ds, level, blkno); if (error) goto out_state; /* - * We didn't find a block at ds.lowest, which means that there's + * We didn't find a block at ds->lowest, which means that there's * no LEAF1/LEAFN tree (at least not where it's supposed to be), * so jump out now. */ @@ -523,16 +526,16 @@ xchk_da_btree( /* Handle leaf block. */ if (blks[level].magic != XFS_DA_NODE_MAGIC) { /* End of leaf, pop back towards the root. */ - if (blks[level].index >= ds.maxrecs[level]) { + if (blks[level].index >= ds->maxrecs[level]) { if (level > 0) blks[level - 1].index++; - ds.tree_level++; + ds->tree_level++; level--; continue; } /* Dispatch record scrubbing. */ - error = scrub_fn(&ds, level); + error = scrub_fn(ds, level); if (error) break; if (xchk_should_terminate(sc, &error) || @@ -545,17 +548,17 @@ xchk_da_btree( /* End of node, pop back towards the root. */ - if (blks[level].index >= ds.maxrecs[level]) { + if (blks[level].index >= ds->maxrecs[level]) { if (level > 0) blks[level - 1].index++; - ds.tree_level++; + ds->tree_level++; level--; continue; } /* Hashes in order for scrub? */ - key = xchk_da_btree_node_entry(&ds, level); - error = xchk_da_btree_hash(&ds, level, &key->hashval); + key = xchk_da_btree_node_entry(ds, level); + error = xchk_da_btree_hash(ds, level, &key->hashval); if (error) goto out; @@ -564,11 +567,11 @@ xchk_da_btree( level++; if (level >= XFS_DA_NODE_MAXDEPTH) { /* Too deep! 
*/ - xchk_da_set_corrupt(&ds, level - 1); + xchk_da_set_corrupt(ds, level - 1); break; } - ds.tree_level--; - error = xchk_da_btree_block(&ds, level, blkno); + ds->tree_level--; + error = xchk_da_btree_block(ds, level, blkno); if (error) goto out; if (blks[level].bp == NULL) @@ -587,6 +590,7 @@ out: } out_state: - xfs_da_state_free(ds.state); + xfs_da_state_free(ds->state); + kmem_free(ds); return error; } diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 3bb152d52a07..840f74ec431c 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -44,6 +44,9 @@ struct xrep_find_ag_btree { /* in: buffer ops */ const struct xfs_buf_ops *buf_ops; + /* in: maximum btree height */ + unsigned int maxlevels; + /* out: the highest btree block found and the tree height */ xfs_agblock_t root; unsigned int height; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 51e4c61916d2..8d528d35b725 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -461,15 +461,10 @@ xfs_scrub_metadata( struct file *file, struct xfs_scrub_metadata *sm) { - struct xfs_scrub sc = { - .file = file, - .sm = sm, - }; + struct xfs_scrub *sc; struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; int error = 0; - sc.mp = mp; - BUILD_BUG_ON(sizeof(meta_scrub_ops) != (sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR)); @@ -489,59 +484,68 @@ xfs_scrub_metadata( xchk_experimental_warning(mp); - sc.ops = &meta_scrub_ops[sm->sm_type]; - sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type); + sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL); + if (!sc) { + error = -ENOMEM; + goto out; + } + + sc->mp = mp; + sc->file = file; + sc->sm = sm; + sc->ops = &meta_scrub_ops[sm->sm_type]; + sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type); retry_op: /* * When repairs are allowed, prevent freezing or readonly remount while * scrub is running with a real transaction. 
*/ if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) { - error = mnt_want_write_file(sc.file); + error = mnt_want_write_file(sc->file); if (error) - goto out; + goto out_sc; } /* Set up for the operation. */ - error = sc.ops->setup(&sc); + error = sc->ops->setup(sc); if (error) goto out_teardown; /* Scrub for errors. */ - error = sc.ops->scrub(&sc); - if (!(sc.flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { + error = sc->ops->scrub(sc); + if (!(sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { /* * Scrubbers return -EDEADLOCK to mean 'try harder'. * Tear down everything we hold, then set up again with * preparation for worst-case scenarios. */ - error = xchk_teardown(&sc, 0); + error = xchk_teardown(sc, 0); if (error) - goto out; - sc.flags |= XCHK_TRY_HARDER; + goto out_sc; + sc->flags |= XCHK_TRY_HARDER; goto retry_op; } else if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)) goto out_teardown; - xchk_update_health(&sc); + xchk_update_health(sc); - if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && - !(sc.flags & XREP_ALREADY_FIXED)) { + if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && + !(sc->flags & XREP_ALREADY_FIXED)) { bool needs_fix; /* Let debug users force us into the repair routines. */ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) - sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | - XFS_SCRUB_OFLAG_XCORRUPT | - XFS_SCRUB_OFLAG_PREEN)); + needs_fix = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | + XFS_SCRUB_OFLAG_XCORRUPT | + XFS_SCRUB_OFLAG_PREEN)); /* * If userspace asked for a repair but it wasn't necessary, * report that back to userspace. 
*/ if (!needs_fix) { - sc.sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED; goto out_nofix; } @@ -549,26 +553,28 @@ retry_op: * If it's broken, userspace wants us to fix it, and we haven't * already tried to fix it, then attempt a repair. */ - error = xrep_attempt(&sc); + error = xrep_attempt(sc); if (error == -EAGAIN) { /* * Either the repair function succeeded or it couldn't * get all the resources it needs; either way, we go * back to the beginning and call the scrub function. */ - error = xchk_teardown(&sc, 0); + error = xchk_teardown(sc, 0); if (error) { xrep_failure(mp); - goto out; + goto out_sc; } goto retry_op; } } out_nofix: - xchk_postmortem(&sc); + xchk_postmortem(sc); out_teardown: - error = xchk_teardown(&sc, error); + error = xchk_teardown(sc, error); +out_sc: + kmem_free(sc); out: trace_xchk_done(XFS_I(file_inode(file)), sm, error); if (error == -EFSCORRUPTED || error == -EFSBADCRC) { diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index c0ef53fe6611..b5f94676c37c 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -21,13 +21,14 @@ xchk_btree_cur_fsbno( struct xfs_btree_cur *cur, int level) { - if (level < cur->bc_nlevels && cur->bc_bufs[level]) + if (level < cur->bc_nlevels && cur->bc_levels[level].bp) return XFS_DADDR_TO_FSB(cur->bc_mp, - xfs_buf_daddr(cur->bc_bufs[level])); - if (level == cur->bc_nlevels - 1 && cur->bc_flags & XFS_BTREE_LONG_PTRS) + xfs_buf_daddr(cur->bc_levels[level].bp)); + + if (level == cur->bc_nlevels - 1 && + (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)) return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_ino.ip->i_ino); - if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) - return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, 0); + return NULLFSBLOCK; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index a7bbb84f91a7..93ece6df02e3 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -348,7 +348,7 @@ TRACE_EVENT(xchk_btree_op_error, 
__entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->error = error; __entry->ret_ip = ret_ip; ), @@ -389,7 +389,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error, __entry->type = sc->sm->sm_type; __entry->btnum = cur->bc_btnum; __entry->level = level; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); __entry->error = error; @@ -431,7 +431,7 @@ TRACE_EVENT(xchk_btree_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->ret_ip = ret_ip; ), TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", @@ -471,7 +471,7 @@ TRACE_EVENT(xchk_ifork_btree_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->ret_ip = ret_ip; ), TP_printk("dev %d:%d ino 0x%llx fork %s type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", @@ -511,7 +511,7 @@ DECLARE_EVENT_CLASS(xchk_sbtree_class, __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); __entry->level = level; __entry->nlevels = cur->bc_nlevels; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; ), TP_printk("dev %d:%d type %s btree %s agno 0x%x agbno 0x%x level %d nlevels %d ptr %d", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 34fc6148032a..c8c15c3c3147 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -82,6 +82,7 @@ xfs_end_ioend( struct iomap_ioend *ioend) { struct 
xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; unsigned int nofs_flag; @@ -97,18 +98,26 @@ xfs_end_ioend( /* * Just clean up the in-memory structures if the fs has been shut down. */ - if (xfs_is_shutdown(ip->i_mount)) { + if (xfs_is_shutdown(mp)) { error = -EIO; goto done; } /* - * Clean up any COW blocks on an I/O error. + * Clean up all COW blocks and underlying data fork delalloc blocks on + * I/O error. The delalloc punch is required because this ioend was + * mapped to blocks in the COW fork and the associated pages are no + * longer dirty. If we don't remove delalloc blocks here, they become + * stale and can corrupt free space accounting on unmount. */ error = blk_status_to_errno(ioend->io_bio->bi_status); if (unlikely(error)) { - if (ioend->io_flags & IOMAP_F_SHARED) + if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); + xfs_bmap_punch_delalloc_range(ip, + XFS_B_TO_FSBT(mp, offset), + XFS_B_TO_FSB(mp, size)); + } goto done; } diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 2b5da6218977..27265771f247 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -390,7 +390,7 @@ out_destroy_fork: /* kill the in-core attr fork before we drop the inode lock */ if (dp->i_afp) { xfs_idestroy_fork(dp->i_afp); - kmem_cache_free(xfs_ifork_zone, dp->i_afp); + kmem_cache_free(xfs_ifork_cache, dp->i_afp); dp->i_afp = NULL; } if (lock_mode) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 03159970133f..e1f4d7d5a011 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -25,8 +25,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_bui_zone; -kmem_zone_t *xfs_bud_zone; +struct kmem_cache *xfs_bui_cache; +struct kmem_cache *xfs_bud_cache; static const struct xfs_item_ops xfs_bui_item_ops; @@ -39,7 +39,7 @@ STATIC void xfs_bui_item_free( 
struct xfs_bui_log_item *buip) { - kmem_cache_free(xfs_bui_zone, buip); + kmem_cache_free(xfs_bui_cache, buip); } /* @@ -138,7 +138,7 @@ xfs_bui_init( { struct xfs_bui_log_item *buip; - buip = kmem_cache_zalloc(xfs_bui_zone, GFP_KERNEL | __GFP_NOFAIL); + buip = kmem_cache_zalloc(xfs_bui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops); buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS; @@ -198,7 +198,7 @@ xfs_bud_item_release( struct xfs_bud_log_item *budp = BUD_ITEM(lip); xfs_bui_release(budp->bud_buip); - kmem_cache_free(xfs_bud_zone, budp); + kmem_cache_free(xfs_bud_cache, budp); } static const struct xfs_item_ops xfs_bud_item_ops = { @@ -215,7 +215,7 @@ xfs_trans_get_bud( { struct xfs_bud_log_item *budp; - budp = kmem_cache_zalloc(xfs_bud_zone, GFP_KERNEL | __GFP_NOFAIL); + budp = kmem_cache_zalloc(xfs_bud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); budp->bud_buip = buip; @@ -384,7 +384,7 @@ xfs_bmap_update_finish_item( bmap->bi_bmap.br_blockcount = count; return -EAGAIN; } - kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); return error; } @@ -404,7 +404,7 @@ xfs_bmap_update_cancel_item( struct xfs_bmap_intent *bmap; bmap = container_of(item, struct xfs_bmap_intent, bi_list); - kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); } const struct xfs_defer_op_type xfs_bmap_update_defer_type = { @@ -532,7 +532,7 @@ xfs_bui_item_recover( * Commit transaction, which frees the transaction and saves the inode * for later replay activities. 
*/ - error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list); + error = xfs_defer_ops_capture_and_commit(tp, capture_list); if (error) goto err_unlock; diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index b9be62f8bd52..3fafd3881a0b 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -25,7 +25,7 @@ /* kernel only BUI/BUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -65,7 +65,7 @@ struct xfs_bud_log_item { struct xfs_bud_log_format bud_format; }; -extern struct kmem_zone *xfs_bui_zone; -extern struct kmem_zone *xfs_bud_zone; +extern struct kmem_cache *xfs_bui_cache; +extern struct kmem_cache *xfs_bud_cache; #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 5fa6cd947dd4..631c5a61d89b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -20,7 +20,7 @@ #include "xfs_error.h" #include "xfs_ag.h" -static kmem_zone_t *xfs_buf_zone; +static struct kmem_cache *xfs_buf_cache; /* * Locking orders @@ -220,7 +220,7 @@ _xfs_buf_alloc( int i; *bpp = NULL; - bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL); + bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL); /* * We don't want certain flags to appear in b_flags unless they are @@ -247,7 +247,7 @@ _xfs_buf_alloc( */ error = xfs_buf_get_maps(bp, nmaps); if (error) { - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); return error; } @@ -307,7 +307,7 @@ xfs_buf_free( kmem_free(bp->b_addr); xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); } static int @@ -2258,12 +2258,12 @@ xfs_buf_delwri_pushbuf( int __init xfs_buf_init(void) { - xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, + xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_buf_zone) + if 
(!xfs_buf_cache) goto out; return 0; @@ -2275,7 +2275,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - kmem_cache_destroy(xfs_buf_zone); + kmem_cache_destroy(xfs_buf_cache); } void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index b1ab100c09e1..a7a8e4528881 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -23,7 +23,7 @@ #include "xfs_log.h" -kmem_zone_t *xfs_buf_item_zone; +struct kmem_cache *xfs_buf_item_cache; static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) { @@ -804,7 +804,7 @@ xfs_buf_item_init( return 0; } - bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL); + bip = kmem_cache_zalloc(xfs_buf_item_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); bip->bli_buf = bp; @@ -825,7 +825,7 @@ xfs_buf_item_init( map_size = DIV_ROUND_UP(chunks, NBWORD); if (map_size > XFS_BLF_DATAMAP_SIZE) { - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); xfs_err(mp, "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!", map_size, @@ -1002,7 +1002,7 @@ xfs_buf_item_free( { xfs_buf_item_free_format(bip); kmem_free(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 50aa0f5ef959..e11e9ef2338f 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -71,6 +71,6 @@ static inline void xfs_buf_dquot_io_fail(struct xfs_buf *bp) void xfs_buf_iodone(struct xfs_buf *); bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); -extern kmem_zone_t *xfs_buf_item_zone; +extern struct kmem_cache *xfs_buf_item_cache; #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index a476c7ef5d53..70ca5751b13e 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ 
b/fs/xfs/xfs_buf_item_recover.c @@ -603,7 +603,7 @@ xlog_recover_do_inode_buffer( inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; for (i = 0; i < inodes_per_buf; i++) { next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + - offsetof(xfs_dinode_t, di_next_unlinked); + offsetof(struct xfs_dinode, di_next_unlinked); while (next_unlinked_offset >= (reg_buf_offset + reg_buf_bytes)) { diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c15d61d47a06..e48ae227bb11 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -38,8 +38,8 @@ * otherwise by the lowest id first, see xfs_dqlock2. */ -struct kmem_zone *xfs_qm_dqtrxzone; -static struct kmem_zone *xfs_qm_dqzone; +struct kmem_cache *xfs_dqtrx_cache; +static struct kmem_cache *xfs_dquot_cache; static struct lock_class_key xfs_dquot_group_class; static struct lock_class_key xfs_dquot_project_class; @@ -57,7 +57,7 @@ xfs_qm_dqdestroy( mutex_destroy(&dqp->q_qlock); XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); - kmem_cache_free(xfs_qm_dqzone, dqp); + kmem_cache_free(xfs_dquot_cache, dqp); } /* @@ -458,7 +458,7 @@ xfs_dquot_alloc( { struct xfs_dquot *dqp; - dqp = kmem_cache_zalloc(xfs_qm_dqzone, GFP_KERNEL | __GFP_NOFAIL); + dqp = kmem_cache_zalloc(xfs_dquot_cache, GFP_KERNEL | __GFP_NOFAIL); dqp->q_type = type; dqp->q_id = id; @@ -471,7 +471,7 @@ xfs_dquot_alloc( * Offset of dquot in the (fixed sized) dquot chunk. 
*/ dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * - sizeof(xfs_dqblk_t); + sizeof(struct xfs_dqblk); /* * Because we want to use a counting completion, complete @@ -1363,22 +1363,22 @@ xfs_dqlock2( int __init xfs_qm_init(void) { - xfs_qm_dqzone = kmem_cache_create("xfs_dquot", + xfs_dquot_cache = kmem_cache_create("xfs_dquot", sizeof(struct xfs_dquot), 0, 0, NULL); - if (!xfs_qm_dqzone) + if (!xfs_dquot_cache) goto out; - xfs_qm_dqtrxzone = kmem_cache_create("xfs_dqtrx", + xfs_dqtrx_cache = kmem_cache_create("xfs_dqtrx", sizeof(struct xfs_dquot_acct), 0, 0, NULL); - if (!xfs_qm_dqtrxzone) - goto out_free_dqzone; + if (!xfs_dqtrx_cache) + goto out_free_dquot_cache; return 0; -out_free_dqzone: - kmem_cache_destroy(xfs_qm_dqzone); +out_free_dquot_cache: + kmem_cache_destroy(xfs_dquot_cache); out: return -ENOMEM; } @@ -1386,8 +1386,8 @@ out: void xfs_qm_exit(void) { - kmem_cache_destroy(xfs_qm_dqtrxzone); - kmem_cache_destroy(xfs_qm_dqzone); + kmem_cache_destroy(xfs_dqtrx_cache); + kmem_cache_destroy(xfs_dquot_cache); } /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3f8a0713573a..47ef9c9c5c17 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -25,8 +25,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_efi_zone; -kmem_zone_t *xfs_efd_zone; +struct kmem_cache *xfs_efi_cache; +struct kmem_cache *xfs_efd_cache; static const struct xfs_item_ops xfs_efi_item_ops; @@ -43,7 +43,7 @@ xfs_efi_item_free( if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) kmem_free(efip); else - kmem_cache_free(xfs_efi_zone, efip); + kmem_cache_free(xfs_efi_cache, efip); } /* @@ -161,7 +161,7 @@ xfs_efi_init( ((nextents - 1) * sizeof(xfs_extent_t))); efip = kmem_zalloc(size, 0); } else { - efip = kmem_cache_zalloc(xfs_efi_zone, + efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -241,7 +241,7 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) if 
(efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) kmem_free(efdp); else - kmem_cache_free(xfs_efd_zone, efdp); + kmem_cache_free(xfs_efd_cache, efdp); } /* @@ -333,7 +333,7 @@ xfs_trans_get_efd( (nextents - 1) * sizeof(struct xfs_extent), 0); } else { - efdp = kmem_cache_zalloc(xfs_efd_zone, + efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -474,15 +474,21 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_owner_info oinfo = { }; struct xfs_extent_free_item *free; int error; free = container_of(item, struct xfs_extent_free_item, xefi_list); + oinfo.oi_owner = free->xefi_owner; + if (free->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; + if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) + oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; error = xfs_trans_free_extent(tp, EFD_ITEM(done), free->xefi_startblock, free->xefi_blockcount, - &free->xefi_oinfo, free->xefi_skip_discard); - kmem_free(free); + &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD); + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -502,7 +508,7 @@ xfs_extent_free_cancel_item( struct xfs_extent_free_item *free; free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_free(free); + kmem_cache_free(xfs_extfree_item_cache, free); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -525,6 +531,7 @@ xfs_agfl_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_efd_log_item *efdp = EFD_ITEM(done); struct xfs_extent_free_item *free; @@ -539,13 +546,13 @@ xfs_agfl_free_finish_item( ASSERT(free->xefi_blockcount == 1); agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); + oinfo.oi_owner = free->xefi_owner; trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); error = 
xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (!error) - error = xfs_free_agfl_block(tp, agno, agbno, agbp, - &free->xefi_oinfo); + error = xfs_free_agfl_block(tp, agno, agbno, agbp, &oinfo); /* * Mark the transaction dirty, even on error. This ensures the @@ -564,7 +571,7 @@ xfs_agfl_free_finish_item( extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; - kmem_free(free); + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -637,7 +644,7 @@ xfs_efi_item_recover( } - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index cd2860c875bf..186d0f2137f1 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -9,7 +9,7 @@ /* kernel only EFI/EFD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -69,7 +69,7 @@ struct xfs_efd_log_item { */ #define XFS_EFD_MAX_FAST_EXTENTS 16 -extern struct kmem_zone *xfs_efi_zone; -extern struct kmem_zone *xfs_efd_zone; +extern struct kmem_cache *xfs_efi_cache; +extern struct kmem_cache *xfs_efd_cache; #endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f2210d927481..e1472004170e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -77,10 +77,10 @@ xfs_inode_alloc( * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL * and return NULL here on ENOMEM. 
*/ - ip = kmem_cache_alloc(xfs_inode_zone, GFP_KERNEL | __GFP_NOFAIL); + ip = kmem_cache_alloc(xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL); if (inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); return NULL; } @@ -130,11 +130,11 @@ xfs_inode_free_callback( if (ip->i_afp) { xfs_idestroy_fork(ip->i_afp); - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); } if (ip->i_cowfp) { xfs_idestroy_fork(ip->i_cowfp); - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); + kmem_cache_free(xfs_ifork_cache, ip->i_cowfp); } if (ip->i_itemp) { ASSERT(!test_bit(XFS_LI_IN_AIL, @@ -143,7 +143,7 @@ xfs_inode_free_callback( ip->i_itemp = NULL; } - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); } static void diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 017904a34c02..508e184e3b8f 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -20,7 +20,7 @@ #include "xfs_ialloc.h" #include "xfs_trace.h" -kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +struct kmem_cache *xfs_icreate_cache; /* inode create item */ static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) { @@ -63,7 +63,7 @@ STATIC void xfs_icreate_item_release( struct xfs_log_item *lip) { - kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip)); + kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip)); } static const struct xfs_item_ops xfs_icreate_item_ops = { @@ -97,7 +97,7 @@ xfs_icreate_log( { struct xfs_icreate_item *icp; - icp = kmem_cache_zalloc(xfs_icreate_zone, GFP_KERNEL | __GFP_NOFAIL); + icp = kmem_cache_zalloc(xfs_icreate_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, &xfs_icreate_item_ops); diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h index a50d0b01e15a..64992823108a 100644 --- a/fs/xfs/xfs_icreate_item.h +++ b/fs/xfs/xfs_icreate_item.h @@ -12,7 
+12,7 @@ struct xfs_icreate_item { struct xfs_icreate_log ic_format; }; -extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +extern struct kmem_cache *xfs_icreate_cache; /* inode create item */ void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, unsigned int count, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a4f6f034fb81..64b9bf334806 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -36,7 +36,7 @@ #include "xfs_reflink.h" #include "xfs_ag.h" -kmem_zone_t *xfs_inode_zone; +struct kmem_cache *xfs_inode_cache; /* * Used in xfs_itruncate_extents(). This is the maximum number of extents @@ -564,8 +564,6 @@ xfs_lock_two_inodes( struct xfs_inode *ip1, uint ip1_mode) { - struct xfs_inode *temp; - uint mode_temp; int attempts = 0; struct xfs_log_item *lp; @@ -578,12 +576,8 @@ xfs_lock_two_inodes( ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { - temp = ip0; - ip0 = ip1; - ip1 = temp; - mode_temp = ip0_mode; - ip0_mode = ip1_mode; - ip1_mode = mode_temp; + swap(ip0, ip1); + swap(ip0_mode, ip1_mode); } again: diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b21b177832d1..e635a3d64cba 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -504,7 +504,7 @@ static inline void xfs_setup_existing_inode(struct xfs_inode *ip) void xfs_irele(struct xfs_inode *ip); -extern struct kmem_zone *xfs_inode_zone; +extern struct kmem_cache *xfs_inode_cache; /* The default CoW extent size hint. 
*/ #define XFS_DEFAULT_COWEXTSZ_HINT 32 diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0659d19c211e..90d8e591baf8 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -21,7 +21,7 @@ #include -kmem_zone_t *xfs_ili_zone; /* inode log item zone */ +struct kmem_cache *xfs_ili_cache; /* inode log item */ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) { @@ -672,7 +672,7 @@ xfs_inode_item_init( struct xfs_inode_log_item *iip; ASSERT(ip->i_itemp == NULL); - iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_zone, + iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, GFP_KERNEL | __GFP_NOFAIL); iip->ili_inode = ip; @@ -694,7 +694,7 @@ xfs_inode_item_destroy( ip->i_itemp = NULL; kmem_free(iip->ili_item.li_lv_shadow); - kmem_cache_free(xfs_ili_zone, iip); + kmem_cache_free(xfs_ili_cache, iip); } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 403b45ab9aa2..1a302000d604 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -47,6 +47,6 @@ extern void xfs_iflush_abort(struct xfs_inode *); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, struct xfs_inode_log_format *); -extern struct kmem_zone *xfs_ili_zone; +extern struct kmem_cache *xfs_ili_cache; #endif /* __XFS_INODE_ITEM_H__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f6cd2d4aa770..89fec9a18c34 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -21,7 +21,7 @@ #include "xfs_sb.h" #include "xfs_health.h" -kmem_zone_t *xfs_log_ticket_zone; +struct kmem_cache *xfs_log_ticket_cache; /* Local miscellaneous function prototypes */ STATIC struct xlog * @@ -3487,7 +3487,7 @@ xfs_log_ticket_put( { ASSERT(atomic_read(&ticket->t_ref) > 0); if (atomic_dec_and_test(&ticket->t_ref)) - kmem_cache_free(xfs_log_ticket_zone, ticket); + kmem_cache_free(xfs_log_ticket_cache, ticket); } xlog_ticket_t * @@ -3611,7 +3611,7 @@ xlog_ticket_alloc( struct xlog_ticket *tic; int unit_res; - tic = 
kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL); + tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL); unit_res = xlog_calc_unit_res(log, unit_bytes); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 844fbeec3545..23103d68423c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -497,7 +497,7 @@ xlog_recover_cancel(struct xlog *); extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, char *dp, int size); -extern kmem_zone_t *xfs_log_ticket_zone; +extern struct kmem_cache *xfs_log_ticket_cache; struct xlog_ticket * xlog_ticket_alloc( struct xlog *log, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 10562ecbd9ea..53366cc0bc9e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2466,11 +2466,11 @@ xlog_finish_defer_ops( { struct xfs_defer_capture *dfc, *next; struct xfs_trans *tp; - struct xfs_inode *ip; int error = 0; list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { struct xfs_trans_res resv; + struct xfs_defer_resources dres; /* * Create a new transaction reservation from the captured @@ -2494,13 +2494,9 @@ xlog_finish_defer_ops( * from recovering a single intent item. 
*/ list_del_init(&dfc->dfc_list); - xfs_defer_ops_continue(dfc, tp, &ip); - + xfs_defer_ops_continue(dfc, tp, &dres); error = xfs_trans_commit(tp); - if (ip) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_irele(ip); - } + xfs_defer_resources_rele(&dres); if (error) return error; } @@ -2520,7 +2516,7 @@ xlog_abort_defer_ops( list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { list_del_init(&dfc->dfc_list); - xfs_defer_ops_release(mp, dfc); + xfs_defer_ops_capture_free(mp, dfc); } } /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 06dac09eddbd..359109b6f0d3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -567,6 +567,18 @@ xfs_mount_setup_inode_geom( xfs_ialloc_setup_geometry(mp); } +/* Compute maximum possible height for per-AG btree types for this fs. */ +static inline void +xfs_agbtree_compute_maxlevels( + struct xfs_mount *mp) +{ + unsigned int levels; + + levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels); + levels = max(levels, mp->m_rmap_maxlevels); + mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -638,6 +650,8 @@ xfs_mountfs( xfs_rmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); + xfs_agbtree_compute_maxlevels(mp); + /* * Check if sb_agblocks is aligned at stripe boundary. 
If sb_agblocks * is NOT aligned turn off m_dalign since allocator alignment is within diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e091f3b3fa15..00720a02e761 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -128,10 +128,11 @@ typedef struct xfs_mount { uint m_rmap_mnr[2]; /* min rmap btree records */ uint m_refc_mxr[2]; /* max refc btree records */ uint m_refc_mnr[2]; /* min refc btree records */ - uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ - uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ + uint m_alloc_maxlevels; /* max alloc btree levels */ + uint m_bm_maxlevels[2]; /* max bmap btree levels */ uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_refc_maxlevels; /* max refcount btree level */ + unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ uint m_alloc_set_aside; /* space we can't use */ uint m_ag_max_usable; /* max space per AG */ diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 34c3b16f834f..f85e3b07ab44 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -219,7 +219,7 @@ _xfs_mru_cache_list_insert( * When destroying or reaping, all the elements that were migrated to the reap * list need to be deleted. For each element this involves removing it from the * data store, removing it from the reap list, calling the client's free - * function and deleting the element from the element zone. + * function and deleting the element from the element cache. * * We get called holding the mru->lock, which we drop and then reacquire. * Sparse need special help with this to tell it we know what we are doing. 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 5608066d6e53..32ac8d9c8940 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -850,7 +850,7 @@ xfs_qm_reset_dqcounts( */ #ifdef DEBUG j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) / - sizeof(xfs_dqblk_t); + sizeof(struct xfs_dqblk); ASSERT(mp->m_quotainfo->qi_dqperchunk == j); #endif dqb = bp->b_addr; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 442a0f97a9d4..5bb12717ea28 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -11,7 +11,7 @@ struct xfs_inode; -extern struct kmem_zone *xfs_qm_dqtrxzone; +extern struct kmem_cache *xfs_dqtrx_cache; /* * Number of bmaps that we ask from bmapi when doing a quotacheck. diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 46904b793bd4..d3da67772d57 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_cui_zone; -kmem_zone_t *xfs_cud_zone; +struct kmem_cache *xfs_cui_cache; +struct kmem_cache *xfs_cud_cache; static const struct xfs_item_ops xfs_cui_item_ops; @@ -38,7 +38,7 @@ xfs_cui_item_free( if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) kmem_free(cuip); else - kmem_cache_free(xfs_cui_zone, cuip); + kmem_cache_free(xfs_cui_cache, cuip); } /* @@ -143,7 +143,7 @@ xfs_cui_init( cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 0); else - cuip = kmem_cache_zalloc(xfs_cui_zone, + cuip = kmem_cache_zalloc(xfs_cui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); @@ -204,7 +204,7 @@ xfs_cud_item_release( struct xfs_cud_log_item *cudp = CUD_ITEM(lip); xfs_cui_release(cudp->cud_cuip); - kmem_cache_free(xfs_cud_zone, cudp); + kmem_cache_free(xfs_cud_cache, cudp); } static const struct xfs_item_ops xfs_cud_item_ops = { @@ -221,7 +221,7 @@ xfs_trans_get_cud( { struct xfs_cud_log_item *cudp; - cudp = kmem_cache_zalloc(xfs_cud_zone, GFP_KERNEL | __GFP_NOFAIL); + 
cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); cudp->cud_cuip = cuip; @@ -384,7 +384,7 @@ xfs_refcount_update_finish_item( refc->ri_blockcount = new_aglen; return -EAGAIN; } - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); return error; } @@ -404,7 +404,7 @@ xfs_refcount_update_cancel_item( struct xfs_refcount_intent *refc; refc = container_of(item, struct xfs_refcount_intent, ri_list); - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); } const struct xfs_defer_op_type xfs_refcount_update_defer_type = { @@ -557,7 +557,7 @@ xfs_cui_item_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index f4f2e836540b..eb0ab13682d0 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -25,7 +25,7 @@ /* kernel only CUI/CUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. 
@@ -68,7 +68,7 @@ struct xfs_cud_log_item { struct xfs_cud_log_format cud_format; }; -extern struct kmem_zone *xfs_cui_zone; -extern struct kmem_zone *xfs_cud_zone; +extern struct kmem_cache *xfs_cui_cache; +extern struct kmem_cache *xfs_cud_cache; #endif /* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 76355f293488..cb0edb1d68ef 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -484,7 +484,7 @@ xfs_reflink_cancel_cow_blocks( xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); - xfs_bmap_add_free(*tpp, del.br_startblock, + xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL); /* Roll the transaction */ diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 5f0695980467..c3966b4c58ef 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_rui_zone; -kmem_zone_t *xfs_rud_zone; +struct kmem_cache *xfs_rui_cache; +struct kmem_cache *xfs_rud_cache; static const struct xfs_item_ops xfs_rui_item_ops; @@ -38,7 +38,7 @@ xfs_rui_item_free( if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) kmem_free(ruip); else - kmem_cache_free(xfs_rui_zone, ruip); + kmem_cache_free(xfs_rui_cache, ruip); } /* @@ -141,7 +141,7 @@ xfs_rui_init( if (nextents > XFS_RUI_MAX_FAST_EXTENTS) ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), 0); else - ruip = kmem_cache_zalloc(xfs_rui_zone, + ruip = kmem_cache_zalloc(xfs_rui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); @@ -227,7 +227,7 @@ xfs_rud_item_release( struct xfs_rud_log_item *rudp = RUD_ITEM(lip); xfs_rui_release(rudp->rud_ruip); - kmem_cache_free(xfs_rud_zone, rudp); + kmem_cache_free(xfs_rud_cache, rudp); } static const struct xfs_item_ops xfs_rud_item_ops = { @@ -244,7 +244,7 @@ xfs_trans_get_rud( { struct xfs_rud_log_item *rudp; - rudp = 
kmem_cache_zalloc(xfs_rud_zone, GFP_KERNEL | __GFP_NOFAIL); + rudp = kmem_cache_zalloc(xfs_rud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); rudp->rud_ruip = ruip; @@ -427,7 +427,7 @@ xfs_rmap_update_finish_item( rmap->ri_bmap.br_startoff, rmap->ri_bmap.br_startblock, rmap->ri_bmap.br_blockcount, rmap->ri_bmap.br_state, state); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); return error; } @@ -447,7 +447,7 @@ xfs_rmap_update_cancel_item( struct xfs_rmap_intent *rmap; rmap = container_of(item, struct xfs_rmap_intent, ri_list); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); } const struct xfs_defer_op_type xfs_rmap_update_defer_type = { @@ -587,7 +587,7 @@ xfs_rui_item_recover( } xfs_rmap_finish_one_cleanup(tp, rcur, error); - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_rmap_finish_one_cleanup(tp, rcur, error); diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h index 31e6cdfff71f..802e5119eaca 100644 --- a/fs/xfs/xfs_rmap_item.h +++ b/fs/xfs/xfs_rmap_item.h @@ -28,7 +28,7 @@ /* kernel only RUI/RUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. 
@@ -68,7 +68,7 @@ struct xfs_rud_log_item { struct xfs_rud_log_format rud_format; }; -extern struct kmem_zone *xfs_rui_zone; -extern struct kmem_zone *xfs_rud_zone; +extern struct kmem_cache *xfs_rui_cache; +extern struct kmem_cache *xfs_rud_cache; #endif /* __XFS_RMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c4e0cd1c1c8c..e21459f9923a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -37,6 +37,7 @@ #include "xfs_reflink.h" #include "xfs_pwork.h" #include "xfs_ag.h" +#include "xfs_defer.h" #include #include @@ -1951,196 +1952,194 @@ static struct file_system_type xfs_fs_type = { MODULE_ALIAS_FS("xfs"); STATIC int __init -xfs_init_zones(void) +xfs_init_caches(void) { - xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket", + int error; + + xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", sizeof(struct xlog_ticket), 0, 0, NULL); - if (!xfs_log_ticket_zone) + if (!xfs_log_ticket_cache) goto out; - xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item", - sizeof(struct xfs_extent_free_item), - 0, 0, NULL); - if (!xfs_bmap_free_item_zone) - goto out_destroy_log_ticket_zone; + error = xfs_btree_init_cur_caches(); + if (error) + goto out_destroy_log_ticket_cache; - xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur", - sizeof(struct xfs_btree_cur), - 0, 0, NULL); - if (!xfs_btree_cur_zone) - goto out_destroy_bmap_free_item_zone; + error = xfs_defer_init_item_caches(); + if (error) + goto out_destroy_btree_cur_cache; - xfs_da_state_zone = kmem_cache_create("xfs_da_state", + xfs_da_state_cache = kmem_cache_create("xfs_da_state", sizeof(struct xfs_da_state), 0, 0, NULL); - if (!xfs_da_state_zone) - goto out_destroy_btree_cur_zone; + if (!xfs_da_state_cache) + goto out_destroy_defer_item_cache; - xfs_ifork_zone = kmem_cache_create("xfs_ifork", + xfs_ifork_cache = kmem_cache_create("xfs_ifork", sizeof(struct xfs_ifork), 0, 0, NULL); - if (!xfs_ifork_zone) - goto out_destroy_da_state_zone; + if (!xfs_ifork_cache) 
+ goto out_destroy_da_state_cache; - xfs_trans_zone = kmem_cache_create("xfs_trans", + xfs_trans_cache = kmem_cache_create("xfs_trans", sizeof(struct xfs_trans), 0, 0, NULL); - if (!xfs_trans_zone) - goto out_destroy_ifork_zone; + if (!xfs_trans_cache) + goto out_destroy_ifork_cache; /* - * The size of the zone allocated buf log item is the maximum + * The size of the cache-allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, * but it is much faster. */ - xfs_buf_item_zone = kmem_cache_create("xfs_buf_item", + xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", sizeof(struct xfs_buf_log_item), 0, 0, NULL); - if (!xfs_buf_item_zone) - goto out_destroy_trans_zone; + if (!xfs_buf_item_cache) + goto out_destroy_trans_cache; - xfs_efd_zone = kmem_cache_create("xfs_efd_item", + xfs_efd_cache = kmem_cache_create("xfs_efd_item", (sizeof(struct xfs_efd_log_item) + (XFS_EFD_MAX_FAST_EXTENTS - 1) * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efd_zone) - goto out_destroy_buf_item_zone; + if (!xfs_efd_cache) + goto out_destroy_buf_item_cache; - xfs_efi_zone = kmem_cache_create("xfs_efi_item", + xfs_efi_cache = kmem_cache_create("xfs_efi_item", (sizeof(struct xfs_efi_log_item) + (XFS_EFI_MAX_FAST_EXTENTS - 1) * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efi_zone) - goto out_destroy_efd_zone; + if (!xfs_efi_cache) + goto out_destroy_efd_cache; - xfs_inode_zone = kmem_cache_create("xfs_inode", + xfs_inode_cache = kmem_cache_create("xfs_inode", sizeof(struct xfs_inode), 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), xfs_fs_inode_init_once); - if (!xfs_inode_zone) - goto out_destroy_efi_zone; + if (!xfs_inode_cache) + goto out_destroy_efi_cache; - xfs_ili_zone = kmem_cache_create("xfs_ili", + xfs_ili_cache = kmem_cache_create("xfs_ili", sizeof(struct xfs_inode_log_item), 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_ili_zone) - goto out_destroy_inode_zone; + if 
(!xfs_ili_cache) + goto out_destroy_inode_cache; - xfs_icreate_zone = kmem_cache_create("xfs_icr", + xfs_icreate_cache = kmem_cache_create("xfs_icr", sizeof(struct xfs_icreate_item), 0, 0, NULL); - if (!xfs_icreate_zone) - goto out_destroy_ili_zone; + if (!xfs_icreate_cache) + goto out_destroy_ili_cache; - xfs_rud_zone = kmem_cache_create("xfs_rud_item", + xfs_rud_cache = kmem_cache_create("xfs_rud_item", sizeof(struct xfs_rud_log_item), 0, 0, NULL); - if (!xfs_rud_zone) - goto out_destroy_icreate_zone; + if (!xfs_rud_cache) + goto out_destroy_icreate_cache; - xfs_rui_zone = kmem_cache_create("xfs_rui_item", + xfs_rui_cache = kmem_cache_create("xfs_rui_item", xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_rui_zone) - goto out_destroy_rud_zone; + if (!xfs_rui_cache) + goto out_destroy_rud_cache; - xfs_cud_zone = kmem_cache_create("xfs_cud_item", + xfs_cud_cache = kmem_cache_create("xfs_cud_item", sizeof(struct xfs_cud_log_item), 0, 0, NULL); - if (!xfs_cud_zone) - goto out_destroy_rui_zone; + if (!xfs_cud_cache) + goto out_destroy_rui_cache; - xfs_cui_zone = kmem_cache_create("xfs_cui_item", + xfs_cui_cache = kmem_cache_create("xfs_cui_item", xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_cui_zone) - goto out_destroy_cud_zone; + if (!xfs_cui_cache) + goto out_destroy_cud_cache; - xfs_bud_zone = kmem_cache_create("xfs_bud_item", + xfs_bud_cache = kmem_cache_create("xfs_bud_item", sizeof(struct xfs_bud_log_item), 0, 0, NULL); - if (!xfs_bud_zone) - goto out_destroy_cui_zone; + if (!xfs_bud_cache) + goto out_destroy_cui_cache; - xfs_bui_zone = kmem_cache_create("xfs_bui_item", + xfs_bui_cache = kmem_cache_create("xfs_bui_item", xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_bui_zone) - goto out_destroy_bud_zone; + if (!xfs_bui_cache) + goto out_destroy_bud_cache; return 0; - out_destroy_bud_zone: - kmem_cache_destroy(xfs_bud_zone); - out_destroy_cui_zone: - 
kmem_cache_destroy(xfs_cui_zone); - out_destroy_cud_zone: - kmem_cache_destroy(xfs_cud_zone); - out_destroy_rui_zone: - kmem_cache_destroy(xfs_rui_zone); - out_destroy_rud_zone: - kmem_cache_destroy(xfs_rud_zone); - out_destroy_icreate_zone: - kmem_cache_destroy(xfs_icreate_zone); - out_destroy_ili_zone: - kmem_cache_destroy(xfs_ili_zone); - out_destroy_inode_zone: - kmem_cache_destroy(xfs_inode_zone); - out_destroy_efi_zone: - kmem_cache_destroy(xfs_efi_zone); - out_destroy_efd_zone: - kmem_cache_destroy(xfs_efd_zone); - out_destroy_buf_item_zone: - kmem_cache_destroy(xfs_buf_item_zone); - out_destroy_trans_zone: - kmem_cache_destroy(xfs_trans_zone); - out_destroy_ifork_zone: - kmem_cache_destroy(xfs_ifork_zone); - out_destroy_da_state_zone: - kmem_cache_destroy(xfs_da_state_zone); - out_destroy_btree_cur_zone: - kmem_cache_destroy(xfs_btree_cur_zone); - out_destroy_bmap_free_item_zone: - kmem_cache_destroy(xfs_bmap_free_item_zone); - out_destroy_log_ticket_zone: - kmem_cache_destroy(xfs_log_ticket_zone); + out_destroy_bud_cache: + kmem_cache_destroy(xfs_bud_cache); + out_destroy_cui_cache: + kmem_cache_destroy(xfs_cui_cache); + out_destroy_cud_cache: + kmem_cache_destroy(xfs_cud_cache); + out_destroy_rui_cache: + kmem_cache_destroy(xfs_rui_cache); + out_destroy_rud_cache: + kmem_cache_destroy(xfs_rud_cache); + out_destroy_icreate_cache: + kmem_cache_destroy(xfs_icreate_cache); + out_destroy_ili_cache: + kmem_cache_destroy(xfs_ili_cache); + out_destroy_inode_cache: + kmem_cache_destroy(xfs_inode_cache); + out_destroy_efi_cache: + kmem_cache_destroy(xfs_efi_cache); + out_destroy_efd_cache: + kmem_cache_destroy(xfs_efd_cache); + out_destroy_buf_item_cache: + kmem_cache_destroy(xfs_buf_item_cache); + out_destroy_trans_cache: + kmem_cache_destroy(xfs_trans_cache); + out_destroy_ifork_cache: + kmem_cache_destroy(xfs_ifork_cache); + out_destroy_da_state_cache: + kmem_cache_destroy(xfs_da_state_cache); + out_destroy_defer_item_cache: + xfs_defer_destroy_item_caches(); + 
out_destroy_btree_cur_cache: + xfs_btree_destroy_cur_caches(); + out_destroy_log_ticket_cache: + kmem_cache_destroy(xfs_log_ticket_cache); out: return -ENOMEM; } STATIC void -xfs_destroy_zones(void) +xfs_destroy_caches(void) { /* * Make sure all delayed rcu free are flushed before we * destroy caches. */ rcu_barrier(); - kmem_cache_destroy(xfs_bui_zone); - kmem_cache_destroy(xfs_bud_zone); - kmem_cache_destroy(xfs_cui_zone); - kmem_cache_destroy(xfs_cud_zone); - kmem_cache_destroy(xfs_rui_zone); - kmem_cache_destroy(xfs_rud_zone); - kmem_cache_destroy(xfs_icreate_zone); - kmem_cache_destroy(xfs_ili_zone); - kmem_cache_destroy(xfs_inode_zone); - kmem_cache_destroy(xfs_efi_zone); - kmem_cache_destroy(xfs_efd_zone); - kmem_cache_destroy(xfs_buf_item_zone); - kmem_cache_destroy(xfs_trans_zone); - kmem_cache_destroy(xfs_ifork_zone); - kmem_cache_destroy(xfs_da_state_zone); - kmem_cache_destroy(xfs_btree_cur_zone); - kmem_cache_destroy(xfs_bmap_free_item_zone); - kmem_cache_destroy(xfs_log_ticket_zone); + kmem_cache_destroy(xfs_bui_cache); + kmem_cache_destroy(xfs_bud_cache); + kmem_cache_destroy(xfs_cui_cache); + kmem_cache_destroy(xfs_cud_cache); + kmem_cache_destroy(xfs_rui_cache); + kmem_cache_destroy(xfs_rud_cache); + kmem_cache_destroy(xfs_icreate_cache); + kmem_cache_destroy(xfs_ili_cache); + kmem_cache_destroy(xfs_inode_cache); + kmem_cache_destroy(xfs_efi_cache); + kmem_cache_destroy(xfs_efd_cache); + kmem_cache_destroy(xfs_buf_item_cache); + kmem_cache_destroy(xfs_trans_cache); + kmem_cache_destroy(xfs_ifork_cache); + kmem_cache_destroy(xfs_da_state_cache); + xfs_defer_destroy_item_caches(); + xfs_btree_destroy_cur_caches(); + kmem_cache_destroy(xfs_log_ticket_cache); } STATIC int __init @@ -2233,13 +2232,13 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_init_zones(); + error = xfs_init_caches(); if (error) goto out_destroy_hp; error = xfs_init_workqueues(); if (error) - goto out_destroy_zones; + goto out_destroy_caches; error = xfs_mru_cache_init(); 
if (error) @@ -2314,8 +2313,8 @@ init_xfs_fs(void) xfs_mru_cache_uninit(); out_destroy_wq: xfs_destroy_workqueues(); - out_destroy_zones: - xfs_destroy_zones(); + out_destroy_caches: + xfs_destroy_caches(); out_destroy_hp: xfs_cpu_hotplug_destroy(); out: @@ -2338,7 +2337,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); - xfs_destroy_zones(); + xfs_destroy_caches(); xfs_uuid_table_free(); xfs_cpu_hotplug_destroy(); } diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 18dc5eca6c04..8608f804388f 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -105,7 +105,7 @@ bug_on_assert_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0); + return sysfs_emit(buf, "%d\n", xfs_globals.bug_on_assert); } XFS_SYSFS_ATTR_RW(bug_on_assert); @@ -135,7 +135,7 @@ log_recovery_delay_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); + return sysfs_emit(buf, "%d\n", xfs_globals.log_recovery_delay); } XFS_SYSFS_ATTR_RW(log_recovery_delay); @@ -165,7 +165,7 @@ mount_delay_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.mount_delay); + return sysfs_emit(buf, "%d\n", xfs_globals.mount_delay); } XFS_SYSFS_ATTR_RW(mount_delay); @@ -188,7 +188,7 @@ always_cow_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.always_cow); + return sysfs_emit(buf, "%d\n", xfs_globals.always_cow); } XFS_SYSFS_ATTR_RW(always_cow); @@ -224,7 +224,7 @@ pwork_threads_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads); + return sysfs_emit(buf, "%d\n", xfs_globals.pwork_threads); } XFS_SYSFS_ATTR_RW(pwork_threads); #endif /* DEBUG */ @@ -327,7 +327,7 @@ log_head_lsn_show( block = log->l_curr_block; spin_unlock(&log->l_icloglock); - return snprintf(buf, PAGE_SIZE, 
"%d:%d\n", cycle, block); + return sysfs_emit(buf, "%d:%d\n", cycle, block); } XFS_SYSFS_ATTR_RO(log_head_lsn); @@ -341,7 +341,7 @@ log_tail_lsn_show( struct xlog *log = to_xlog(kobject); xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); + return sysfs_emit(buf, "%d:%d\n", cycle, block); } XFS_SYSFS_ATTR_RO(log_tail_lsn); @@ -356,7 +356,7 @@ reserve_grant_head_show( struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); + return sysfs_emit(buf, "%d:%d\n", cycle, bytes); } XFS_SYSFS_ATTR_RO(reserve_grant_head); @@ -370,7 +370,7 @@ write_grant_head_show( struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); + return sysfs_emit(buf, "%d:%d\n", cycle, bytes); } XFS_SYSFS_ATTR_RO(write_grant_head); @@ -425,7 +425,7 @@ max_retries_show( else retries = cfg->max_retries; - return snprintf(buf, PAGE_SIZE, "%d\n", retries); + return sysfs_emit(buf, "%d\n", retries); } static ssize_t @@ -466,7 +466,7 @@ retry_timeout_seconds_show( else timeout = jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC; - return snprintf(buf, PAGE_SIZE, "%d\n", timeout); + return sysfs_emit(buf, "%d\n", timeout); } static ssize_t @@ -504,7 +504,7 @@ fail_at_unmount_show( { struct xfs_mount *mp = err_to_mp(kobject); - return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount); + return sysfs_emit(buf, "%d\n", mp->m_fail_unmount); } static ssize_t diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 1033a95fbf8e..4a8076ef8cb4 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2476,7 +2476,7 @@ DECLARE_EVENT_CLASS(xfs_btree_cur_class, __entry->btnum = cur->bc_btnum; __entry->level = level; __entry->nlevels = cur->bc_nlevels; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; 
__entry->daddr = bp ? xfs_buf_daddr(bp) : -1; ), TP_printk("dev %d:%d btree %s level %d/%d ptr %d daddr 0x%llx", diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 67dec11e34c7..234a9d9c2f43 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -25,7 +25,7 @@ #include "xfs_dquot.h" #include "xfs_icache.h" -kmem_zone_t *xfs_trans_zone; +struct kmem_cache *xfs_trans_cache; #if defined(CONFIG_TRACEPOINTS) static void @@ -76,7 +76,7 @@ xfs_trans_free( if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); - kmem_cache_free(xfs_trans_zone, tp); + kmem_cache_free(xfs_trans_cache, tp); } /* @@ -95,7 +95,7 @@ xfs_trans_dup( trace_xfs_trans_dup(tp, _RET_IP_); - ntp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); /* * Initialize the new transaction structure. @@ -263,7 +263,7 @@ xfs_trans_alloc( * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); xfs_trans_set_context(tp); @@ -477,7 +477,7 @@ STATIC void xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { - xfs_dsb_t *sbp; + struct xfs_dsb *sbp; struct xfs_buf *bp; int whole = 0; @@ -541,14 +541,14 @@ xfs_trans_apply_sb_deltas( /* * Log the whole thing, the fields are noncontiguous. */ - xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_dsb_t) - 1); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); else /* * Since all the modifiable fields are contiguous, we * can get away with this. 
*/ - xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), - offsetof(xfs_dsb_t, sb_frextents) + + xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount), + offsetof(struct xfs_dsb, sb_frextents) + sizeof(sbp->sb_frextents) - 1); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 50da47f23a07..a487b264a9eb 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -112,12 +112,6 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, #define XFS_ITEM_LOCKED 2 #define XFS_ITEM_FLUSHING 3 -/* - * Deferred operation item relogging limits. - */ -#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ -#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ - /* * This is the structure maintained for every active transaction. */ @@ -243,7 +237,7 @@ void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); -extern kmem_zone_t *xfs_trans_zone; +extern struct kmem_cache *xfs_trans_cache; static inline struct xfs_log_item * xfs_trans_item_relog( diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 3872ce671411..9ba7e6b9bed3 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -846,7 +846,7 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - tp->t_dqinfo = kmem_cache_zalloc(xfs_qm_dqtrxzone, + tp->t_dqinfo = kmem_cache_zalloc(xfs_dqtrx_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -856,6 +856,6 @@ xfs_trans_free_dqinfo( { if (!tp->t_dqinfo) return; - kmem_cache_free(xfs_qm_dqtrxzone, tp->t_dqinfo); + kmem_cache_free(xfs_dqtrx_cache, tp->t_dqinfo); tp->t_dqinfo = NULL; }