From 9f5418010940236b2c39ea53b99055ca26ff1279 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Thu, 19 May 2016 00:17:26 +1000
Subject: [PATCH] xfs: concurrent readdir hangs on data buffer locks

There's a three-process deadlock involving shared/exclusive barriers
and inverted lock orders in the directory readdir implementation.
It's a pre-existing problem with lock ordering, exposed by the
VFS parallelisation code.

process 1               process 2               process 3
---------               ---------               ---------
readdir
iolock(shared)
  get_leaf_dents
    iterate entries
      ilock(shared)
      map, lock and read buffer
      iunlock(shared)
      process entries in buffer
      .....
                        readdir
                        iolock(shared)
                          get_leaf_dents
                            iterate entries
                              ilock(shared)
                              map, lock buffer
                              (blocks)
                                                finish ->iterate_shared
                                                file_accessed()
                                                  ->update_time
                                                    start transaction
                                                    ilock(excl)
                                                    (blocks)
      .....
      finishes processing buffer
      get next buffer
        ilock(shared)
        (blocks)

And that's the deadlock.

Fix this by dropping the current buffer lock in process 1 before
trying to map the next buffer. This means we keep the lock order of
ilock -> buffer lock intact and hence will allow process 3 to make
progress and drop its ilock(shared) once it is done.
Reported-by: Xiong Zhou Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/xfs/xfs_dir2_readdir.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 93b3ab0c5435..f44f79996978 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -273,10 +273,11 @@ xfs_dir2_leaf_readbuf( size_t bufsize, struct xfs_dir2_leaf_map_info *mip, xfs_dir2_off_t *curoff, - struct xfs_buf **bpp) + struct xfs_buf **bpp, + bool trim_map) { struct xfs_inode *dp = args->dp; - struct xfs_buf *bp = *bpp; + struct xfs_buf *bp = NULL; struct xfs_bmbt_irec *map = mip->map; struct blk_plug plug; int error = 0; @@ -286,13 +287,10 @@ xfs_dir2_leaf_readbuf( struct xfs_da_geometry *geo = args->geo; /* - * If we have a buffer, we need to release it and - * take it out of the mapping. + * If the caller just finished processing a buffer, it will tell us + * we need to trim that block out of the mapping now it is done. */ - - if (bp) { - xfs_trans_brelse(NULL, bp); - bp = NULL; + if (trim_map) { mip->map_blocks -= geo->fsbcount; /* * Loop to get rid of the extents for the @@ -533,10 +531,17 @@ xfs_dir2_leaf_getdents( */ if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { int lock_mode; + bool trim_map = false; + + if (bp) { + xfs_trans_brelse(NULL, bp); + bp = NULL; + trim_map = true; + } lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, - &curoff, &bp); + &curoff, &bp, trim_map); xfs_iunlock(dp, lock_mode); if (error || !map_info->map_valid) break;