mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-16 08:44:21 +08:00
7ef3ff2fea
Nilfs2 eventually hangs in a stress test with fsstress program.  This
issue was caused by the following deadlock over I_SYNC flag between
nilfs_segctor_thread() and writeback_sb_inodes():

  nilfs_segctor_thread()
    nilfs_segctor_thread_construct()
      nilfs_segctor_unlock()
        nilfs_dispose_list()
          iput()
            iput_final()
              evict()
                inode_wait_for_writeback()  * wait for I_SYNC flag

  writeback_sb_inodes()
     * set I_SYNC flag on inode->i_state
    __writeback_single_inode()
      do_writepages()
        nilfs_writepages()
          nilfs_construct_dsync_segment()
            nilfs_segctor_sync()
               * wait for completion of segment constructor
    inode_sync_complete()
       * clear I_SYNC flag after __writeback_single_inode() completed

writeback_sb_inodes() calls do_writepages() for dirty inodes after
setting I_SYNC flag on inode->i_state.  do_writepages() in turn calls
nilfs_writepages(), which can run segment constructor and wait for its
completion.  On the other hand, segment constructor calls iput(), which
can call evict() and wait for the I_SYNC flag on
inode_wait_for_writeback().

Since segment constructor doesn't know when I_SYNC will be set, it
cannot know whether iput() will block or not unless inode->i_nlink has a
non-zero count.  We can prevent evict() from being called in iput() by
implementing sop->drop_inode(), but it's not preferable to leave inodes
with i_nlink == 0 for long periods because it even defers file
truncation and inode deallocation.  So, this instead resolves the
deadlock by calling iput() asynchronously with a workqueue for inodes
with i_nlink == 0.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Tested-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
252 lines
8.3 KiB
C
252 lines
8.3 KiB
C
/*
|
|
* segment.h - NILFS Segment constructor prototypes and definitions
|
|
*
|
|
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*
|
|
* Written by Ryusuke Konishi <ryusuke@osrg.net>
|
|
*
|
|
*/
|
|
#ifndef _NILFS_SEGMENT_H
|
|
#define _NILFS_SEGMENT_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/nilfs2_fs.h>
|
|
#include "nilfs.h"
|
|
|
|
/* Forward declaration: this header only refers to struct nilfs_root by pointer. */
struct nilfs_root;
|
|
|
|
/**
|
|
* struct nilfs_recovery_info - Recovery information
|
|
* @ri_need_recovery: Recovery status
|
|
* @ri_super_root: Block number of the last super root
|
|
* @ri_ri_cno: Number of the last checkpoint
|
|
* @ri_lsegs_start: Region for roll-forwarding (start block number)
|
|
* @ri_lsegs_end: Region for roll-forwarding (end block number)
|
|
* @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start
|
|
* @ri_used_segments: List of segments to be mark active
|
|
* @ri_pseg_start: Block number of the last partial segment
|
|
* @ri_seq: Sequence number on the last partial segment
|
|
* @ri_segnum: Segment number on the last partial segment
|
|
* @ri_nextnum: Next segment number on the last partial segment
|
|
*/
|
|
struct nilfs_recovery_info {
|
|
int ri_need_recovery;
|
|
sector_t ri_super_root;
|
|
__u64 ri_cno;
|
|
|
|
sector_t ri_lsegs_start;
|
|
sector_t ri_lsegs_end;
|
|
u64 ri_lsegs_start_seq;
|
|
struct list_head ri_used_segments;
|
|
sector_t ri_pseg_start;
|
|
u64 ri_seq;
|
|
__u64 ri_segnum;
|
|
__u64 ri_nextnum;
|
|
};
|
|
|
|
/* ri_need_recovery: values stored in nilfs_recovery_info.ri_need_recovery */
#define NILFS_RECOVERY_SR_UPDATED	 1  /* The super root was updated */
#define NILFS_RECOVERY_ROLLFORWARD_DONE	 2  /* Rollforward was carried out */
|
|
|
|
/**
 * struct nilfs_cstage - Context of collection stage
 * @scnt: Stage count
 * @flags: State flags
 * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
 * @gc_inode_ptr: Pointer on the list of gc-inodes
 */
struct nilfs_cstage {
	int			scnt;
	unsigned int		flags;
	struct nilfs_inode_info *dirty_file_ptr;
	struct nilfs_inode_info *gc_inode_ptr;
};
|
|
|
|
/* Forward declaration: this header only refers to struct nilfs_segment_buffer by pointer. */
struct nilfs_segment_buffer;
|
|
|
|
/**
 * struct nilfs_segsum_pointer - Position given as a buffer plus byte offset
 * @bh: Buffer head the position refers to
 * @offset: Offset in bytes
 *
 * Used for the sc_finfo_ptr and sc_binfo_ptr members of struct
 * nilfs_sc_info, which point to the current finfo/binfo struct in the
 * segment summary.
 */
struct nilfs_segsum_pointer {
	struct buffer_head     *bh;
	unsigned int		offset; /* offset in bytes */
};
|
|
|
|
/**
|
|
* struct nilfs_sc_info - Segment constructor information
|
|
* @sc_super: Back pointer to super_block struct
|
|
* @sc_root: root object of the current filesystem tree
|
|
* @sc_nblk_inc: Block count of current generation
|
|
* @sc_dirty_files: List of files to be written
|
|
* @sc_gc_inodes: List of GC inodes having blocks to be written
|
|
* @sc_iput_queue: list of inodes for which iput should be done
|
|
* @sc_iput_work: work struct to defer iput call
|
|
* @sc_freesegs: array of segment numbers to be freed
|
|
* @sc_nfreesegs: number of segments on @sc_freesegs
|
|
* @sc_dsync_inode: inode whose data pages are written for a sync operation
|
|
* @sc_dsync_start: start byte offset of data pages
|
|
* @sc_dsync_end: end byte offset of data pages (inclusive)
|
|
* @sc_segbufs: List of segment buffers
|
|
* @sc_write_logs: List of segment buffers to hold logs under writing
|
|
* @sc_segbuf_nblocks: Number of available blocks in segment buffers.
|
|
* @sc_curseg: Current segment buffer
|
|
* @sc_stage: Collection stage
|
|
* @sc_finfo_ptr: pointer to the current finfo struct in the segment summary
|
|
* @sc_binfo_ptr: pointer to the current binfo struct in the segment summary
|
|
* @sc_blk_cnt: Block count of a file
|
|
* @sc_datablk_cnt: Data block count of a file
|
|
* @sc_nblk_this_inc: Number of blocks included in the current logical segment
|
|
* @sc_seg_ctime: Creation time
|
|
* @sc_cno: checkpoint number of current log
|
|
* @sc_flags: Internal flags
|
|
* @sc_state_lock: spinlock for sc_state and so on
|
|
* @sc_state: Segctord state flags
|
|
* @sc_flush_request: inode bitmap of metadata files to be flushed
|
|
* @sc_wait_request: Client request queue
|
|
* @sc_wait_daemon: Daemon wait queue
|
|
* @sc_wait_task: Start/end wait queue to control segctord task
|
|
* @sc_seq_request: Request counter
|
|
* @sc_seq_accept: Accepted request count
|
|
* @sc_seq_done: Completion counter
|
|
* @sc_sync: Request of explicit sync operation
|
|
* @sc_interval: Timeout value of background construction
|
|
* @sc_mjcp_freq: Frequency of creating checkpoints
|
|
* @sc_lseg_stime: Start time of the latest logical segment
|
|
* @sc_watermark: Watermark for the number of dirty buffers
|
|
* @sc_timer: Timer for segctord
|
|
* @sc_task: current thread of segctord
|
|
*/
|
|
struct nilfs_sc_info {
|
|
struct super_block *sc_super;
|
|
struct nilfs_root *sc_root;
|
|
|
|
unsigned long sc_nblk_inc;
|
|
|
|
struct list_head sc_dirty_files;
|
|
struct list_head sc_gc_inodes;
|
|
struct list_head sc_iput_queue;
|
|
struct work_struct sc_iput_work;
|
|
|
|
__u64 *sc_freesegs;
|
|
size_t sc_nfreesegs;
|
|
|
|
struct nilfs_inode_info *sc_dsync_inode;
|
|
loff_t sc_dsync_start;
|
|
loff_t sc_dsync_end;
|
|
|
|
/* Segment buffers */
|
|
struct list_head sc_segbufs;
|
|
struct list_head sc_write_logs;
|
|
unsigned long sc_segbuf_nblocks;
|
|
struct nilfs_segment_buffer *sc_curseg;
|
|
|
|
struct nilfs_cstage sc_stage;
|
|
|
|
struct nilfs_segsum_pointer sc_finfo_ptr;
|
|
struct nilfs_segsum_pointer sc_binfo_ptr;
|
|
unsigned long sc_blk_cnt;
|
|
unsigned long sc_datablk_cnt;
|
|
unsigned long sc_nblk_this_inc;
|
|
time_t sc_seg_ctime;
|
|
__u64 sc_cno;
|
|
unsigned long sc_flags;
|
|
|
|
spinlock_t sc_state_lock;
|
|
unsigned long sc_state;
|
|
unsigned long sc_flush_request;
|
|
|
|
wait_queue_head_t sc_wait_request;
|
|
wait_queue_head_t sc_wait_daemon;
|
|
wait_queue_head_t sc_wait_task;
|
|
|
|
__u32 sc_seq_request;
|
|
__u32 sc_seq_accepted;
|
|
__u32 sc_seq_done;
|
|
|
|
int sc_sync;
|
|
unsigned long sc_interval;
|
|
unsigned long sc_mjcp_freq;
|
|
unsigned long sc_lseg_stime; /* in 1/HZ seconds */
|
|
unsigned long sc_watermark;
|
|
|
|
struct timer_list sc_timer;
|
|
struct task_struct *sc_task;
|
|
};
|
|
|
|
/*
 * sc_flags
 *
 * NOTE(review): the enumerators are consecutive integers starting at 0,
 * so they are presumably bit numbers within nilfs_sc_info.sc_flags rather
 * than masks -- confirm against the users in segment.c.
 */
enum {
	NILFS_SC_DIRTY,		/* One or more dirty meta-data blocks exist */
	NILFS_SC_UNCLOSED,	/* Logical segment is not closed */
	NILFS_SC_SUPER_ROOT,	/* The latest segment has a super root */
	NILFS_SC_PRIOR_FLUSH,	/* Requesting immediate flush without making a
				   checkpoint */
	NILFS_SC_HAVE_DELTA,	/* Next checkpoint will have update of files
				   other than DAT, cpfile, sufile, or files
				   moved by GC */
};
|
|
|
|
/* sc_state: flag masks kept in nilfs_sc_info.sc_state */
#define NILFS_SEGCTOR_QUIT	    0x0001  /* segctord is being destroyed */
#define NILFS_SEGCTOR_COMMIT	    0x0004  /* committed transaction exists */
|
|
|
|
/*
 * Constant parameters
 */
#define NILFS_SC_CLEANUP_RETRY	    3  /* Retry count of construction when
					  destroying segctord */
|
|
|
|
/*
 * Default values of timeout, in seconds.
 */
#define NILFS_SC_DEFAULT_TIMEOUT    5   /* Timeout value of dirty blocks.
					   It triggers construction of a
					   logical segment with a super root */
#define NILFS_SC_DEFAULT_SR_FREQ    30  /* Maximum frequency of super root
					   creation */
|
|
|
|
/*
 * The default threshold amount of data, in block counts.
 */
#define NILFS_SC_DEFAULT_WATERMARK  3600
|
|
|
|
/* super.c */
|
|
/*
 * Declaration only; no definition appears in this header.
 * NOTE(review): presumably the kmem cache used for transaction objects,
 * judging by the name -- confirm at the definition.
 */
extern struct kmem_cache *nilfs_transaction_cachep;
|
|
|
|
/* segment.c */
|
|
extern void nilfs_relax_pressure_in_lock(struct super_block *);
|
|
|
|
extern int nilfs_construct_segment(struct super_block *);
|
|
extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
|
|
loff_t, loff_t);
|
|
extern void nilfs_flush_segment(struct super_block *, ino_t);
|
|
extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
|
|
void **);
|
|
|
|
int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root);
|
|
void nilfs_detach_log_writer(struct super_block *sb);
|
|
|
|
/* recovery.c */
|
|
extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
|
|
struct buffer_head **, int);
|
|
extern int nilfs_search_super_root(struct the_nilfs *,
|
|
struct nilfs_recovery_info *);
|
|
int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb,
|
|
struct nilfs_recovery_info *ri);
|
|
extern void nilfs_dispose_segment_list(struct list_head *);
|
|
|
|
#endif /* _NILFS_SEGMENT_H */
|