/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/f2fs_fs.h>
|
|
|
|
#include "f2fs.h"
|
|
|
|
#include "node.h"
|
|
|
|
#include "segment.h"
|
|
|
|
|
|
|
|
/*
 * Slab cache that backs struct fsync_inode_entry allocations in this file;
 * it is created and destroyed inside recover_fsync_data().
 */
static struct kmem_cache *fsync_entry_slab;
|
|
|
|
|
|
|
|
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
|
|
|
|
> sbi->user_block_count)
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
|
|
|
|
nid_t ino)
|
|
|
|
{
|
|
|
|
struct list_head *this;
|
|
|
|
struct fsync_inode_entry *entry;
|
|
|
|
|
|
|
|
list_for_each(this, head) {
|
|
|
|
entry = list_entry(this, struct fsync_inode_entry, list);
|
|
|
|
if (entry->inode->i_ino == ino)
|
|
|
|
return entry;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int recover_dentry(struct page *ipage, struct inode *inode)
|
|
|
|
{
|
2013-05-20 09:23:40 +08:00
|
|
|
void *kaddr = page_address(ipage);
|
|
|
|
struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
|
2012-11-02 16:13:32 +08:00
|
|
|
struct f2fs_inode *raw_inode = &(raw_node->i);
|
2013-05-15 15:40:02 +08:00
|
|
|
nid_t pino = le32_to_cpu(raw_inode->i_pino);
|
2013-05-28 08:19:22 +08:00
|
|
|
struct f2fs_dir_entry *de;
|
2013-01-26 05:15:43 +08:00
|
|
|
struct qstr name;
|
2012-11-02 16:13:32 +08:00
|
|
|
struct page *page;
|
2013-05-28 08:19:22 +08:00
|
|
|
struct inode *dir, *einode;
|
2012-11-02 16:13:32 +08:00
|
|
|
int err = 0;
|
|
|
|
|
2013-05-15 15:40:02 +08:00
|
|
|
dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino);
|
|
|
|
if (!dir) {
|
|
|
|
dir = f2fs_iget(inode->i_sb, pino);
|
|
|
|
if (IS_ERR(dir)) {
|
|
|
|
err = PTR_ERR(dir);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
|
2013-06-05 16:42:45 +08:00
|
|
|
add_dirty_dir_inode(dir);
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
|
|
|
|
2013-01-26 05:15:43 +08:00
|
|
|
name.len = le32_to_cpu(raw_inode->i_namelen);
|
|
|
|
name.name = raw_inode->i_name;
|
2013-05-28 08:19:22 +08:00
|
|
|
retry:
|
|
|
|
de = f2fs_find_entry(dir, &name, &page);
|
|
|
|
if (de && inode->i_ino == le32_to_cpu(de->ino)) {
|
2012-11-02 16:13:32 +08:00
|
|
|
kunmap(page);
|
|
|
|
f2fs_put_page(page, 0);
|
2013-05-28 08:19:22 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (de) {
|
|
|
|
einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
|
|
|
|
if (IS_ERR(einode)) {
|
|
|
|
WARN_ON(1);
|
|
|
|
if (PTR_ERR(einode) == -ENOENT)
|
|
|
|
err = -EEXIST;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
f2fs_delete_entry(de, page, einode);
|
|
|
|
iput(einode);
|
|
|
|
goto retry;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
2013-05-28 08:19:22 +08:00
|
|
|
err = __f2fs_add_link(dir, &name, inode);
|
2012-11-02 16:13:32 +08:00
|
|
|
out:
|
2013-05-16 14:04:49 +08:00
|
|
|
f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: "
|
|
|
|
"ino = %x, name = %s, dir = %lx, err = %d",
|
2013-05-23 18:02:13 +08:00
|
|
|
ino_of_node(ipage), raw_inode->i_name,
|
|
|
|
IS_ERR(dir) ? 0 : dir->i_ino, err);
|
2012-11-02 16:13:32 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int recover_inode(struct inode *inode, struct page *node_page)
|
|
|
|
{
|
|
|
|
void *kaddr = page_address(node_page);
|
|
|
|
struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
|
|
|
|
struct f2fs_inode *raw_inode = &(raw_node->i);
|
|
|
|
|
2013-05-16 14:04:49 +08:00
|
|
|
if (!IS_INODE(node_page))
|
|
|
|
return 0;
|
|
|
|
|
2012-11-28 15:12:41 +08:00
|
|
|
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
|
2012-11-02 16:13:32 +08:00
|
|
|
i_size_write(inode, le64_to_cpu(raw_inode->i_size));
|
|
|
|
inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
|
|
|
|
inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
|
|
|
|
inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
|
|
|
|
inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
|
|
|
|
inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
|
|
|
|
inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
|
|
|
|
|
2013-05-16 14:04:49 +08:00
|
|
|
if (is_dent_dnode(node_page))
|
|
|
|
return recover_dentry(node_page, inode);
|
|
|
|
|
|
|
|
f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
|
|
|
|
ino_of_node(node_page), raw_inode->i_name);
|
|
|
|
return 0;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
|
|
|
|
{
|
|
|
|
unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
|
|
|
|
struct curseg_info *curseg;
|
|
|
|
struct page *page;
|
|
|
|
block_t blkaddr;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
/* get node pages in the current segment */
|
|
|
|
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
|
|
|
|
blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
|
|
|
|
|
|
|
|
/* read node page */
|
|
|
|
page = alloc_page(GFP_F2FS_ZERO);
|
|
|
|
if (IS_ERR(page))
|
|
|
|
return PTR_ERR(page);
|
|
|
|
lock_page(page);
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
struct fsync_inode_entry *entry;
|
|
|
|
|
2013-03-08 20:29:23 +08:00
|
|
|
err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
|
|
|
|
if (err)
|
2012-11-02 16:13:32 +08:00
|
|
|
goto out;
|
|
|
|
|
2013-03-08 20:29:23 +08:00
|
|
|
lock_page(page);
|
|
|
|
|
2013-03-20 18:01:06 +08:00
|
|
|
if (cp_ver != cpver_of_node(page))
|
2013-05-16 14:04:49 +08:00
|
|
|
break;
|
2012-11-02 16:13:32 +08:00
|
|
|
|
|
|
|
if (!is_fsync_dnode(page))
|
|
|
|
goto next;
|
|
|
|
|
|
|
|
entry = get_fsync_inode(head, ino_of_node(page));
|
|
|
|
if (entry) {
|
|
|
|
if (IS_INODE(page) && is_dent_dnode(page))
|
|
|
|
set_inode_flag(F2FS_I(entry->inode),
|
|
|
|
FI_INC_LINK);
|
|
|
|
} else {
|
|
|
|
if (IS_INODE(page) && is_dent_dnode(page)) {
|
2013-03-20 18:01:06 +08:00
|
|
|
err = recover_inode_page(sbi, page);
|
|
|
|
if (err)
|
2013-05-16 14:04:49 +08:00
|
|
|
break;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* add this fsync inode to the list */
|
|
|
|
entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS);
|
|
|
|
if (!entry) {
|
|
|
|
err = -ENOMEM;
|
2013-05-16 14:04:49 +08:00
|
|
|
break;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
|
|
|
|
if (IS_ERR(entry->inode)) {
|
|
|
|
err = PTR_ERR(entry->inode);
|
2012-12-22 11:10:12 +08:00
|
|
|
kmem_cache_free(fsync_entry_slab, entry);
|
2013-05-16 14:04:49 +08:00
|
|
|
break;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
2012-12-22 11:10:12 +08:00
|
|
|
list_add_tail(&entry->list, head);
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
2013-05-15 09:49:13 +08:00
|
|
|
entry->blkaddr = blkaddr;
|
|
|
|
|
2013-05-16 14:04:49 +08:00
|
|
|
err = recover_inode(entry->inode, page);
|
|
|
|
if (err && err != -ENOENT)
|
|
|
|
break;
|
2012-11-02 16:13:32 +08:00
|
|
|
next:
|
|
|
|
/* check next segment */
|
|
|
|
blkaddr = next_blkaddr_of_node(page);
|
|
|
|
}
|
|
|
|
unlock_page(page);
|
2013-03-08 20:29:23 +08:00
|
|
|
out:
|
2012-11-02 16:13:32 +08:00
|
|
|
__free_pages(page, 0);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2013-06-27 09:28:54 +08:00
|
|
|
static void destroy_fsync_dnodes(struct list_head *head)
|
2012-11-02 16:13:32 +08:00
|
|
|
{
|
2013-01-20 23:02:58 +08:00
|
|
|
struct fsync_inode_entry *entry, *tmp;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(entry, tmp, head, list) {
|
2012-11-02 16:13:32 +08:00
|
|
|
iput(entry->inode);
|
|
|
|
list_del(&entry->list);
|
|
|
|
kmem_cache_free(fsync_entry_slab, entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-22 07:20:01 +08:00
|
|
|
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
|
2013-05-22 07:02:02 +08:00
|
|
|
block_t blkaddr, struct dnode_of_data *dn)
|
2012-11-02 16:13:32 +08:00
|
|
|
{
|
|
|
|
struct seg_entry *sentry;
|
|
|
|
unsigned int segno = GET_SEGNO(sbi, blkaddr);
|
|
|
|
unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
|
|
|
|
(sbi->blocks_per_seg - 1);
|
|
|
|
struct f2fs_summary sum;
|
2013-05-22 07:02:02 +08:00
|
|
|
nid_t ino, nid;
|
2012-11-02 16:13:32 +08:00
|
|
|
void *kaddr;
|
|
|
|
struct inode *inode;
|
|
|
|
struct page *node_page;
|
|
|
|
block_t bidx;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
sentry = get_seg_entry(sbi, segno);
|
|
|
|
if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
|
2013-05-22 07:20:01 +08:00
|
|
|
return 0;
|
2012-11-02 16:13:32 +08:00
|
|
|
|
|
|
|
/* Get the previous summary */
|
|
|
|
for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
|
|
|
|
struct curseg_info *curseg = CURSEG_I(sbi, i);
|
|
|
|
if (curseg->segno == segno) {
|
|
|
|
sum = curseg->sum_blk->entries[blkoff];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i > CURSEG_COLD_DATA) {
|
|
|
|
struct page *sum_page = get_sum_page(sbi, segno);
|
|
|
|
struct f2fs_summary_block *sum_node;
|
|
|
|
kaddr = page_address(sum_page);
|
|
|
|
sum_node = (struct f2fs_summary_block *)kaddr;
|
|
|
|
sum = sum_node->entries[blkoff];
|
|
|
|
f2fs_put_page(sum_page, 1);
|
|
|
|
}
|
|
|
|
|
2013-05-22 07:02:02 +08:00
|
|
|
/* Use the locked dnode page and inode */
|
|
|
|
nid = le32_to_cpu(sum.nid);
|
|
|
|
if (dn->inode->i_ino == nid) {
|
|
|
|
struct dnode_of_data tdn = *dn;
|
|
|
|
tdn.nid = nid;
|
|
|
|
tdn.node_page = dn->inode_page;
|
2013-06-24 06:47:23 +08:00
|
|
|
tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
|
2013-05-22 07:02:02 +08:00
|
|
|
truncate_data_blocks_range(&tdn, 1);
|
2013-05-22 07:20:01 +08:00
|
|
|
return 0;
|
2013-05-22 07:02:02 +08:00
|
|
|
} else if (dn->nid == nid) {
|
|
|
|
struct dnode_of_data tdn = *dn;
|
2013-06-24 06:47:23 +08:00
|
|
|
tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
|
2013-05-22 07:02:02 +08:00
|
|
|
truncate_data_blocks_range(&tdn, 1);
|
2013-05-22 07:20:01 +08:00
|
|
|
return 0;
|
2013-05-22 07:02:02 +08:00
|
|
|
}
|
|
|
|
|
2012-11-02 16:13:32 +08:00
|
|
|
/* Get the node page */
|
2013-05-22 07:02:02 +08:00
|
|
|
node_page = get_node_page(sbi, nid);
|
2013-05-22 07:20:01 +08:00
|
|
|
if (IS_ERR(node_page))
|
|
|
|
return PTR_ERR(node_page);
|
2012-11-02 16:13:32 +08:00
|
|
|
bidx = start_bidx_of_node(ofs_of_node(node_page)) +
|
2013-05-22 07:02:02 +08:00
|
|
|
le16_to_cpu(sum.ofs_in_node);
|
2012-11-02 16:13:32 +08:00
|
|
|
ino = ino_of_node(node_page);
|
|
|
|
f2fs_put_page(node_page, 1);
|
|
|
|
|
|
|
|
/* Deallocate previous index in the node page */
|
f2fs: avoid balanc_fs during evict_inode
1. Background
Previously, if f2fs tries to move data blocks of an *evicting* inode during the
cleaning process, it stops the process incompletely and then restarts the whole
process, since it needs a locked inode to grab victim data pages in its address
space. In order to get a locked inode, iget_locked() by f2fs_iget() is normally
used, but, it waits if the inode is on freeing.
So, here is a deadlock scenario.
1. f2fs_evict_inode() <- inode "A"
2. f2fs_balance_fs()
3. f2fs_gc()
4. gc_data_segment()
5. f2fs_iget() <- inode "A" too!
If step #1 and #5 treat a same inode "A", step #5 would fall into deadlock since
the inode "A" is on freeing. In order to resolve this, f2fs_iget_nowait() which
skips __wait_on_freeing_inode() was introduced in step #5, and stops f2fs_gc()
to complete f2fs_evict_inode().
1. f2fs_evict_inode() <- inode "A"
2. f2fs_balance_fs()
3. f2fs_gc()
4. gc_data_segment()
5. f2fs_iget_nowait() <- inode "A", then stop f2fs_gc() w/ -ENOENT
2. Problem and Solution
In the above scenario, however, f2fs cannot finish f2fs_evict_inode() only if:
o there are not enough free sections, and
o f2fs_gc() tries to move data blocks of the *evicting* inode repeatedly.
So, the final solution is to use f2fs_iget() and remove f2fs_balance_fs() in
f2fs_evict_inode().
The f2fs_evict_inode() actually truncates all the data and node blocks, which
means that it doesn't produce any dirty node pages accordingly.
So, we don't need to do f2fs_balance_fs() in practical.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-01-31 14:36:04 +08:00
|
|
|
inode = f2fs_iget(sbi->sb, ino);
|
2012-12-22 11:09:43 +08:00
|
|
|
if (IS_ERR(inode))
|
2013-05-22 07:20:01 +08:00
|
|
|
return PTR_ERR(inode);
|
2012-12-22 11:09:43 +08:00
|
|
|
|
2012-11-02 16:13:32 +08:00
|
|
|
truncate_hole(inode, bidx, bidx + 1);
|
|
|
|
iput(inode);
|
2013-05-22 07:20:01 +08:00
|
|
|
return 0;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
|
|
|
|
2013-03-20 18:01:06 +08:00
|
|
|
/*
 * Replay the block addresses recorded in one fsynced node page.
 *
 * @sbi:     filesystem instance
 * @inode:   inode the node page belongs to
 * @page:    node page read from the node log (source of the addresses)
 * @blkaddr: block address @page was read from
 *
 * The matching in-memory dnode is looked up (allocating it if needed) and
 * walked slot by slot.  Wherever @page holds a real address that differs
 * from the current one, any previous owner of that block is released via
 * check_index_in_prev_nodes() and the address is installed with
 * recover_data_page().  Finally the node footer is copied over and the
 * node itself is recovered at @blkaddr.
 *
 * Runs between mutex_lock_op() and mutex_unlock_op().
 * Returns 0 on success or a negative errno.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	unsigned int start, end;
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	int err = 0, recovered = 0;
	int ilock;

	start = start_bidx_of_node(ofs_of_node(page));
	end = start + (IS_INODE(page) ? ADDRS_PER_INODE : ADDRS_PER_BLOCK);

	ilock = mutex_lock_op(sbi);
	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		mutex_unlock_op(sbi, ilock);
		return err;
	}

	wait_on_page_writeback(dn.node_page);

	/* the in-memory node must be the same node the log page describes */
	get_node_info(sbi, dn.nid, &ni);
	BUG_ON(ni.ino != ino_of_node(page));
	BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++, dn.ofs_in_node++) {
		block_t src = datablock_addr(dn.node_page, dn.ofs_in_node);
		block_t dest = datablock_addr(page, dn.ofs_in_node);

		/* nothing to replay for unchanged or unallocated slots */
		if (src == dest || dest == NEW_ADDR || dest == NULL_ADDR)
			continue;

		if (src == NULL_ADDR) {
			int ret = reserve_new_block(&dn);

			/* We should not get -ENOSPC */
			BUG_ON(ret);
		}

		/* Check the previous node page having this index */
		err = check_index_in_prev_nodes(sbi, dest, &dn);
		if (err)
			goto out;

		set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

		/* write dummy data page */
		recover_data_page(sbi, NULL, &sum, src, dest);
		update_extent_cache(dest, &dn);
		recovered++;
	}

	/* write node page in place */
	set_summary(&sum, dn.nid, 0, 0);
	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);

	recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
out:
	f2fs_put_dnode(&dn);
	mutex_unlock_op(sbi, ilock);

	f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
			"recovered_data = %d blocks, err = %d",
			inode->i_ino, recovered, err);
	return err;
}
|
|
|
|
|
2013-03-20 18:01:06 +08:00
|
|
|
static int recover_data(struct f2fs_sb_info *sbi,
|
2012-11-02 16:13:32 +08:00
|
|
|
struct list_head *head, int type)
|
|
|
|
{
|
|
|
|
unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
|
|
|
|
struct curseg_info *curseg;
|
|
|
|
struct page *page;
|
2013-03-20 18:01:06 +08:00
|
|
|
int err = 0;
|
2012-11-02 16:13:32 +08:00
|
|
|
block_t blkaddr;
|
|
|
|
|
|
|
|
/* get node pages in the current segment */
|
|
|
|
curseg = CURSEG_I(sbi, type);
|
|
|
|
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
|
|
|
|
|
|
|
|
/* read node page */
|
|
|
|
page = alloc_page(GFP_NOFS | __GFP_ZERO);
|
|
|
|
if (IS_ERR(page))
|
2013-03-20 18:01:06 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2012-11-02 16:13:32 +08:00
|
|
|
lock_page(page);
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
struct fsync_inode_entry *entry;
|
|
|
|
|
2013-03-20 18:01:06 +08:00
|
|
|
err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
|
|
|
|
if (err)
|
2012-11-02 16:13:32 +08:00
|
|
|
goto out;
|
|
|
|
|
2013-03-08 20:29:23 +08:00
|
|
|
lock_page(page);
|
|
|
|
|
2012-11-02 16:13:32 +08:00
|
|
|
if (cp_ver != cpver_of_node(page))
|
2013-05-20 09:26:09 +08:00
|
|
|
break;
|
2012-11-02 16:13:32 +08:00
|
|
|
|
|
|
|
entry = get_fsync_inode(head, ino_of_node(page));
|
|
|
|
if (!entry)
|
|
|
|
goto next;
|
|
|
|
|
2013-03-20 18:01:06 +08:00
|
|
|
err = do_recover_data(sbi, entry->inode, page, blkaddr);
|
|
|
|
if (err)
|
2013-05-20 09:26:09 +08:00
|
|
|
break;
|
2012-11-02 16:13:32 +08:00
|
|
|
|
|
|
|
if (entry->blkaddr == blkaddr) {
|
|
|
|
iput(entry->inode);
|
|
|
|
list_del(&entry->list);
|
|
|
|
kmem_cache_free(fsync_entry_slab, entry);
|
|
|
|
}
|
|
|
|
next:
|
|
|
|
/* check next segment */
|
|
|
|
blkaddr = next_blkaddr_of_node(page);
|
|
|
|
}
|
|
|
|
unlock_page(page);
|
2013-03-08 20:29:23 +08:00
|
|
|
out:
|
2012-11-02 16:13:32 +08:00
|
|
|
__free_pages(page, 0);
|
|
|
|
|
2013-03-20 18:01:06 +08:00
|
|
|
if (!err)
|
|
|
|
allocate_new_segments(sbi);
|
|
|
|
return err;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|
|
|
|
|
2013-03-20 18:01:06 +08:00
|
|
|
int recover_fsync_data(struct f2fs_sb_info *sbi)
|
2012-11-02 16:13:32 +08:00
|
|
|
{
|
|
|
|
struct list_head inode_list;
|
2013-03-20 18:01:06 +08:00
|
|
|
int err;
|
2012-11-02 16:13:32 +08:00
|
|
|
|
|
|
|
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
|
|
|
|
sizeof(struct fsync_inode_entry), NULL);
|
|
|
|
if (unlikely(!fsync_entry_slab))
|
2013-03-20 18:01:06 +08:00
|
|
|
return -ENOMEM;
|
2012-11-02 16:13:32 +08:00
|
|
|
|
|
|
|
INIT_LIST_HEAD(&inode_list);
|
|
|
|
|
|
|
|
/* step #1: find fsynced inode numbers */
|
2013-05-15 15:12:18 +08:00
|
|
|
sbi->por_doing = 1;
|
2013-03-20 18:01:06 +08:00
|
|
|
err = find_fsync_dnodes(sbi, &inode_list);
|
|
|
|
if (err)
|
2012-11-02 16:13:32 +08:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
if (list_empty(&inode_list))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* step #2: recover data */
|
2013-03-20 18:01:06 +08:00
|
|
|
err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
|
2012-11-02 16:13:32 +08:00
|
|
|
BUG_ON(!list_empty(&inode_list));
|
|
|
|
out:
|
2013-06-27 09:28:54 +08:00
|
|
|
destroy_fsync_dnodes(&inode_list);
|
2012-11-02 16:13:32 +08:00
|
|
|
kmem_cache_destroy(fsync_entry_slab);
|
2013-05-15 15:12:18 +08:00
|
|
|
sbi->por_doing = 0;
|
2013-05-20 13:48:49 +08:00
|
|
|
if (!err)
|
|
|
|
write_checkpoint(sbi, false);
|
2013-03-20 18:01:06 +08:00
|
|
|
return err;
|
2012-11-02 16:13:32 +08:00
|
|
|
}
|