linux/fs/f2fs/xattr.c

588 lines
14 KiB
C
Raw Normal View History

/*
* fs/f2fs/xattr.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*
* Portions of this code from linux/fs/ext2/xattr.c
*
* Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
*
* Fix by Harrison Xing <harrison@mountainviewdata.com>.
* Extended attributes for symlinks and special files added per
* suggestion of Luka Renko <luka.renko@hermes.si>.
* xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
* Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/rwsem.h>
#include <linux/f2fs_fs.h>
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#include "f2fs.h"
#include "xattr.h"
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
switch (handler->flags) {
case F2FS_XATTR_INDEX_USER:
if (!test_opt(sbi, XATTR_USER))
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
return -EINVAL;
}
return f2fs_getxattr(inode, handler->flags, name,
buffer, size, NULL);
}
static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
switch (handler->flags) {
case F2FS_XATTR_INDEX_USER:
if (!test_opt(sbi, XATTR_USER))
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
return -EINVAL;
}
return f2fs_setxattr(inode, handler->flags, name,
value, size, NULL, flags);
}
static bool f2fs_xattr_user_list(struct dentry *dentry)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
return test_opt(sbi, XATTR_USER);
}
static bool f2fs_xattr_trusted_list(struct dentry *dentry)
{
return capable(CAP_SYS_ADMIN);
}
static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
{
f2fs: avoid NULL pointer dereference in f2fs_xattr_advise_get We will encounter oops by executing below command. getfattr -n system.advise /mnt/f2fs/file Killed message log: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<f8b54d69>] f2fs_xattr_advise_get+0x29/0x40 [f2fs] *pdpt = 00000000319b7001 *pde = 0000000000000000 Oops: 0002 [#1] SMP Modules linked in: f2fs(O) snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq joydev snd_seq_device snd_timer bnep snd rfcomm microcode bluetooth soundcore i2c_piix4 mac_hid serio_raw parport_pc ppdev lp parport binfmt_misc hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs] CPU: 3 PID: 3134 Comm: getfattr Tainted: G O 4.0.0-rc1 #6 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: f3a71b60 ti: f19a6000 task.ti: f19a6000 EIP: 0060:[<f8b54d69>] EFLAGS: 00010246 CPU: 3 EIP is at f2fs_xattr_advise_get+0x29/0x40 [f2fs] EAX: 00000000 EBX: f19a7e71 ECX: 00000000 EDX: f8b5b467 ESI: 00000000 EDI: f2008570 EBP: f19a7e14 ESP: f19a7e08 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: 00000000 CR3: 319b8000 CR4: 000007f0 Stack: f8b5a634 c0cbb580 00000000 f19a7e34 c1193850 00000000 00000007 f19a7e71 f19a7e64 c0cbb580 c1193810 f19a7e50 c1193c00 00000000 00000000 00000000 c0cbb580 00000000 f19a7f70 c1194097 00000000 00000000 00000000 74737973 Call Trace: [<c1193850>] generic_getxattr+0x40/0x50 [<c1193810>] ? xattr_resolve_name+0x80/0x80 [<c1193c00>] vfs_getxattr+0x70/0xa0 [<c1194097>] getxattr+0x87/0x190 [<c11801d7>] ? path_lookupat+0x57/0x5f0 [<c11819d2>] ? putname+0x32/0x50 [<c116653a>] ? kmem_cache_alloc+0x2a/0x130 [<c11819d2>] ? putname+0x32/0x50 [<c11819d2>] ? putname+0x32/0x50 [<c11819d2>] ? putname+0x32/0x50 [<c11827f9>] ? user_path_at_empty+0x49/0x70 [<c118283f>] ? user_path_at+0x1f/0x30 [<c11941e7>] path_getxattr+0x47/0x80 [<c11948e7>] SyS_getxattr+0x27/0x30 [<c163f748>] sysenter_do_call+0x12/0x12 Code: 66 90 55 89 e5 57 56 53 66 66 66 66 90 8b 78 20 89 d3 ba 67 b4 b5 f8 89 d8 89 ce e8 42 7c 7b c8 85 c0 75 16 0f b6 87 44 01 00 00 <88> 06 b8 01 00 00 00 5b 5e 5f 5d c3 8d 76 00 b8 ea ff ff ff eb EIP: [<f8b54d69>] f2fs_xattr_advise_get+0x29/0x40 [f2fs] SS:ESP 0068:f19a7e08 CR2: 0000000000000000 ---[ end trace 860260654f1f416a ]--- The reason is that in getfattr there are two steps which is indicated by strace info: 1) try to lookup and get size of specified xattr. 2) get value of the extented attribute. strace info: getxattr("/mnt/f2fs/file", "system.advise", 0x0, 0) = 1 getxattr("/mnt/f2fs/file", "system.advise", "\x00", 256) = 1 For the first step, getfattr may pass a NULL pointer in @value and zero in @size as parameters for ->getxattr, but we access this @value pointer directly without checking whether the pointer is valid or not in f2fs_xattr_advise_get, so the oops occurs. This patch fixes this issue by verifying @value pointer before using. Signed-off-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2015-03-23 10:36:15 +08:00
if (buffer)
*((char *)buffer) = F2FS_I(inode)->i_advise;
return sizeof(char);
}
static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
{
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
#ifdef CONFIG_F2FS_FS_SECURITY
static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *page)
{
const struct xattr *xattr;
int err = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
xattr->name, xattr->value,
xattr->value_len, (struct page *)page, 0);
if (err < 0)
break;
}
return err;
}
int f2fs_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr, struct page *ipage)
{
return security_inode_init_security(inode, dir, qstr,
&f2fs_initxattrs, ipage);
}
#endif
const struct xattr_handler f2fs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = F2FS_XATTR_INDEX_USER,
.list = f2fs_xattr_user_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
const struct xattr_handler f2fs_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.flags = F2FS_XATTR_INDEX_TRUSTED,
.list = f2fs_xattr_trusted_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
const struct xattr_handler f2fs_xattr_advise_handler = {
.name = F2FS_SYSTEM_ADVISE_NAME,
.flags = F2FS_XATTR_INDEX_ADVISE,
.get = f2fs_xattr_advise_get,
.set = f2fs_xattr_advise_set,
};
const struct xattr_handler f2fs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = F2FS_XATTR_INDEX_SECURITY,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
static const struct xattr_handler *f2fs_xattr_handler_map[] = {
[F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
[F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
[F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
[F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
#ifdef CONFIG_F2FS_FS_SECURITY
[F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler,
#endif
[F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
};
const struct xattr_handler *f2fs_xattr_handlers[] = {
&f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
&posix_acl_access_xattr_handler,
&posix_acl_default_xattr_handler,
#endif
&f2fs_xattr_trusted_handler,
#ifdef CONFIG_F2FS_FS_SECURITY
&f2fs_xattr_security_handler,
#endif
&f2fs_xattr_advise_handler,
NULL,
};
static inline const struct xattr_handler *f2fs_xattr_handler(int index)
{
const struct xattr_handler *handler = NULL;
if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map))
handler = f2fs_xattr_handler_map[index];
return handler;
}
static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
continue;
if (!memcmp(entry->e_name, name, len))
break;
}
return entry;
}
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
size_t size = PAGE_SIZE, inline_size = 0;
void *txattr_addr;
int err;
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
/* read from inline xattr */
if (inline_size) {
struct page *page = NULL;
void *inline_addr;
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
inline_addr = inline_xattr_addr(page);
}
memcpy(txattr_addr, inline_addr, inline_size);
f2fs_put_page(page, 1);
}
/* read from xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto fail;
}
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
f2fs_put_page(xpage, 1);
}
header = XATTR_HDR(txattr_addr);
/* never been allocated xattrs */
if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
header->h_refcount = cpu_to_le32(1);
}
*base_addr = txattr_addr;
return 0;
fail:
kzfree(txattr_addr);
return err;
}
static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
void *txattr_addr, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t inline_size = 0;
void *xattr_addr;
struct page *xpage;
nid_t new_nid = 0;
int err;
inline_size = inline_xattr_size(inode);
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
if (!alloc_nid(sbi, &new_nid))
return -ENOSPC;
/* write to inline xattr */
if (inline_size) {
struct page *page = NULL;
void *inline_addr;
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
f2fs_wait_on_page_writeback(ipage, NODE, true);
set_page_dirty(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(page);
}
inline_addr = inline_xattr_addr(page);
f2fs_wait_on_page_writeback(page, NODE, true);
}
memcpy(inline_addr, txattr_addr, inline_size);
f2fs_put_page(page, 1);
/* no need to use xattr node block */
if (hsize <= inline_size) {
err = truncate_xattr_node(inode, ipage);
alloc_nid_failed(sbi, new_nid);
return err;
}
}
/* write to xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
}
f2fs_bug_on(sbi, new_nid);
f2fs_wait_on_page_writeback(xpage, NODE, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
}
alloc_nid_done(sbi, new_nid);
}
xattr_addr = page_address(xpage);
memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
sizeof(struct node_footer));
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
/* need to checkpoint during fsync */
F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
return 0;
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
f2fs: avoid deadlock on init_inode_metadata Previously, init_inode_metadata does not hold any parent directory's inode page. So, f2fs_init_acl can grab its parent inode page without any problem. But, when we use inline_dentry, that page is grabbed during f2fs_add_link, so that we can fall into deadlock condition like below. INFO: task mknod:11006 blocked for more than 120 seconds. Tainted: G OE 3.17.0-rc1+ #13 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. mknod D ffff88003fc94580 0 11006 11004 0x00000000 ffff880007717b10 0000000000000002 ffff88003c323220 ffff880007717fd8 0000000000014580 0000000000014580 ffff88003daecb30 ffff88003c323220 ffff88003fc94e80 ffff88003ffbb4e8 ffff880007717ba0 0000000000000002 Call Trace: [<ffffffff8173dc40>] ? bit_wait+0x50/0x50 [<ffffffff8173d4cd>] io_schedule+0x9d/0x130 [<ffffffff8173dc6c>] bit_wait_io+0x2c/0x50 [<ffffffff8173da3b>] __wait_on_bit_lock+0x4b/0xb0 [<ffffffff811640a7>] __lock_page+0x67/0x70 [<ffffffff810acf50>] ? autoremove_wake_function+0x40/0x40 [<ffffffff811652cc>] pagecache_get_page+0x14c/0x1e0 [<ffffffffa029afa9>] get_node_page+0x59/0x130 [f2fs] [<ffffffffa02a63ad>] read_all_xattrs+0x24d/0x430 [f2fs] [<ffffffffa02a6ca2>] f2fs_getxattr+0x52/0xe0 [f2fs] [<ffffffffa02a7481>] f2fs_get_acl+0x41/0x2d0 [f2fs] [<ffffffff8122d847>] get_acl+0x47/0x70 [<ffffffff8122db5a>] posix_acl_create+0x5a/0x150 [<ffffffffa02a7759>] f2fs_init_acl+0x29/0xcb [f2fs] [<ffffffffa0286a8d>] init_inode_metadata+0x5d/0x340 [f2fs] [<ffffffffa029253a>] f2fs_add_inline_entry+0x12a/0x2e0 [f2fs] [<ffffffffa0286ea5>] __f2fs_add_link+0x45/0x4a0 [f2fs] [<ffffffffa028b5b6>] ? f2fs_new_inode+0x146/0x220 [f2fs] [<ffffffffa028b816>] f2fs_mknod+0x86/0xf0 [f2fs] [<ffffffff811e3ec1>] vfs_mknod+0xe1/0x160 [<ffffffff811e4b26>] SyS_mknod+0x1f6/0x200 [<ffffffff81741d7f>] tracesys+0xe1/0xe6 Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-10-14 10:42:53 +08:00
void *buffer, size_t buffer_size, struct page *ipage)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
int error = 0;
size_t size, len;
if (name == NULL)
return -EINVAL;
len = strlen(name);
if (len > F2FS_NAME_LEN)
return -ERANGE;
error = read_all_xattrs(inode, ipage, &base_addr);
if (error)
return error;
entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
size = le16_to_cpu(entry->e_value_size);
if (buffer && size > buffer_size) {
error = -ERANGE;
goto cleanup;
}
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
memcpy(buffer, pval, size);
}
error = size;
cleanup:
kzfree(base_addr);
return error;
}
ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct inode *inode = d_inode(dentry);
struct f2fs_xattr_entry *entry;
void *base_addr;
int error = 0;
size_t rest = buffer_size;
error = read_all_xattrs(inode, NULL, &base_addr);
if (error)
return error;
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
f2fs_xattr_handler(entry->e_name_index);
const char *prefix;
size_t prefix_len;
size_t size;
if (!handler || (handler->list && !handler->list(dentry)))
continue;
prefix = handler->prefix ?: handler->name;
prefix_len = strlen(prefix);
size = prefix_len + entry->e_name_len + 1;
if (buffer) {
if (size > rest) {
error = -ERANGE;
goto cleanup;
}
memcpy(buffer, prefix, prefix_len);
buffer += prefix_len;
memcpy(buffer, entry->e_name, entry->e_name_len);
buffer += entry->e_name_len;
*buffer++ = 0;
}
rest -= size;
}
error = buffer_size - rest;
cleanup:
kzfree(base_addr);
return error;
}
static int __f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *ipage, int flags)
{
struct f2fs_xattr_entry *here, *last;
void *base_addr;
int found, newsize;
size_t len;
__u32 new_hsize;
int error = 0;
if (name == NULL)
return -EINVAL;
if (value == NULL)
size = 0;
len = strlen(name);
if (len > F2FS_NAME_LEN)
return -ERANGE;
if (size > MAX_VALUE_LEN(inode))
return -E2BIG;
error = read_all_xattrs(inode, ipage, &base_addr);
if (error)
return error;
/* find entry with wanted name. */
here = __find_xattr(base_addr, index, len, name);
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
if ((flags & XATTR_REPLACE) && !found) {
error = -ENODATA;
goto exit;
} else if ((flags & XATTR_CREATE) && found) {
error = -EEXIST;
goto exit;
}
last = here;
while (!IS_XATTR_LAST_ENTRY(last))
last = XATTR_NEXT_ENTRY(last);
newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
/* 1. Check space */
if (value) {
int free;
/*
* If value is NULL, it is remove operation.
* In case of update operation, we calculate free.
*/
free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
if (found)
free = free + ENTRY_SIZE(here);
if (unlikely(free < newsize)) {
error = -E2BIG;
goto exit;
}
}
/* 2. Remove old entry */
if (found) {
/*
* If entry is found, remove old entry.
* If not found, remove operation is not needed.
*/
struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
int oldsize = ENTRY_SIZE(here);
memmove(here, next, (char *)last - (char *)next);
last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
memset(last, 0, oldsize);
}
new_hsize = (char *)last - (char *)base_addr;
/* 3. Write new entry */
if (value) {
char *pval;
/*
* Before we come here, old entry is removed.
* We just write new entry.
*/
last->e_name_index = index;
last->e_name_len = len;
memcpy(last->e_name, name, len);
pval = last->e_name + len;
memcpy(pval, value, size);
last->e_value_size = cpu_to_le16(size);
new_hsize += newsize;
}
error = write_all_xattrs(inode, new_hsize, base_addr, ipage);
if (error)
goto exit;
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
inode->i_mode = F2FS_I(inode)->i_acl_mode;
inode->i_ctime = current_time(inode);
clear_inode_flag(inode, FI_ACL_MODE);
}
if (index == F2FS_XATTR_INDEX_ENCRYPTION &&
!strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
f2fs_set_encrypted_inode(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
kzfree(base_addr);
return error;
}
int f2fs_setxattr(struct inode *inode, int index, const char *name,
const void *value, size_t size,
struct page *ipage, int flags)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
/* this case is only from init_inode_metadata */
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
size, ipage, flags);
f2fs_balance_fs(sbi, true);
f2fs: use rw_sem instead of fs_lock(locks mutex) The fs_locks is used to block other ops(ex, recovery) when doing checkpoint. And each other operate routine(besides checkpoint) needs to acquire a fs_lock, there is a terrible problem here, if these are too many concurrency threads acquiring fs_lock, so that they will block each other and may lead to some performance problem, but this is not the phenomenon we want to see. Though there are some optimization patches introduced to enhance the usage of fs_lock, but the thorough solution is using a *rw_sem* to replace the fs_lock. Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other, this can avoid the problem described above completely. Because of the weakness of rw_sem, the above change may introduce a potential problem that the checkpoint thread might get starved if other threads are intensively locking the read semaphore for I/O.(Pointed out by Xu Jin) In order to avoid this, a wait_list is introduced, the appending read semaphore ops will be dropped into the wait_list if checkpoint thread is waiting for write semaphore, and will be waked up when checkpoint thread gives up write semaphore. Thanks to Kim's previous review and test, and will be very glad to see other guys' performance tests about this patch. V2: -fix the potential starvation problem. -use more suitable func name suggested by Xu Jin. Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com> [Jaegeuk Kim: adjust minor coding standard] Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 18:08:30 +08:00
f2fs_lock_op(sbi);
/* protect xattr_ver */
down_write(&F2FS_I(inode)->i_sem);
err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
up_write(&F2FS_I(inode)->i_sem);
f2fs: use rw_sem instead of fs_lock(locks mutex) The fs_locks is used to block other ops(ex, recovery) when doing checkpoint. And each other operate routine(besides checkpoint) needs to acquire a fs_lock, there is a terrible problem here, if these are too many concurrency threads acquiring fs_lock, so that they will block each other and may lead to some performance problem, but this is not the phenomenon we want to see. Though there are some optimization patches introduced to enhance the usage of fs_lock, but the thorough solution is using a *rw_sem* to replace the fs_lock. Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other, this can avoid the problem described above completely. Because of the weakness of rw_sem, the above change may introduce a potential problem that the checkpoint thread might get starved if other threads are intensively locking the read semaphore for I/O.(Pointed out by Xu Jin) In order to avoid this, a wait_list is introduced, the appending read semaphore ops will be dropped into the wait_list if checkpoint thread is waiting for write semaphore, and will be waked up when checkpoint thread gives up write semaphore. Thanks to Kim's previous review and test, and will be very glad to see other guys' performance tests about this patch. V2: -fix the potential starvation problem. -use more suitable func name suggested by Xu Jin. Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com> [Jaegeuk Kim: adjust minor coding standard] Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 18:08:30 +08:00
f2fs_unlock_op(sbi);
f2fs_update_time(sbi, REQ_TIME);
return err;
}