mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 12:28:41 +08:00
Orangefs: kernel client part 3
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
This commit is contained in:
parent
5db11c21a9
commit
274dcf55bd
473
fs/orangefs/namei.c
Normal file
473
fs/orangefs/namei.c
Normal file
@ -0,0 +1,473 @@
|
||||
/*
|
||||
* (C) 2001 Clemson University and The University of Chicago
|
||||
*
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Linux VFS namei operations.
|
||||
*/
|
||||
|
||||
#include "protocol.h"
|
||||
#include "pvfs2-kernel.h"
|
||||
|
||||
/*
|
||||
* Get a newly allocated inode to go with a negative dentry.
|
||||
*/
|
||||
static int pvfs2_create(struct inode *dir,
|
||||
struct dentry *dentry,
|
||||
umode_t mode,
|
||||
bool exclusive)
|
||||
{
|
||||
struct pvfs2_inode_s *parent = PVFS2_I(dir);
|
||||
struct pvfs2_kernel_op_s *new_op;
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
|
||||
|
||||
new_op = op_alloc(PVFS2_VFS_OP_CREATE);
|
||||
if (!new_op)
|
||||
return -ENOMEM;
|
||||
|
||||
new_op->upcall.req.create.parent_refn = parent->refn;
|
||||
|
||||
fill_default_sys_attrs(new_op->upcall.req.create.attributes,
|
||||
PVFS_TYPE_METAFILE, mode);
|
||||
|
||||
strncpy(new_op->upcall.req.create.d_name,
|
||||
dentry->d_name.name, PVFS2_NAME_LEN);
|
||||
|
||||
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Create Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
|
||||
&new_op->downcall.resp.create.refn.khandle,
|
||||
new_op->downcall.resp.create.refn.fs_id, ret);
|
||||
|
||||
if (ret < 0) {
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"%s: failed with error code %d\n",
|
||||
__func__, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode = pvfs2_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
|
||||
&new_op->downcall.resp.create.refn);
|
||||
if (IS_ERR(inode)) {
|
||||
gossip_err("*** Failed to allocate pvfs2 file inode\n");
|
||||
ret = PTR_ERR(inode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Assigned file inode new number of %pU\n",
|
||||
get_khandle_from_ino(inode));
|
||||
|
||||
d_instantiate(dentry, inode);
|
||||
unlock_new_inode(inode);
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Inode (Regular File) %pU -> %s\n",
|
||||
get_khandle_from_ino(inode),
|
||||
dentry->d_name.name);
|
||||
|
||||
SetMtimeFlag(parent);
|
||||
dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
|
||||
mark_inode_dirty_sync(dir);
|
||||
ret = 0;
|
||||
out:
|
||||
op_release(new_op);
|
||||
gossip_debug(GOSSIP_NAME_DEBUG, "%s: returning %d\n", __func__, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to resolve an object name (dentry->d_name), parent handle, and
|
||||
* fsid into a handle for the object.
|
||||
*/
|
||||
static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct pvfs2_inode_s *parent = PVFS2_I(dir);
|
||||
struct pvfs2_kernel_op_s *new_op;
|
||||
struct inode *inode;
|
||||
struct dentry *res;
|
||||
int ret = -EINVAL;
|
||||
|
||||
/*
|
||||
* in theory we could skip a lookup here (if the intent is to
|
||||
* create) in order to avoid a potentially failed lookup, but
|
||||
* leaving it in can skip a valid lookup and try to create a file
|
||||
* that already exists (e.g. the vfs already handles checking for
|
||||
* -EEXIST on O_EXCL opens, which is broken if we skip this lookup
|
||||
* in the create path)
|
||||
*/
|
||||
gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
|
||||
__func__, dentry->d_name.name);
|
||||
|
||||
if (dentry->d_name.len > (PVFS2_NAME_LEN - 1))
|
||||
return ERR_PTR(-ENAMETOOLONG);
|
||||
|
||||
new_op = op_alloc(PVFS2_VFS_OP_LOOKUP);
|
||||
if (!new_op)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
new_op->upcall.req.lookup.sym_follow = flags & LOOKUP_FOLLOW;
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n",
|
||||
__FILE__,
|
||||
__func__,
|
||||
__LINE__,
|
||||
&parent->refn.khandle);
|
||||
new_op->upcall.req.lookup.parent_refn = parent->refn;
|
||||
|
||||
strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
|
||||
PVFS2_NAME_LEN);
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"%s: doing lookup on %s under %pU,%d (follow=%s)\n",
|
||||
__func__,
|
||||
new_op->upcall.req.lookup.d_name,
|
||||
&new_op->upcall.req.lookup.parent_refn.khandle,
|
||||
new_op->upcall.req.lookup.parent_refn.fs_id,
|
||||
((new_op->upcall.req.lookup.sym_follow ==
|
||||
PVFS2_LOOKUP_LINK_FOLLOW) ? "yes" : "no"));
|
||||
|
||||
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Lookup Got %pU, fsid %d (ret=%d)\n",
|
||||
&new_op->downcall.resp.lookup.refn.khandle,
|
||||
new_op->downcall.resp.lookup.refn.fs_id,
|
||||
ret);
|
||||
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT) {
|
||||
/*
|
||||
* if no inode was found, add a negative dentry to
|
||||
* dcache anyway; if we don't, we don't hold expected
|
||||
* lookup semantics and we most noticeably break
|
||||
* during directory renames.
|
||||
*
|
||||
* however, if the operation failed or exited, do not
|
||||
* add the dentry (e.g. in the case that a touch is
|
||||
* issued on a file that already exists that was
|
||||
* interrupted during this lookup -- no need to add
|
||||
* another negative dentry for an existing file)
|
||||
*/
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"pvfs2_lookup: Adding *negative* dentry "
|
||||
"%p for %s\n",
|
||||
dentry,
|
||||
dentry->d_name.name);
|
||||
|
||||
d_add(dentry, NULL);
|
||||
res = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* must be a non-recoverable error */
|
||||
res = ERR_PTR(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode = pvfs2_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
|
||||
if (IS_ERR(inode)) {
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"error %ld from iget\n", PTR_ERR(inode));
|
||||
res = ERR_CAST(inode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"%s:%s:%d "
|
||||
"Found good inode [%lu] with count [%d]\n",
|
||||
__FILE__,
|
||||
__func__,
|
||||
__LINE__,
|
||||
inode->i_ino,
|
||||
(int)atomic_read(&inode->i_count));
|
||||
|
||||
/* update dentry/inode pair into dcache */
|
||||
res = d_splice_alias(inode, dentry);
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Lookup success (inode ct = %d)\n",
|
||||
(int)atomic_read(&inode->i_count));
|
||||
out:
|
||||
op_release(new_op);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* return 0 on success; non-zero otherwise */
|
||||
static int pvfs2_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct pvfs2_inode_s *parent = PVFS2_I(dir);
|
||||
struct pvfs2_kernel_op_s *new_op;
|
||||
int ret;
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"%s: called on %s\n"
|
||||
" (inode %pU): Parent is %pU | fs_id %d\n",
|
||||
__func__,
|
||||
dentry->d_name.name,
|
||||
get_khandle_from_ino(inode),
|
||||
&parent->refn.khandle,
|
||||
parent->refn.fs_id);
|
||||
|
||||
new_op = op_alloc(PVFS2_VFS_OP_REMOVE);
|
||||
if (!new_op)
|
||||
return -ENOMEM;
|
||||
|
||||
new_op->upcall.req.remove.parent_refn = parent->refn;
|
||||
strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name,
|
||||
PVFS2_NAME_LEN);
|
||||
|
||||
ret = service_operation(new_op, "pvfs2_unlink",
|
||||
get_interruptible_flag(inode));
|
||||
|
||||
/* when request is serviced properly, free req op struct */
|
||||
op_release(new_op);
|
||||
|
||||
if (!ret) {
|
||||
drop_nlink(inode);
|
||||
|
||||
SetMtimeFlag(parent);
|
||||
dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
|
||||
mark_inode_dirty_sync(dir);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Hard links are not supported by PVFS2.  This stub exists only so that
 * callers get -EOPNOTSUPP rather than the misleading EPERM the kernel
 * would otherwise report.
 */
static int pvfs2_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	return -EOPNOTSUPP;
}
|
||||
|
||||
/*
|
||||
* pvfs2_mknod() is only implemented here to make sure that we return a
|
||||
* reasonable error code (the kernel will return a misleading EPERM
|
||||
* otherwise). PVFS2 does not support special files such as fifos or devices.
|
||||
*/
|
||||
static int pvfs2_mknod(struct inode *dir,
|
||||
struct dentry *dentry,
|
||||
umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int pvfs2_symlink(struct inode *dir,
|
||||
struct dentry *dentry,
|
||||
const char *symname)
|
||||
{
|
||||
struct pvfs2_inode_s *parent = PVFS2_I(dir);
|
||||
struct pvfs2_kernel_op_s *new_op;
|
||||
struct inode *inode;
|
||||
int mode = 755;
|
||||
int ret;
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
|
||||
|
||||
if (!symname)
|
||||
return -EINVAL;
|
||||
|
||||
new_op = op_alloc(PVFS2_VFS_OP_SYMLINK);
|
||||
if (!new_op)
|
||||
return -ENOMEM;
|
||||
|
||||
new_op->upcall.req.sym.parent_refn = parent->refn;
|
||||
|
||||
fill_default_sys_attrs(new_op->upcall.req.sym.attributes,
|
||||
PVFS_TYPE_SYMLINK,
|
||||
mode);
|
||||
|
||||
strncpy(new_op->upcall.req.sym.entry_name,
|
||||
dentry->d_name.name,
|
||||
PVFS2_NAME_LEN);
|
||||
strncpy(new_op->upcall.req.sym.target, symname, PVFS2_NAME_LEN);
|
||||
|
||||
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Symlink Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
|
||||
&new_op->downcall.resp.sym.refn.khandle,
|
||||
new_op->downcall.resp.sym.refn.fs_id, ret);
|
||||
|
||||
if (ret < 0) {
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"%s: failed with error code %d\n",
|
||||
__func__, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode = pvfs2_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0,
|
||||
&new_op->downcall.resp.sym.refn);
|
||||
if (IS_ERR(inode)) {
|
||||
gossip_err
|
||||
("*** Failed to allocate pvfs2 symlink inode\n");
|
||||
ret = PTR_ERR(inode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Assigned symlink inode new number of %pU\n",
|
||||
get_khandle_from_ino(inode));
|
||||
|
||||
d_instantiate(dentry, inode);
|
||||
unlock_new_inode(inode);
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"Inode (Symlink) %pU -> %s\n",
|
||||
get_khandle_from_ino(inode),
|
||||
dentry->d_name.name);
|
||||
|
||||
SetMtimeFlag(parent);
|
||||
dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
|
||||
mark_inode_dirty_sync(dir);
|
||||
ret = 0;
|
||||
out:
|
||||
op_release(new_op);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Create a directory named by @dentry inside @dir.
 *
 * A mkdir request is handed to service_operation(); on success a new
 * directory inode is built for the returned object reference and attached
 * to the dentry.
 *
 * Returns the result of service_operation() on success, or a negative
 * errno on failure.
 */
static int pvfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct pvfs2_inode_s *dir_info = PVFS2_I(dir);
	struct pvfs2_kernel_op_s *op;
	struct inode *new_inode;
	int status;

	op = op_alloc(PVFS2_VFS_OP_MKDIR);
	if (op == NULL)
		return -ENOMEM;

	/* fill in the upcall: parent, attributes, then the name */
	op->upcall.req.mkdir.parent_refn = dir_info->refn;
	fill_default_sys_attrs(op->upcall.req.mkdir.attributes,
			       PVFS_TYPE_DIRECTORY, mode);
	strncpy(op->upcall.req.mkdir.d_name,
		dentry->d_name.name,
		PVFS2_NAME_LEN);

	status = service_operation(op, __func__, get_interruptible_flag(dir));

	/* logged whether or not the operation succeeded */
	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Mkdir Got PVFS2 handle %pU on fsid %d\n",
		     &op->downcall.resp.mkdir.refn.khandle,
		     op->downcall.resp.mkdir.refn.fs_id);

	if (status < 0) {
		gossip_debug(GOSSIP_NAME_DEBUG,
			     "%s: failed with error code %d\n",
			     __func__, status);
		goto release;
	}

	new_inode = pvfs2_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0,
				    &op->downcall.resp.mkdir.refn);
	if (IS_ERR(new_inode)) {
		gossip_err("*** Failed to allocate pvfs2 dir inode\n");
		status = PTR_ERR(new_inode);
		goto release;
	}

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Assigned dir inode new number of %pU\n",
		     get_khandle_from_ino(new_inode));

	/* bind the inode to the dentry before unlocking it */
	d_instantiate(dentry, new_inode);
	unlock_new_inode(new_inode);

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Inode (Directory) %pU -> %s\n",
		     get_khandle_from_ino(new_inode),
		     dentry->d_name.name);

	/*
	 * NOTE: directory nlink cannot be kept consistent across clients,
	 * so it is deliberately left alone here (constant at 1).
	 */
	SetMtimeFlag(dir_info);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	mark_inode_dirty_sync(dir);

release:
	op_release(op);
	return status;
}
|
||||
|
||||
static int pvfs2_rename(struct inode *old_dir,
|
||||
struct dentry *old_dentry,
|
||||
struct inode *new_dir,
|
||||
struct dentry *new_dentry)
|
||||
{
|
||||
struct pvfs2_kernel_op_s *new_op;
|
||||
int ret;
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"pvfs2_rename: called (%s/%s => %s/%s) ct=%d\n",
|
||||
old_dentry->d_parent->d_name.name,
|
||||
old_dentry->d_name.name,
|
||||
new_dentry->d_parent->d_name.name,
|
||||
new_dentry->d_name.name,
|
||||
d_count(new_dentry));
|
||||
|
||||
new_op = op_alloc(PVFS2_VFS_OP_RENAME);
|
||||
if (!new_op)
|
||||
return -EINVAL;
|
||||
|
||||
new_op->upcall.req.rename.old_parent_refn = PVFS2_I(old_dir)->refn;
|
||||
new_op->upcall.req.rename.new_parent_refn = PVFS2_I(new_dir)->refn;
|
||||
|
||||
strncpy(new_op->upcall.req.rename.d_old_name,
|
||||
old_dentry->d_name.name,
|
||||
PVFS2_NAME_LEN);
|
||||
strncpy(new_op->upcall.req.rename.d_new_name,
|
||||
new_dentry->d_name.name,
|
||||
PVFS2_NAME_LEN);
|
||||
|
||||
ret = service_operation(new_op,
|
||||
"pvfs2_rename",
|
||||
get_interruptible_flag(old_dentry->d_inode));
|
||||
|
||||
gossip_debug(GOSSIP_NAME_DEBUG,
|
||||
"pvfs2_rename: got downcall status %d\n",
|
||||
ret);
|
||||
|
||||
if (new_dentry->d_inode)
|
||||
new_dentry->d_inode->i_ctime = CURRENT_TIME;
|
||||
|
||||
op_release(new_op);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* PVFS2 implementation of VFS inode operations for directories */
|
||||
struct inode_operations pvfs2_dir_inode_operations = {
|
||||
.lookup = pvfs2_lookup,
|
||||
.get_acl = pvfs2_get_acl,
|
||||
.set_acl = pvfs2_set_acl,
|
||||
.create = pvfs2_create,
|
||||
.link = pvfs2_link,
|
||||
.unlink = pvfs2_unlink,
|
||||
.symlink = pvfs2_symlink,
|
||||
.mkdir = pvfs2_mkdir,
|
||||
.rmdir = pvfs2_unlink,
|
||||
.mknod = pvfs2_mknod,
|
||||
.rename = pvfs2_rename,
|
||||
.setattr = pvfs2_setattr,
|
||||
.getattr = pvfs2_getattr,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
.listxattr = pvfs2_listxattr,
|
||||
};
|
970
fs/orangefs/pvfs2-bufmap.c
Normal file
970
fs/orangefs/pvfs2-bufmap.c
Normal file
@ -0,0 +1,970 @@
|
||||
/*
|
||||
* (C) 2001 Clemson University and The University of Chicago
|
||||
*
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
#include "protocol.h"
|
||||
#include "pvfs2-kernel.h"
|
||||
#include "pvfs2-bufmap.h"
|
||||
|
||||
DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq);
|
||||
|
||||
struct pvfs2_bufmap {
|
||||
atomic_t refcnt;
|
||||
|
||||
int desc_size;
|
||||
int desc_shift;
|
||||
int desc_count;
|
||||
int total_size;
|
||||
int page_count;
|
||||
|
||||
struct page **page_array;
|
||||
struct pvfs_bufmap_desc *desc_array;
|
||||
|
||||
/* array to track usage of buffer descriptors */
|
||||
int *buffer_index_array;
|
||||
spinlock_t buffer_index_lock;
|
||||
|
||||
/* array to track usage of buffer descriptors for readdir */
|
||||
int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT];
|
||||
spinlock_t readdir_index_lock;
|
||||
} *__pvfs2_bufmap;
|
||||
|
||||
static DEFINE_SPINLOCK(pvfs2_bufmap_lock);
|
||||
|
||||
static void
|
||||
pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bufmap->page_count; i++)
|
||||
page_cache_release(bufmap->page_array[i]);
|
||||
}
|
||||
|
||||
static void
|
||||
pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap)
|
||||
{
|
||||
kfree(bufmap->page_array);
|
||||
kfree(bufmap->desc_array);
|
||||
kfree(bufmap->buffer_index_array);
|
||||
kfree(bufmap);
|
||||
}
|
||||
|
||||
struct pvfs2_bufmap *pvfs2_bufmap_ref(void)
|
||||
{
|
||||
struct pvfs2_bufmap *bufmap = NULL;
|
||||
|
||||
spin_lock(&pvfs2_bufmap_lock);
|
||||
if (__pvfs2_bufmap) {
|
||||
bufmap = __pvfs2_bufmap;
|
||||
atomic_inc(&bufmap->refcnt);
|
||||
}
|
||||
spin_unlock(&pvfs2_bufmap_lock);
|
||||
return bufmap;
|
||||
}
|
||||
|
||||
void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap)
|
||||
{
|
||||
if (atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock)) {
|
||||
__pvfs2_bufmap = NULL;
|
||||
spin_unlock(&pvfs2_bufmap_lock);
|
||||
|
||||
pvfs2_bufmap_unmap(bufmap);
|
||||
pvfs2_bufmap_free(bufmap);
|
||||
}
|
||||
}
|
||||
|
||||
inline int pvfs_bufmap_size_query(void)
|
||||
{
|
||||
struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
|
||||
int size = bufmap ? bufmap->desc_size : 0;
|
||||
|
||||
pvfs2_bufmap_unref(bufmap);
|
||||
return size;
|
||||
}
|
||||
|
||||
inline int pvfs_bufmap_shift_query(void)
|
||||
{
|
||||
struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
|
||||
int shift = bufmap ? bufmap->desc_shift : 0;
|
||||
|
||||
pvfs2_bufmap_unref(bufmap);
|
||||
return shift;
|
||||
}
|
||||
|
||||
/* tasks waiting for a free buffer descriptor slot */
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* tasks waiting for a free readdir descriptor slot */
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
|
||||
|
||||
/*
|
||||
* get_bufmap_init
|
||||
*
|
||||
* If bufmap_init is 1, then the shared memory system, including the
|
||||
* buffer_index_array, is available. Otherwise, it is not.
|
||||
*
|
||||
* returns the value of bufmap_init
|
||||
*/
|
||||
int get_bufmap_init(void)
|
||||
{
|
||||
return __pvfs2_bufmap ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
static struct pvfs2_bufmap *
|
||||
pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc)
|
||||
{
|
||||
struct pvfs2_bufmap *bufmap;
|
||||
|
||||
bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
|
||||
if (!bufmap)
|
||||
goto out;
|
||||
|
||||
atomic_set(&bufmap->refcnt, 1);
|
||||
bufmap->total_size = user_desc->total_size;
|
||||
bufmap->desc_count = user_desc->count;
|
||||
bufmap->desc_size = user_desc->size;
|
||||
bufmap->desc_shift = ilog2(bufmap->desc_size);
|
||||
|
||||
spin_lock_init(&bufmap->buffer_index_lock);
|
||||
bufmap->buffer_index_array =
|
||||
kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
|
||||
if (!bufmap->buffer_index_array) {
|
||||
gossip_err("pvfs2: could not allocate %d buffer indices\n",
|
||||
bufmap->desc_count);
|
||||
goto out_free_bufmap;
|
||||
}
|
||||
spin_lock_init(&bufmap->readdir_index_lock);
|
||||
|
||||
bufmap->desc_array =
|
||||
kcalloc(bufmap->desc_count, sizeof(struct pvfs_bufmap_desc),
|
||||
GFP_KERNEL);
|
||||
if (!bufmap->desc_array) {
|
||||
gossip_err("pvfs2: could not allocate %d descriptors\n",
|
||||
bufmap->desc_count);
|
||||
goto out_free_index_array;
|
||||
}
|
||||
|
||||
bufmap->page_count = bufmap->total_size / PAGE_SIZE;
|
||||
|
||||
/* allocate storage to track our page mappings */
|
||||
bufmap->page_array =
|
||||
kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
|
||||
if (!bufmap->page_array)
|
||||
goto out_free_desc_array;
|
||||
|
||||
return bufmap;
|
||||
|
||||
out_free_desc_array:
|
||||
kfree(bufmap->desc_array);
|
||||
out_free_index_array:
|
||||
kfree(bufmap->buffer_index_array);
|
||||
out_free_bufmap:
|
||||
kfree(bufmap);
|
||||
out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap,
|
||||
struct PVFS_dev_map_desc *user_desc)
|
||||
{
|
||||
int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
|
||||
int offset = 0, ret, i;
|
||||
|
||||
/* map the pages */
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
ret = get_user_pages(current,
|
||||
current->mm,
|
||||
(unsigned long)user_desc->ptr,
|
||||
bufmap->page_count,
|
||||
1,
|
||||
0,
|
||||
bufmap->page_array,
|
||||
NULL);
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret != bufmap->page_count) {
|
||||
gossip_err("pvfs2 error: asked for %d pages, only got %d.\n",
|
||||
bufmap->page_count, ret);
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
SetPageError(bufmap->page_array[i]);
|
||||
page_cache_release(bufmap->page_array[i]);
|
||||
}
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* ideally we want to get kernel space pointers for each page, but
|
||||
* we can't kmap that many pages at once if highmem is being used.
|
||||
* so instead, we just kmap/kunmap the page address each time the
|
||||
* kaddr is needed.
|
||||
*/
|
||||
for (i = 0; i < bufmap->page_count; i++)
|
||||
flush_dcache_page(bufmap->page_array[i]);
|
||||
|
||||
/* build a list of available descriptors */
|
||||
for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
|
||||
bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
|
||||
bufmap->desc_array[i].array_count = pages_per_desc;
|
||||
bufmap->desc_array[i].uaddr =
|
||||
(user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
|
||||
offset += pages_per_desc;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_initialize()
|
||||
*
|
||||
* initializes the mapped buffer interface
|
||||
*
|
||||
* returns 0 on success, -errno on failure
|
||||
*/
|
||||
int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc)
|
||||
{
|
||||
struct pvfs2_bufmap *bufmap;
|
||||
int ret = -EINVAL;
|
||||
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"pvfs_bufmap_initialize: called (ptr ("
|
||||
"%p) sz (%d) cnt(%d).\n",
|
||||
user_desc->ptr,
|
||||
user_desc->size,
|
||||
user_desc->count);
|
||||
|
||||
/*
|
||||
* sanity check alignment and size of buffer that caller wants to
|
||||
* work with
|
||||
*/
|
||||
if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
|
||||
(unsigned long)user_desc->ptr) {
|
||||
gossip_err("pvfs2 error: memory alignment (front). %p\n",
|
||||
user_desc->ptr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
|
||||
!= (unsigned long)(user_desc->ptr + user_desc->total_size)) {
|
||||
gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n",
|
||||
user_desc->ptr,
|
||||
user_desc->total_size);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (user_desc->total_size != (user_desc->size * user_desc->count)) {
|
||||
gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n",
|
||||
user_desc->total_size,
|
||||
user_desc->size,
|
||||
user_desc->count);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((user_desc->size % PAGE_SIZE) != 0) {
|
||||
gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n",
|
||||
user_desc->size);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
bufmap = pvfs2_bufmap_alloc(user_desc);
|
||||
if (!bufmap)
|
||||
goto out;
|
||||
|
||||
ret = pvfs2_bufmap_map(bufmap, user_desc);
|
||||
if (ret)
|
||||
goto out_free_bufmap;
|
||||
|
||||
|
||||
spin_lock(&pvfs2_bufmap_lock);
|
||||
if (__pvfs2_bufmap) {
|
||||
spin_unlock(&pvfs2_bufmap_lock);
|
||||
gossip_err("pvfs2: error: bufmap already initialized.\n");
|
||||
ret = -EALREADY;
|
||||
goto out_unmap_bufmap;
|
||||
}
|
||||
__pvfs2_bufmap = bufmap;
|
||||
spin_unlock(&pvfs2_bufmap_lock);
|
||||
|
||||
/*
|
||||
* If there are operations in pvfs2_bufmap_init_waitq, wake them up.
|
||||
* This scenario occurs when the client-core is restarted and I/O
|
||||
* requests in the in-progress or waiting tables are restarted. I/O
|
||||
* requests cannot be restarted until the shared memory system is
|
||||
* completely re-initialized, so we put the I/O requests in this
|
||||
* waitq until initialization has completed. NOTE: the I/O requests
|
||||
* are also on a timer, so they don't wait forever just in case the
|
||||
* client-core doesn't come back up.
|
||||
*/
|
||||
wake_up_interruptible(&pvfs2_bufmap_init_waitq);
|
||||
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"pvfs_bufmap_initialize: exiting normally\n");
|
||||
return 0;
|
||||
|
||||
out_unmap_bufmap:
|
||||
pvfs2_bufmap_unmap(bufmap);
|
||||
out_free_bufmap:
|
||||
pvfs2_bufmap_free(bufmap);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_finalize()
|
||||
*
|
||||
* shuts down the mapped buffer interface and releases any resources
|
||||
* associated with it
|
||||
*
|
||||
* no return value
|
||||
*/
|
||||
void pvfs_bufmap_finalize(void)
|
||||
{
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n");
|
||||
BUG_ON(!__pvfs2_bufmap);
|
||||
pvfs2_bufmap_unref(__pvfs2_bufmap);
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"pvfs2_bufmap_finalize: exiting normally\n");
|
||||
}
|
||||
|
||||
struct slot_args {
|
||||
int slot_count;
|
||||
int *slot_array;
|
||||
spinlock_t *slot_lock;
|
||||
wait_queue_head_t *slot_wq;
|
||||
};
|
||||
|
||||
static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
|
||||
{
|
||||
int ret = -1;
|
||||
int i = 0;
|
||||
DECLARE_WAITQUEUE(my_wait, current);
|
||||
|
||||
|
||||
add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
|
||||
|
||||
while (1) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* check for available desc, slot_lock is the appropriate
|
||||
* index_lock
|
||||
*/
|
||||
spin_lock(slargs->slot_lock);
|
||||
for (i = 0; i < slargs->slot_count; i++)
|
||||
if (slargs->slot_array[i] == 0) {
|
||||
slargs->slot_array[i] = 1;
|
||||
*buffer_index = i;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
spin_unlock(slargs->slot_lock);
|
||||
|
||||
/* if we acquired a buffer, then break out of while */
|
||||
if (ret == 0)
|
||||
break;
|
||||
|
||||
if (!signal_pending(current)) {
|
||||
int timeout =
|
||||
MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"[BUFMAP]: waiting %d "
|
||||
"seconds for a slot\n",
|
||||
slot_timeout_secs);
|
||||
if (!schedule_timeout(timeout)) {
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"*** wait_for_a_slot timed out\n");
|
||||
ret = -ETIMEDOUT;
|
||||
break;
|
||||
}
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"[BUFMAP]: woken up by a slot becoming available.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n",
|
||||
__func__);
|
||||
ret = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
set_current_state(TASK_RUNNING);
|
||||
remove_wait_queue(slargs->slot_wq, &my_wait);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void put_back_slot(struct slot_args *slargs, int buffer_index)
|
||||
{
|
||||
/* slot_lock is the appropriate index_lock */
|
||||
spin_lock(slargs->slot_lock);
|
||||
if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
|
||||
spin_unlock(slargs->slot_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/* put the desc back on the queue */
|
||||
slargs->slot_array[buffer_index] = 0;
|
||||
spin_unlock(slargs->slot_lock);
|
||||
|
||||
/* wake up anyone who may be sleeping on the queue */
|
||||
wake_up_interruptible(slargs->slot_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_get()
|
||||
*
|
||||
* gets a free mapped buffer descriptor, will sleep until one becomes
|
||||
* available if necessary
|
||||
*
|
||||
* returns 0 on success, -errno on failure
|
||||
*/
|
||||
int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index)
|
||||
{
|
||||
struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
|
||||
struct slot_args slargs;
|
||||
int ret;
|
||||
|
||||
if (!bufmap) {
|
||||
gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
slargs.slot_count = bufmap->desc_count;
|
||||
slargs.slot_array = bufmap->buffer_index_array;
|
||||
slargs.slot_lock = &bufmap->buffer_index_lock;
|
||||
slargs.slot_wq = &bufmap_waitq;
|
||||
ret = wait_for_a_slot(&slargs, buffer_index);
|
||||
if (ret)
|
||||
pvfs2_bufmap_unref(bufmap);
|
||||
*mapp = bufmap;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_put()
|
||||
*
|
||||
* returns a mapped buffer descriptor to the collection
|
||||
*
|
||||
* no return value
|
||||
*/
|
||||
void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index)
|
||||
{
|
||||
struct slot_args slargs;
|
||||
|
||||
slargs.slot_count = bufmap->desc_count;
|
||||
slargs.slot_array = bufmap->buffer_index_array;
|
||||
slargs.slot_lock = &bufmap->buffer_index_lock;
|
||||
slargs.slot_wq = &bufmap_waitq;
|
||||
put_back_slot(&slargs, buffer_index);
|
||||
pvfs2_bufmap_unref(bufmap);
|
||||
}
|
||||
|
||||
/*
|
||||
* readdir_index_get()
|
||||
*
|
||||
* gets a free descriptor, will sleep until one becomes
|
||||
* available if necessary.
|
||||
* Although the readdir buffers are not mapped into kernel space
|
||||
* we could do that at a later point of time. Regardless, these
|
||||
* indices are used by the client-core.
|
||||
*
|
||||
* returns 0 on success, -errno on failure
|
||||
*/
|
||||
int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
|
||||
{
|
||||
struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
|
||||
struct slot_args slargs;
|
||||
int ret;
|
||||
|
||||
if (!bufmap) {
|
||||
gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
|
||||
slargs.slot_array = bufmap->readdir_index_array;
|
||||
slargs.slot_lock = &bufmap->readdir_index_lock;
|
||||
slargs.slot_wq = &readdir_waitq;
|
||||
ret = wait_for_a_slot(&slargs, buffer_index);
|
||||
if (ret)
|
||||
pvfs2_bufmap_unref(bufmap);
|
||||
*mapp = bufmap;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
|
||||
{
|
||||
struct slot_args slargs;
|
||||
|
||||
slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
|
||||
slargs.slot_array = bufmap->readdir_index_array;
|
||||
slargs.slot_lock = &bufmap->readdir_index_lock;
|
||||
slargs.slot_wq = &readdir_waitq;
|
||||
put_back_slot(&slargs, buffer_index);
|
||||
pvfs2_bufmap_unref(bufmap);
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_copy_iovec_from_user()
|
||||
*
|
||||
* copies data from several user space address's in an iovec
|
||||
* to a mapped buffer
|
||||
*
|
||||
* Note that the mapped buffer is a series of pages and therefore
|
||||
* the copies have to be split by PAGE_SIZE bytes at a time.
|
||||
* Note that this routine checks that summation of iov_len
|
||||
* across all the elements of iov is equal to size.
|
||||
*
|
||||
* returns 0 on success, -errno on failure
|
||||
*/
|
||||
int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap,
|
||||
int buffer_index,
|
||||
const struct iovec *iov,
|
||||
unsigned long nr_segs,
|
||||
size_t size)
|
||||
{
|
||||
size_t ret = 0;
|
||||
size_t amt_copied = 0;
|
||||
size_t cur_copy_size = 0;
|
||||
unsigned int to_page_offset = 0;
|
||||
unsigned int to_page_index = 0;
|
||||
void *to_kaddr = NULL;
|
||||
void __user *from_addr = NULL;
|
||||
struct iovec *copied_iovec = NULL;
|
||||
struct pvfs_bufmap_desc *to;
|
||||
unsigned int seg;
|
||||
char *tmp_printer = NULL;
|
||||
int tmp_int = 0;
|
||||
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"pvfs_bufmap_copy_iovec_from_user: index %d, "
|
||||
"size %zd\n",
|
||||
buffer_index,
|
||||
size);
|
||||
|
||||
to = &bufmap->desc_array[buffer_index];
|
||||
|
||||
/*
|
||||
* copy the passed in iovec so that we can change some of its fields
|
||||
*/
|
||||
copied_iovec = kmalloc_array(nr_segs,
|
||||
sizeof(*copied_iovec),
|
||||
PVFS2_BUFMAP_GFP_FLAGS);
|
||||
if (copied_iovec == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
|
||||
/*
|
||||
* Go through each segment in the iovec and make sure that
|
||||
* the summation of iov_len matches the given size.
|
||||
*/
|
||||
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
|
||||
amt_copied += copied_iovec[seg].iov_len;
|
||||
if (amt_copied != size) {
|
||||
gossip_err(
|
||||
"pvfs2_bufmap_copy_iovec_from_user: computed total ("
|
||||
"%zd) is not equal to (%zd)\n",
|
||||
amt_copied,
|
||||
size);
|
||||
kfree(copied_iovec);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
to_page_index = 0;
|
||||
to_page_offset = 0;
|
||||
amt_copied = 0;
|
||||
seg = 0;
|
||||
/*
|
||||
* Go through each segment in the iovec and copy its
|
||||
* buffer into the mapped buffer one page at a time though
|
||||
*/
|
||||
while (amt_copied < size) {
|
||||
struct iovec *iv = &copied_iovec[seg];
|
||||
int inc_to_page_index;
|
||||
|
||||
if (iv->iov_len < (PAGE_SIZE - to_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
from_addr = iv->iov_base;
|
||||
inc_to_page_index = 0;
|
||||
} else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
from_addr = iv->iov_base;
|
||||
inc_to_page_index = 1;
|
||||
} else {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(PAGE_SIZE - to_page_offset,
|
||||
size - amt_copied);
|
||||
from_addr = iv->iov_base;
|
||||
iv->iov_base += cur_copy_size;
|
||||
iv->iov_len -= cur_copy_size;
|
||||
inc_to_page_index = 1;
|
||||
}
|
||||
to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
|
||||
ret =
|
||||
copy_from_user(to_kaddr + to_page_offset,
|
||||
from_addr,
|
||||
cur_copy_size);
|
||||
if (!PageReserved(to->page_array[to_page_index]))
|
||||
SetPageDirty(to->page_array[to_page_index]);
|
||||
|
||||
if (!tmp_printer) {
|
||||
tmp_printer = (char *)(to_kaddr + to_page_offset);
|
||||
tmp_int += tmp_printer[0];
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"First character (integer value) in pvfs_bufmap_copy_from_user: %d\n",
|
||||
tmp_int);
|
||||
}
|
||||
|
||||
pvfs2_kunmap(to->page_array[to_page_index]);
|
||||
if (ret) {
|
||||
gossip_err("Failed to copy data from user space\n");
|
||||
kfree(copied_iovec);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
amt_copied += cur_copy_size;
|
||||
if (inc_to_page_index) {
|
||||
to_page_offset = 0;
|
||||
to_page_index++;
|
||||
} else {
|
||||
to_page_offset += cur_copy_size;
|
||||
}
|
||||
}
|
||||
kfree(copied_iovec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_copy_iovec_from_kernel()
|
||||
*
|
||||
* copies data from several kernel space address's in an iovec
|
||||
* to a mapped buffer
|
||||
*
|
||||
* Note that the mapped buffer is a series of pages and therefore
|
||||
* the copies have to be split by PAGE_SIZE bytes at a time.
|
||||
* Note that this routine checks that summation of iov_len
|
||||
* across all the elements of iov is equal to size.
|
||||
*
|
||||
* returns 0 on success, -errno on failure
|
||||
*/
|
||||
int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap,
|
||||
int buffer_index, const struct iovec *iov,
|
||||
unsigned long nr_segs, size_t size)
|
||||
{
|
||||
size_t amt_copied = 0;
|
||||
size_t cur_copy_size = 0;
|
||||
int to_page_index = 0;
|
||||
void *to_kaddr = NULL;
|
||||
void *from_kaddr = NULL;
|
||||
struct iovec *copied_iovec = NULL;
|
||||
struct pvfs_bufmap_desc *to;
|
||||
unsigned int seg;
|
||||
unsigned to_page_offset = 0;
|
||||
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"pvfs_bufmap_copy_iovec_from_kernel: index %d, "
|
||||
"size %zd\n",
|
||||
buffer_index,
|
||||
size);
|
||||
|
||||
to = &bufmap->desc_array[buffer_index];
|
||||
/*
|
||||
* copy the passed in iovec so that we can change some of its fields
|
||||
*/
|
||||
copied_iovec = kmalloc_array(nr_segs,
|
||||
sizeof(*copied_iovec),
|
||||
PVFS2_BUFMAP_GFP_FLAGS);
|
||||
if (copied_iovec == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
|
||||
/*
|
||||
* Go through each segment in the iovec and make sure that
|
||||
* the summation of iov_len matches the given size.
|
||||
*/
|
||||
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
|
||||
amt_copied += copied_iovec[seg].iov_len;
|
||||
if (amt_copied != size) {
|
||||
gossip_err("pvfs2_bufmap_copy_iovec_from_kernel: computed total(%zd) is not equal to (%zd)\n",
|
||||
amt_copied,
|
||||
size);
|
||||
kfree(copied_iovec);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
to_page_index = 0;
|
||||
amt_copied = 0;
|
||||
seg = 0;
|
||||
to_page_offset = 0;
|
||||
/*
|
||||
* Go through each segment in the iovec and copy its
|
||||
* buffer into the mapped buffer one page at a time though
|
||||
*/
|
||||
while (amt_copied < size) {
|
||||
struct iovec *iv = &copied_iovec[seg];
|
||||
int inc_to_page_index;
|
||||
|
||||
if (iv->iov_len < (PAGE_SIZE - to_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
from_kaddr = iv->iov_base;
|
||||
inc_to_page_index = 0;
|
||||
} else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
from_kaddr = iv->iov_base;
|
||||
inc_to_page_index = 1;
|
||||
} else {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(PAGE_SIZE - to_page_offset,
|
||||
size - amt_copied);
|
||||
from_kaddr = iv->iov_base;
|
||||
iv->iov_base += cur_copy_size;
|
||||
iv->iov_len -= cur_copy_size;
|
||||
inc_to_page_index = 1;
|
||||
}
|
||||
to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
|
||||
memcpy(to_kaddr + to_page_offset, from_kaddr, cur_copy_size);
|
||||
if (!PageReserved(to->page_array[to_page_index]))
|
||||
SetPageDirty(to->page_array[to_page_index]);
|
||||
pvfs2_kunmap(to->page_array[to_page_index]);
|
||||
amt_copied += cur_copy_size;
|
||||
if (inc_to_page_index) {
|
||||
to_page_offset = 0;
|
||||
to_page_index++;
|
||||
} else {
|
||||
to_page_offset += cur_copy_size;
|
||||
}
|
||||
}
|
||||
kfree(copied_iovec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_copy_to_user_iovec()
|
||||
*
|
||||
* copies data to several user space address's in an iovec
|
||||
* from a mapped buffer
|
||||
*
|
||||
* returns 0 on success, -errno on failure
|
||||
*/
|
||||
int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap,
|
||||
int buffer_index, const struct iovec *iov,
|
||||
unsigned long nr_segs, size_t size)
|
||||
{
|
||||
size_t ret = 0;
|
||||
size_t amt_copied = 0;
|
||||
size_t cur_copy_size = 0;
|
||||
int from_page_index = 0;
|
||||
void *from_kaddr = NULL;
|
||||
void __user *to_addr = NULL;
|
||||
struct iovec *copied_iovec = NULL;
|
||||
struct pvfs_bufmap_desc *from;
|
||||
unsigned int seg;
|
||||
unsigned from_page_offset = 0;
|
||||
char *tmp_printer = NULL;
|
||||
int tmp_int = 0;
|
||||
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"pvfs_bufmap_copy_to_user_iovec: index %d, size %zd\n",
|
||||
buffer_index,
|
||||
size);
|
||||
|
||||
from = &bufmap->desc_array[buffer_index];
|
||||
/*
|
||||
* copy the passed in iovec so that we can change some of its fields
|
||||
*/
|
||||
copied_iovec = kmalloc_array(nr_segs,
|
||||
sizeof(*copied_iovec),
|
||||
PVFS2_BUFMAP_GFP_FLAGS);
|
||||
if (copied_iovec == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
|
||||
/*
|
||||
* Go through each segment in the iovec and make sure that
|
||||
* the summation of iov_len is greater than the given size.
|
||||
*/
|
||||
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
|
||||
amt_copied += copied_iovec[seg].iov_len;
|
||||
if (amt_copied < size) {
|
||||
gossip_err("pvfs2_bufmap_copy_to_user_iovec: computed total (%zd) is less than (%zd)\n",
|
||||
amt_copied,
|
||||
size);
|
||||
kfree(copied_iovec);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
from_page_index = 0;
|
||||
amt_copied = 0;
|
||||
seg = 0;
|
||||
from_page_offset = 0;
|
||||
/*
|
||||
* Go through each segment in the iovec and copy from the mapper buffer,
|
||||
* but make sure that we do so one page at a time.
|
||||
*/
|
||||
while (amt_copied < size) {
|
||||
struct iovec *iv = &copied_iovec[seg];
|
||||
int inc_from_page_index;
|
||||
|
||||
if (iv->iov_len < (PAGE_SIZE - from_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
to_addr = iv->iov_base;
|
||||
inc_from_page_index = 0;
|
||||
} else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
to_addr = iv->iov_base;
|
||||
inc_from_page_index = 1;
|
||||
} else {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(PAGE_SIZE - from_page_offset,
|
||||
size - amt_copied);
|
||||
to_addr = iv->iov_base;
|
||||
iv->iov_base += cur_copy_size;
|
||||
iv->iov_len -= cur_copy_size;
|
||||
inc_from_page_index = 1;
|
||||
}
|
||||
from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
|
||||
if (!tmp_printer) {
|
||||
tmp_printer = (char *)(from_kaddr + from_page_offset);
|
||||
tmp_int += tmp_printer[0];
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"First character (integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n",
|
||||
tmp_int);
|
||||
}
|
||||
ret =
|
||||
copy_to_user(to_addr,
|
||||
from_kaddr + from_page_offset,
|
||||
cur_copy_size);
|
||||
pvfs2_kunmap(from->page_array[from_page_index]);
|
||||
if (ret) {
|
||||
gossip_err("Failed to copy data to user space\n");
|
||||
kfree(copied_iovec);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
amt_copied += cur_copy_size;
|
||||
if (inc_from_page_index) {
|
||||
from_page_offset = 0;
|
||||
from_page_index++;
|
||||
} else {
|
||||
from_page_offset += cur_copy_size;
|
||||
}
|
||||
}
|
||||
kfree(copied_iovec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* pvfs_bufmap_copy_to_kernel_iovec()
|
||||
*
|
||||
* copies data to several kernel space address's in an iovec
|
||||
* from a mapped buffer
|
||||
*
|
||||
* returns 0 on success, -errno on failure
|
||||
*/
|
||||
int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap,
|
||||
int buffer_index, const struct iovec *iov,
|
||||
unsigned long nr_segs, size_t size)
|
||||
{
|
||||
size_t amt_copied = 0;
|
||||
size_t cur_copy_size = 0;
|
||||
int from_page_index = 0;
|
||||
void *from_kaddr = NULL;
|
||||
void *to_kaddr = NULL;
|
||||
struct iovec *copied_iovec = NULL;
|
||||
struct pvfs_bufmap_desc *from;
|
||||
unsigned int seg;
|
||||
unsigned int from_page_offset = 0;
|
||||
|
||||
gossip_debug(GOSSIP_BUFMAP_DEBUG,
|
||||
"pvfs_bufmap_copy_to_kernel_iovec: index %d, size %zd\n",
|
||||
buffer_index,
|
||||
size);
|
||||
|
||||
from = &bufmap->desc_array[buffer_index];
|
||||
/*
|
||||
* copy the passed in iovec so that we can change some of its fields
|
||||
*/
|
||||
copied_iovec = kmalloc_array(nr_segs,
|
||||
sizeof(*copied_iovec),
|
||||
PVFS2_BUFMAP_GFP_FLAGS);
|
||||
if (copied_iovec == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
|
||||
/*
|
||||
* Go through each segment in the iovec and make sure that
|
||||
* the summation of iov_len is greater than the given size.
|
||||
*/
|
||||
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
|
||||
amt_copied += copied_iovec[seg].iov_len;
|
||||
|
||||
if (amt_copied < size) {
|
||||
gossip_err("pvfs2_bufmap_copy_to_kernel_iovec: computed total (%zd) is less than (%zd)\n",
|
||||
amt_copied,
|
||||
size);
|
||||
kfree(copied_iovec);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
from_page_index = 0;
|
||||
amt_copied = 0;
|
||||
seg = 0;
|
||||
from_page_offset = 0;
|
||||
/*
|
||||
* Go through each segment in the iovec and copy from the mapper buffer,
|
||||
* but make sure that we do so one page at a time.
|
||||
*/
|
||||
while (amt_copied < size) {
|
||||
struct iovec *iv = &copied_iovec[seg];
|
||||
int inc_from_page_index;
|
||||
|
||||
if (iv->iov_len < (PAGE_SIZE - from_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
to_kaddr = iv->iov_base;
|
||||
inc_from_page_index = 0;
|
||||
} else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(iv->iov_len, size - amt_copied);
|
||||
seg++;
|
||||
to_kaddr = iv->iov_base;
|
||||
inc_from_page_index = 1;
|
||||
} else {
|
||||
cur_copy_size =
|
||||
PVFS_util_min(PAGE_SIZE - from_page_offset,
|
||||
size - amt_copied);
|
||||
to_kaddr = iv->iov_base;
|
||||
iv->iov_base += cur_copy_size;
|
||||
iv->iov_len -= cur_copy_size;
|
||||
inc_from_page_index = 1;
|
||||
}
|
||||
from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
|
||||
memcpy(to_kaddr, from_kaddr + from_page_offset, cur_copy_size);
|
||||
pvfs2_kunmap(from->page_array[from_page_index]);
|
||||
amt_copied += cur_copy_size;
|
||||
if (inc_from_page_index) {
|
||||
from_page_offset = 0;
|
||||
from_page_index++;
|
||||
} else {
|
||||
from_page_offset += cur_copy_size;
|
||||
}
|
||||
}
|
||||
kfree(copied_iovec);
|
||||
return 0;
|
||||
}
|
260
fs/orangefs/pvfs2-cache.c
Normal file
260
fs/orangefs/pvfs2-cache.c
Normal file
@ -0,0 +1,260 @@
|
||||
/*
|
||||
* (C) 2001 Clemson University and The University of Chicago
|
||||
*
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
#include "protocol.h"
|
||||
#include "pvfs2-kernel.h"
|
||||
|
||||
/* tags assigned to kernel upcall operations */
|
||||
static __u64 next_tag_value;
|
||||
static DEFINE_SPINLOCK(next_tag_value_lock);
|
||||
|
||||
/* the pvfs2 memory caches */
|
||||
|
||||
/* a cache for pvfs2 upcall/downcall operations */
|
||||
static struct kmem_cache *op_cache;
|
||||
|
||||
/* a cache for device (/dev/pvfs2-req) communication */
|
||||
static struct kmem_cache *dev_req_cache;
|
||||
|
||||
/* a cache for pvfs2_kiocb objects (i.e pvfs2 iocb structures ) */
|
||||
static struct kmem_cache *pvfs2_kiocb_cache;
|
||||
|
||||
int op_cache_initialize(void)
|
||||
{
|
||||
op_cache = kmem_cache_create("pvfs2_op_cache",
|
||||
sizeof(struct pvfs2_kernel_op_s),
|
||||
0,
|
||||
PVFS2_CACHE_CREATE_FLAGS,
|
||||
NULL);
|
||||
|
||||
if (!op_cache) {
|
||||
gossip_err("Cannot create pvfs2_op_cache\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* initialize our atomic tag counter */
|
||||
spin_lock(&next_tag_value_lock);
|
||||
next_tag_value = 100;
|
||||
spin_unlock(&next_tag_value_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int op_cache_finalize(void)
|
||||
{
|
||||
kmem_cache_destroy(op_cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *get_opname_string(struct pvfs2_kernel_op_s *new_op)
|
||||
{
|
||||
if (new_op) {
|
||||
__s32 type = new_op->upcall.type;
|
||||
|
||||
if (type == PVFS2_VFS_OP_FILE_IO)
|
||||
return "OP_FILE_IO";
|
||||
else if (type == PVFS2_VFS_OP_LOOKUP)
|
||||
return "OP_LOOKUP";
|
||||
else if (type == PVFS2_VFS_OP_CREATE)
|
||||
return "OP_CREATE";
|
||||
else if (type == PVFS2_VFS_OP_GETATTR)
|
||||
return "OP_GETATTR";
|
||||
else if (type == PVFS2_VFS_OP_REMOVE)
|
||||
return "OP_REMOVE";
|
||||
else if (type == PVFS2_VFS_OP_MKDIR)
|
||||
return "OP_MKDIR";
|
||||
else if (type == PVFS2_VFS_OP_READDIR)
|
||||
return "OP_READDIR";
|
||||
else if (type == PVFS2_VFS_OP_READDIRPLUS)
|
||||
return "OP_READDIRPLUS";
|
||||
else if (type == PVFS2_VFS_OP_SETATTR)
|
||||
return "OP_SETATTR";
|
||||
else if (type == PVFS2_VFS_OP_SYMLINK)
|
||||
return "OP_SYMLINK";
|
||||
else if (type == PVFS2_VFS_OP_RENAME)
|
||||
return "OP_RENAME";
|
||||
else if (type == PVFS2_VFS_OP_STATFS)
|
||||
return "OP_STATFS";
|
||||
else if (type == PVFS2_VFS_OP_TRUNCATE)
|
||||
return "OP_TRUNCATE";
|
||||
else if (type == PVFS2_VFS_OP_MMAP_RA_FLUSH)
|
||||
return "OP_MMAP_RA_FLUSH";
|
||||
else if (type == PVFS2_VFS_OP_FS_MOUNT)
|
||||
return "OP_FS_MOUNT";
|
||||
else if (type == PVFS2_VFS_OP_FS_UMOUNT)
|
||||
return "OP_FS_UMOUNT";
|
||||
else if (type == PVFS2_VFS_OP_GETXATTR)
|
||||
return "OP_GETXATTR";
|
||||
else if (type == PVFS2_VFS_OP_SETXATTR)
|
||||
return "OP_SETXATTR";
|
||||
else if (type == PVFS2_VFS_OP_LISTXATTR)
|
||||
return "OP_LISTXATTR";
|
||||
else if (type == PVFS2_VFS_OP_REMOVEXATTR)
|
||||
return "OP_REMOVEXATTR";
|
||||
else if (type == PVFS2_VFS_OP_PARAM)
|
||||
return "OP_PARAM";
|
||||
else if (type == PVFS2_VFS_OP_PERF_COUNT)
|
||||
return "OP_PERF_COUNT";
|
||||
else if (type == PVFS2_VFS_OP_CANCEL)
|
||||
return "OP_CANCEL";
|
||||
else if (type == PVFS2_VFS_OP_FSYNC)
|
||||
return "OP_FSYNC";
|
||||
else if (type == PVFS2_VFS_OP_FSKEY)
|
||||
return "OP_FSKEY";
|
||||
else if (type == PVFS2_VFS_OP_FILE_IOX)
|
||||
return "OP_FILE_IOX";
|
||||
}
|
||||
return "OP_UNKNOWN?";
|
||||
}
|
||||
|
||||
/*
 * Allocates an op from op_cache, zeroes it and prepares it for use:
 * list head, lock and wait queues are initialised, a fresh tag is
 * taken from next_tag_value, the upcall type and the caller's
 * fsuid/fsgid are recorded, and both linger fields are set to
 * op_linger.
 *
 * Returns the new op, or NULL if the allocation fails.
 */
static struct pvfs2_kernel_op_s *op_alloc_common(__s32 op_linger, __s32 type)
{
	struct pvfs2_kernel_op_s *new_op;

	new_op = kmem_cache_alloc(op_cache, PVFS2_CACHE_ALLOC_FLAGS);
	if (!new_op) {
		gossip_err("op_alloc: kmem_cache_alloc failed!\n");
		return NULL;
	}

	memset(new_op, 0, sizeof(*new_op));

	INIT_LIST_HEAD(&new_op->list);
	spin_lock_init(&new_op->lock);
	init_waitqueue_head(&new_op->waitq);

	init_waitqueue_head(&new_op->io_completion_waitq);
	atomic_set(&new_op->aio_ref_count, 0);

	pvfs2_op_initialize(new_op);

	/* hand out the next tag; a wrap to 0 restarts the counter at 100 */
	spin_lock(&next_tag_value_lock);
	new_op->tag = next_tag_value++;
	if (next_tag_value == 0)
		next_tag_value = 100;
	spin_unlock(&next_tag_value_lock);

	new_op->upcall.type = type;
	new_op->attempts = 0;
	gossip_debug(GOSSIP_CACHE_DEBUG,
		     "Alloced OP (%p: %llu %s)\n",
		     new_op,
		     llu(new_op->tag),
		     get_opname_string(new_op));

	/* upcall credentials come from the current task's fsuid/fsgid */
	new_op->upcall.uid = from_kuid(current_user_ns(),
				       current_fsuid());
	new_op->upcall.gid = from_kgid(current_user_ns(),
				       current_fsgid());

	new_op->op_linger_tmp = op_linger;
	new_op->op_linger = new_op->op_linger_tmp;

	return new_op;
}
|
||||
|
||||
/*
 * Allocates an op of the given upcall type with an op_linger value
 * of 1.  Returns NULL if the allocation fails.
 */
struct pvfs2_kernel_op_s *op_alloc(__s32 type)
{
	const __s32 linger = 1;

	return op_alloc_common(linger, type);
}
|
||||
|
||||
/*
 * Allocates an op of the given upcall type with an op_linger value
 * of 2.  Returns NULL if the allocation fails.
 */
struct pvfs2_kernel_op_s *op_alloc_trailer(__s32 type)
{
	const __s32 linger = 2;

	return op_alloc_common(linger, type);
}
|
||||
|
||||
void op_release(struct pvfs2_kernel_op_s *pvfs2_op)
|
||||
{
|
||||
if (pvfs2_op) {
|
||||
gossip_debug(GOSSIP_CACHE_DEBUG,
|
||||
"Releasing OP (%p: %llu)\n",
|
||||
pvfs2_op,
|
||||
llu(pvfs2_op->tag));
|
||||
pvfs2_op_initialize(pvfs2_op);
|
||||
kmem_cache_free(op_cache, pvfs2_op);
|
||||
} else {
|
||||
gossip_err("NULL pointer in op_release\n");
|
||||
}
|
||||
}
|
||||
|
||||
int dev_req_cache_initialize(void)
|
||||
{
|
||||
dev_req_cache = kmem_cache_create("pvfs2_devreqcache",
|
||||
MAX_ALIGNED_DEV_REQ_DOWNSIZE,
|
||||
0,
|
||||
PVFS2_CACHE_CREATE_FLAGS,
|
||||
NULL);
|
||||
|
||||
if (!dev_req_cache) {
|
||||
gossip_err("Cannot create pvfs2_dev_req_cache\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dev_req_cache_finalize(void)
|
||||
{
|
||||
kmem_cache_destroy(dev_req_cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *dev_req_alloc(void)
|
||||
{
|
||||
void *buffer;
|
||||
|
||||
buffer = kmem_cache_alloc(dev_req_cache, PVFS2_CACHE_ALLOC_FLAGS);
|
||||
if (buffer == NULL)
|
||||
gossip_err("Failed to allocate from dev_req_cache\n");
|
||||
else
|
||||
memset(buffer, 0, sizeof(MAX_ALIGNED_DEV_REQ_DOWNSIZE));
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void dev_req_release(void *buffer)
|
||||
{
|
||||
if (buffer)
|
||||
kmem_cache_free(dev_req_cache, buffer);
|
||||
else
|
||||
gossip_err("NULL pointer passed to dev_req_release\n");
|
||||
}
|
||||
|
||||
int kiocb_cache_initialize(void)
|
||||
{
|
||||
pvfs2_kiocb_cache = kmem_cache_create("pvfs2_kiocbcache",
|
||||
sizeof(struct pvfs2_kiocb_s),
|
||||
0,
|
||||
PVFS2_CACHE_CREATE_FLAGS,
|
||||
NULL);
|
||||
|
||||
if (!pvfs2_kiocb_cache) {
|
||||
gossip_err("Cannot create pvfs2_kiocb_cache!\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kiocb_cache_finalize(void)
|
||||
{
|
||||
kmem_cache_destroy(pvfs2_kiocb_cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct pvfs2_kiocb_s *kiocb_alloc(void)
|
||||
{
|
||||
struct pvfs2_kiocb_s *x = NULL;
|
||||
|
||||
x = kmem_cache_alloc(pvfs2_kiocb_cache, PVFS2_CACHE_ALLOC_FLAGS);
|
||||
if (x == NULL)
|
||||
gossip_err("kiocb_alloc: kmem_cache_alloc failed!\n");
|
||||
else
|
||||
memset(x, 0, sizeof(struct pvfs2_kiocb_s));
|
||||
return x;
|
||||
}
|
||||
|
||||
void kiocb_release(struct pvfs2_kiocb_s *x)
|
||||
{
|
||||
if (x)
|
||||
kmem_cache_free(pvfs2_kiocb_cache, x);
|
||||
else
|
||||
gossip_err("kiocb_release: kmem_cache_free NULL pointer!\n");
|
||||
}
|
458
fs/orangefs/pvfs2-debugfs.c
Normal file
458
fs/orangefs/pvfs2-debugfs.c
Normal file
@ -0,0 +1,458 @@
|
||||
/*
|
||||
* What: /sys/kernel/debug/orangefs/debug-help
|
||||
* Date: June 2015
|
||||
* Contact: Mike Marshall <hubcap@omnibond.com>
|
||||
* Description:
|
||||
* List of client and kernel debug keywords.
|
||||
*
|
||||
*
|
||||
* What: /sys/kernel/debug/orangefs/client-debug
|
||||
* Date: June 2015
|
||||
* Contact: Mike Marshall <hubcap@omnibond.com>
|
||||
* Description:
|
||||
* Debug setting for "the client", the userspace
|
||||
* helper for the kernel module.
|
||||
*
|
||||
*
|
||||
* What: /sys/kernel/debug/orangefs/kernel-debug
|
||||
* Date: June 2015
|
||||
* Contact: Mike Marshall <hubcap@omnibond.com>
|
||||
* Description:
|
||||
* Debug setting for the orangefs kernel module.
|
||||
*
|
||||
* Any of the keywords, or comma-separated lists
|
||||
* of keywords, from debug-help can be catted to
|
||||
* client-debug or kernel-debug.
|
||||
*
|
||||
* "none", "all" and "verbose" are special keywords
|
||||
* for client-debug. Setting client-debug to "all"
|
||||
* is kind of like trying to drink water from a
|
||||
* fire hose, "verbose" triggers most of the same
|
||||
* output except for the constant flow of output
|
||||
* from the main wait loop.
|
||||
*
|
||||
* "none" and "all" are similar settings for kernel-debug
|
||||
* no need for a "verbose".
|
||||
*/
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include "pvfs2-debugfs.h"
|
||||
#include "protocol.h"
|
||||
#include "pvfs2-kernel.h"
|
||||
|
||||
static int orangefs_debug_disabled = 1;
|
||||
|
||||
static int orangefs_debug_help_open(struct inode *, struct file *);
|
||||
|
||||
const struct file_operations debug_help_fops = {
|
||||
.open = orangefs_debug_help_open,
|
||||
.read = seq_read,
|
||||
.release = seq_release,
|
||||
.llseek = seq_lseek,
|
||||
};
|
||||
|
||||
static void *help_start(struct seq_file *, loff_t *);
|
||||
static void *help_next(struct seq_file *, void *, loff_t *);
|
||||
static void help_stop(struct seq_file *, void *);
|
||||
static int help_show(struct seq_file *, void *);
|
||||
|
||||
static const struct seq_operations help_debug_ops = {
|
||||
.start = help_start,
|
||||
.next = help_next,
|
||||
.stop = help_stop,
|
||||
.show = help_show,
|
||||
};
|
||||
|
||||
/*
|
||||
* Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
|
||||
* ORANGEFS_CLIENT_DEBUG_FILE.
|
||||
*/
|
||||
DEFINE_MUTEX(orangefs_debug_lock);
|
||||
|
||||
int orangefs_debug_open(struct inode *, struct file *);
|
||||
|
||||
static ssize_t orangefs_debug_read(struct file *,
|
||||
char __user *,
|
||||
size_t,
|
||||
loff_t *);
|
||||
|
||||
static ssize_t orangefs_debug_write(struct file *,
|
||||
const char __user *,
|
||||
size_t,
|
||||
loff_t *);
|
||||
|
||||
static const struct file_operations kernel_debug_fops = {
|
||||
.open = orangefs_debug_open,
|
||||
.read = orangefs_debug_read,
|
||||
.write = orangefs_debug_write,
|
||||
.llseek = generic_file_llseek,
|
||||
};
|
||||
|
||||
/*
|
||||
* initialize kmod debug operations, create orangefs debugfs dir and
|
||||
* ORANGEFS_KMOD_DEBUG_HELP_FILE.
|
||||
*/
|
||||
int pvfs2_debugfs_init(void)
|
||||
{
|
||||
|
||||
int rc = -ENOMEM;
|
||||
|
||||
debug_dir = debugfs_create_dir("orangefs", NULL);
|
||||
if (!debug_dir)
|
||||
goto out;
|
||||
|
||||
help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE,
|
||||
0444,
|
||||
debug_dir,
|
||||
debug_help_string,
|
||||
&debug_help_fops);
|
||||
if (!help_file_dentry)
|
||||
goto out;
|
||||
|
||||
orangefs_debug_disabled = 0;
|
||||
rc = 0;
|
||||
|
||||
out:
|
||||
if (rc)
|
||||
pvfs2_debugfs_cleanup();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
void pvfs2_debugfs_cleanup(void)
|
||||
{
|
||||
debugfs_remove_recursive(debug_dir);
|
||||
}
|
||||
|
||||
/* open ORANGEFS_KMOD_DEBUG_HELP_FILE */
|
||||
static int orangefs_debug_help_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int rc = -ENODEV;
|
||||
int ret;
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"orangefs_debug_help_open: start\n");
|
||||
|
||||
if (orangefs_debug_disabled)
|
||||
goto out;
|
||||
|
||||
ret = seq_open(file, &help_debug_ops);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
((struct seq_file *)(file->private_data))->private = inode->i_private;
|
||||
|
||||
rc = 0;
|
||||
|
||||
out:
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"orangefs_debug_help_open: rc:%d:\n",
|
||||
rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* I think start always gets called again after stop. Start
|
||||
* needs to return NULL when it is done. The whole "payload"
|
||||
* in this case is a single (long) string, so by the second
|
||||
* time we get to start (pos = 1), we're done.
|
||||
*/
|
||||
/*
 * seq_file start callback: the whole payload is the single string in
 * m->private, which is handed out only for position 0.
 */
static void *help_start(struct seq_file *m, loff_t *pos)
{
	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_start: start\n");

	return (*pos == 0) ? m->private : NULL;
}
|
||||
|
||||
/*
 * seq_file next callback: there is never a second record, so this
 * always returns NULL.  The position is still advanced because the
 * seq_file iterator contract expects next() to update *pos even when
 * it signals the end of the sequence.
 */
static void *help_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");

	return NULL;
}
|
||||
|
||||
static void help_stop(struct seq_file *m, void *p)
|
||||
{
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_stop: start\n");
|
||||
}
|
||||
|
||||
static int help_show(struct seq_file *m, void *v)
|
||||
{
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_show: start\n");
|
||||
|
||||
seq_puts(m, v);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize the kernel-debug file.
|
||||
*/
|
||||
int pvfs2_kernel_debug_init(void)
|
||||
{
|
||||
|
||||
int rc = -ENOMEM;
|
||||
struct dentry *ret;
|
||||
char *k_buffer = NULL;
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
|
||||
|
||||
k_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
|
||||
if (!k_buffer)
|
||||
goto out;
|
||||
|
||||
if (strlen(kernel_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) {
|
||||
strcpy(k_buffer, kernel_debug_string);
|
||||
strcat(k_buffer, "\n");
|
||||
} else {
|
||||
strcpy(k_buffer, "none\n");
|
||||
pr_info("%s: overflow 1!\n", __func__);
|
||||
}
|
||||
|
||||
ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
|
||||
0444,
|
||||
debug_dir,
|
||||
k_buffer,
|
||||
&kernel_debug_fops);
|
||||
if (!ret) {
|
||||
pr_info("%s: failed to create %s.\n",
|
||||
__func__,
|
||||
ORANGEFS_KMOD_DEBUG_FILE);
|
||||
goto out;
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
|
||||
out:
|
||||
if (rc)
|
||||
pvfs2_debugfs_cleanup();
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize the client-debug file.
|
||||
*/
|
||||
int pvfs2_client_debug_init(void)
|
||||
{
|
||||
|
||||
int rc = -ENOMEM;
|
||||
char *c_buffer = NULL;
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
|
||||
|
||||
c_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
|
||||
if (!c_buffer)
|
||||
goto out;
|
||||
|
||||
if (strlen(client_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) {
|
||||
strcpy(c_buffer, client_debug_string);
|
||||
strcat(c_buffer, "\n");
|
||||
} else {
|
||||
strcpy(c_buffer, "none\n");
|
||||
pr_info("%s: overflow! 2\n", __func__);
|
||||
}
|
||||
|
||||
client_debug_dentry = debugfs_create_file(ORANGEFS_CLIENT_DEBUG_FILE,
|
||||
0444,
|
||||
debug_dir,
|
||||
c_buffer,
|
||||
&kernel_debug_fops);
|
||||
if (!client_debug_dentry) {
|
||||
pr_info("%s: failed to create %s.\n",
|
||||
__func__,
|
||||
ORANGEFS_CLIENT_DEBUG_FILE);
|
||||
goto out;
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
|
||||
out:
|
||||
if (rc)
|
||||
pvfs2_debugfs_cleanup();
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/
|
||||
int orangefs_debug_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int rc = -ENODEV;
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"%s: orangefs_debug_disabled: %d\n",
|
||||
__func__,
|
||||
orangefs_debug_disabled);
|
||||
|
||||
if (orangefs_debug_disabled)
|
||||
goto out;
|
||||
|
||||
rc = 0;
|
||||
mutex_lock(&orangefs_debug_lock);
|
||||
file->private_data = inode->i_private;
|
||||
mutex_unlock(&orangefs_debug_lock);
|
||||
|
||||
out:
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"orangefs_debug_open: rc: %d\n",
|
||||
rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static ssize_t orangefs_debug_read(struct file *file,
|
||||
char __user *ubuf,
|
||||
size_t count,
|
||||
loff_t *ppos)
|
||||
{
|
||||
char *buf;
|
||||
int sprintf_ret;
|
||||
ssize_t read_ret = -ENOMEM;
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "orangefs_debug_read: start\n");
|
||||
|
||||
buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto out;
|
||||
|
||||
mutex_lock(&orangefs_debug_lock);
|
||||
sprintf_ret = sprintf(buf, "%s", (char *)file->private_data);
|
||||
mutex_unlock(&orangefs_debug_lock);
|
||||
|
||||
read_ret = simple_read_from_buffer(ubuf, count, ppos, buf, sprintf_ret);
|
||||
|
||||
kfree(buf);
|
||||
|
||||
out:
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"orangefs_debug_read: ret: %zu\n",
|
||||
read_ret);
|
||||
|
||||
return read_ret;
|
||||
}
|
||||
|
||||
static ssize_t orangefs_debug_write(struct file *file,
|
||||
const char __user *ubuf,
|
||||
size_t count,
|
||||
loff_t *ppos)
|
||||
{
|
||||
char *buf;
|
||||
int rc = -EFAULT;
|
||||
size_t silly = 0;
|
||||
char *debug_string;
|
||||
struct pvfs2_kernel_op_s *new_op = NULL;
|
||||
struct client_debug_mask c_mask = { NULL, 0, 0 };
|
||||
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"orangefs_debug_write: %s\n",
|
||||
file->f_path.dentry->d_name.name);
|
||||
|
||||
/*
|
||||
* Thwart users who try to jamb a ridiculous number
|
||||
* of bytes into the debug file...
|
||||
*/
|
||||
if (count > PVFS2_MAX_DEBUG_STRING_LEN + 1) {
|
||||
silly = count;
|
||||
count = PVFS2_MAX_DEBUG_STRING_LEN + 1;
|
||||
}
|
||||
|
||||
buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto out;
|
||||
memset(buf, 0, PVFS2_MAX_DEBUG_STRING_LEN);
|
||||
|
||||
if (copy_from_user(buf, ubuf, count - 1)) {
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"%s: copy_from_user failed!\n",
|
||||
__func__);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the keyword string from userspace into a valid debug mask.
|
||||
* The mapping process involves mapping the human-inputted string
|
||||
* into a valid mask, and then rebuilding the string from the
|
||||
* verified valid mask.
|
||||
*
|
||||
* A service operation is required to set a new client-side
|
||||
* debug mask.
|
||||
*/
|
||||
if (!strcmp(file->f_path.dentry->d_name.name,
|
||||
ORANGEFS_KMOD_DEBUG_FILE)) {
|
||||
debug_string_to_mask(buf, &gossip_debug_mask, 0);
|
||||
debug_mask_to_string(&gossip_debug_mask, 0);
|
||||
debug_string = kernel_debug_string;
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"New kernel debug string is %s\n",
|
||||
kernel_debug_string);
|
||||
} else {
|
||||
/* Can't reset client debug mask if client is not running. */
|
||||
if (is_daemon_in_service()) {
|
||||
pr_info("%s: Client not running :%d:\n",
|
||||
__func__,
|
||||
is_daemon_in_service());
|
||||
goto out;
|
||||
}
|
||||
|
||||
debug_string_to_mask(buf, &c_mask, 1);
|
||||
debug_mask_to_string(&c_mask, 1);
|
||||
debug_string = client_debug_string;
|
||||
|
||||
new_op = op_alloc(PVFS2_VFS_OP_PARAM);
|
||||
if (!new_op) {
|
||||
pr_info("%s: op_alloc failed!\n", __func__);
|
||||
goto out;
|
||||
}
|
||||
|
||||
new_op->upcall.req.param.op =
|
||||
PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES;
|
||||
new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET;
|
||||
memset(new_op->upcall.req.param.s_value,
|
||||
0,
|
||||
PVFS2_MAX_DEBUG_STRING_LEN);
|
||||
sprintf(new_op->upcall.req.param.s_value,
|
||||
"%llx %llx\n",
|
||||
c_mask.mask1,
|
||||
c_mask.mask2);
|
||||
|
||||
/* service_operation returns 0 on success... */
|
||||
rc = service_operation(new_op,
|
||||
"pvfs2_param",
|
||||
PVFS2_OP_INTERRUPTIBLE);
|
||||
|
||||
if (rc)
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"%s: service_operation failed! rc:%d:\n",
|
||||
__func__,
|
||||
rc);
|
||||
|
||||
op_release(new_op);
|
||||
}
|
||||
|
||||
mutex_lock(&orangefs_debug_lock);
|
||||
memset(file->f_inode->i_private, 0, PVFS2_MAX_DEBUG_STRING_LEN);
|
||||
sprintf((char *)file->f_inode->i_private, "%s\n", debug_string);
|
||||
mutex_unlock(&orangefs_debug_lock);
|
||||
|
||||
*ppos += count;
|
||||
if (silly)
|
||||
rc = silly;
|
||||
else
|
||||
rc = count;
|
||||
|
||||
out:
|
||||
gossip_debug(GOSSIP_DEBUGFS_DEBUG,
|
||||
"orangefs_debug_write: rc: %d\n",
|
||||
rc);
|
||||
kfree(buf);
|
||||
return rc;
|
||||
}
|
316
fs/orangefs/pvfs2-mod.c
Normal file
316
fs/orangefs/pvfs2-mod.c
Normal file
@ -0,0 +1,316 @@
|
||||
/*
|
||||
* (C) 2001 Clemson University and The University of Chicago
|
||||
*
|
||||
* Changes by Acxiom Corporation to add proc file handler for pvfs2 client
|
||||
* parameters, Copyright Acxiom Corporation, 2005.
|
||||
*
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
#include "protocol.h"
|
||||
#include "pvfs2-kernel.h"
|
||||
#include "pvfs2-debugfs.h"
|
||||
#include "pvfs2-sysfs.h"
|
||||
|
||||
/*
 * PVFS2_VERSION is normally supplied by ./configure; fall back to a
 * placeholder when it is not.
 */
#if !defined(PVFS2_VERSION)
# define PVFS2_VERSION "Unknown"
#endif
|
||||
|
||||
/*
|
||||
* global variables declared here
|
||||
*/
|
||||
|
||||
/* array of client debug keyword/mask values */
|
||||
struct client_debug_mask *cdm_array;
|
||||
int cdm_element_count;
|
||||
|
||||
char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none";
|
||||
char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN];
|
||||
char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN];
|
||||
|
||||
char *debug_help_string;
|
||||
int help_string_initialized;
|
||||
struct dentry *help_file_dentry;
|
||||
struct dentry *client_debug_dentry;
|
||||
struct dentry *debug_dir;
|
||||
int client_verbose_index;
|
||||
int client_all_index;
|
||||
struct pvfs2_stats g_pvfs2_stats;
|
||||
|
||||
/* the size of the hash tables for ops in progress */
|
||||
int hash_table_size = 509;
|
||||
|
||||
static ulong module_parm_debug_mask;
|
||||
__u64 gossip_debug_mask;
|
||||
struct client_debug_mask client_debug_mask = { NULL, 0, 0 };
|
||||
unsigned int kernel_mask_set_mod_init; /* implicitly false */
|
||||
int op_timeout_secs = PVFS2_DEFAULT_OP_TIMEOUT_SECS;
|
||||
int slot_timeout_secs = PVFS2_DEFAULT_SLOT_TIMEOUT_SECS;
|
||||
__u32 DEBUG_LINE = 50;
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("PVFS2 Development Team");
|
||||
MODULE_DESCRIPTION("The Linux Kernel VFS interface to PVFS2");
|
||||
MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see pvfs2-debug.h for values)");
|
||||
MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds");
|
||||
MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds");
|
||||
MODULE_PARM_DESC(hash_table_size,
|
||||
"size of hash table for operations in progress");
|
||||
|
||||
static struct file_system_type pvfs2_fs_type = {
|
||||
.name = "pvfs2",
|
||||
.mount = pvfs2_mount,
|
||||
.kill_sb = pvfs2_kill_sb,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
module_param(hash_table_size, int, 0);
|
||||
module_param(module_parm_debug_mask, ulong, 0755);
|
||||
module_param(op_timeout_secs, int, 0);
|
||||
module_param(slot_timeout_secs, int, 0);
|
||||
|
||||
/* synchronizes the request device file */
|
||||
struct mutex devreq_mutex;
|
||||
|
||||
/*
|
||||
blocks non-priority requests from being queued for servicing. this
|
||||
could be used for protecting the request list data structure, but
|
||||
for now it's only being used to stall the op addition to the request
|
||||
list
|
||||
*/
|
||||
struct mutex request_mutex;
|
||||
|
||||
/* hash table for storing operations waiting for matching downcall */
|
||||
struct list_head *htable_ops_in_progress;
|
||||
DEFINE_SPINLOCK(htable_ops_in_progress_lock);
|
||||
|
||||
/* list for queueing upcall operations */
|
||||
LIST_HEAD(pvfs2_request_list);
|
||||
|
||||
/* used to protect the above pvfs2_request_list */
|
||||
DEFINE_SPINLOCK(pvfs2_request_list_lock);
|
||||
|
||||
/* used for incoming request notification */
|
||||
DECLARE_WAIT_QUEUE_HEAD(pvfs2_request_list_waitq);
|
||||
|
||||
/*
 * Module entry point.
 *
 * Normalizes the debug mask given at load time, then brings up, in order:
 * the backing dev info, the op / dev-req / inode / kiocb caches, the device
 * subsystem, the in-progress op hash table, the fsid key table, the debug
 * help string, the debugfs and sysfs entries, and finally registers the
 * filesystem type.
 *
 * Returns 0 once the filesystem is registered. If a step after bdi_init()
 * fails, everything set up before it is torn down in reverse order and the
 * error is returned; a bdi_init() failure is returned directly.
 */
static int __init pvfs2_init(void)
{
	int ret;
	int i;

	/* convert input debug mask to a 64-bit unsigned integer */
	gossip_debug_mask = (unsigned long long) module_parm_debug_mask;

	/*
	 * set the kernel's gossip debug string; invalid mask values will
	 * be ignored.
	 */
	debug_mask_to_string(&gossip_debug_mask, 0);

	/* remove any invalid values from the mask */
	debug_string_to_mask(kernel_debug_string, &gossip_debug_mask, 0);

	/*
	 * if the mask has a non-zero value, then indicate that the mask
	 * was set when the kernel module was loaded. The pvfs2 dev ioctl
	 * command will look at this boolean to determine if the kernel's
	 * debug mask should be overwritten when the client-core is started.
	 */
	if (gossip_debug_mask != 0)
		kernel_mask_set_mod_init = true;

	/* print information message to the system log */
	pr_info("pvfs2: pvfs2_init called with debug mask: :%s: :%llx:\n",
		kernel_debug_string,
		(unsigned long long)gossip_debug_mask);

	ret = bdi_init(&pvfs2_backing_dev_info);
	if (ret)
		return ret;

	/* negative timeouts make no sense; clamp them to zero */
	if (op_timeout_secs < 0)
		op_timeout_secs = 0;

	if (slot_timeout_secs < 0)
		slot_timeout_secs = 0;

	/* initialize global book keeping data structures */
	ret = op_cache_initialize();
	if (ret < 0)
		goto err;

	ret = dev_req_cache_initialize();
	if (ret < 0)
		goto cleanup_op;

	ret = pvfs2_inode_cache_initialize();
	if (ret < 0)
		goto cleanup_req;

	ret = kiocb_cache_initialize();
	if (ret < 0)
		goto cleanup_inode;

	/* Initialize the pvfsdev subsystem. */
	ret = pvfs2_dev_init();
	if (ret < 0) {
		gossip_err("pvfs2: could not initialize device subsystem %d!\n",
			   ret);
		goto cleanup_kiocb;
	}

	/*
	 * NOTE(review): these mutexes are initialized only after
	 * pvfs2_dev_init() has run -- confirm nothing can take them earlier.
	 */
	mutex_init(&devreq_mutex);
	mutex_init(&request_mutex);

	htable_ops_in_progress =
	    kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL);
	if (!htable_ops_in_progress) {
		gossip_err("Failed to initialize op hashtable\n");
		ret = -ENOMEM;
		goto cleanup_device;
	}

	/* initialize a doubly linked list at each hash table index */
	for (i = 0; i < hash_table_size; i++)
		INIT_LIST_HEAD(&htable_ops_in_progress[i]);

	ret = fsid_key_table_initialize();
	if (ret < 0)
		goto cleanup_progress_table;

	/*
	 * Build the contents of /sys/kernel/debug/orangefs/debug-help
	 * from the keywords in the kernel keyword/mask array.
	 *
	 * The keywords in the client keyword/mask array are
	 * unknown at boot time.
	 *
	 * orangefs_prepare_debugfs_help_string will be used again
	 * later to rebuild the debug-help file after the client starts
	 * and passes along the needed info. The argument signifies
	 * which time orangefs_prepare_debugfs_help_string is being
	 * called.
	 *
	 * On failure, unwind everything initialized so far instead of
	 * returning with it all still allocated.
	 */
	ret = orangefs_prepare_debugfs_help_string(1);
	if (ret)
		goto cleanup_key_table;

	/*
	 * NOTE(review): whatever these three calls return is not checked
	 * here -- confirm whether their failures need handling.
	 */
	pvfs2_debugfs_init();
	pvfs2_kernel_debug_init();
	orangefs_sysfs_init();

	ret = register_filesystem(&pvfs2_fs_type);
	if (ret == 0) {
		pr_info("pvfs2: module version %s loaded\n", PVFS2_VERSION);
		return 0;
	}

	/* registration failed: undo debugfs/sysfs, then fall through */
	pvfs2_debugfs_cleanup();
	orangefs_sysfs_exit();

cleanup_key_table:
	fsid_key_table_finalize();

cleanup_progress_table:
	kfree(htable_ops_in_progress);

cleanup_device:
	pvfs2_dev_cleanup();

cleanup_kiocb:
	kiocb_cache_finalize();

cleanup_inode:
	pvfs2_inode_cache_finalize();

cleanup_req:
	dev_req_cache_finalize();

cleanup_op:
	op_cache_finalize();

err:
	bdi_destroy(&pvfs2_backing_dev_info);

	return ret;
}
|
||||
|
||||
/*
 * Module exit point.
 *
 * Unregisters the filesystem type, removes the debugfs/sysfs entries,
 * finalizes the fsid key table and the device subsystem, releases every op
 * still sitting on the upcall request list or in the in-progress hash
 * table, and finally destroys the caches, the hash table and the bdi.
 */
static void __exit pvfs2_exit(void)
{
	int i = 0;
	struct pvfs2_kernel_op_s *cur_op = NULL;

	gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2: pvfs2_exit called\n");

	unregister_filesystem(&pvfs2_fs_type);
	pvfs2_debugfs_cleanup();
	orangefs_sysfs_exit();
	fsid_key_table_finalize();
	pvfs2_dev_cleanup();

	/* clear out all pending upcall op requests */
	spin_lock(&pvfs2_request_list_lock);
	while (!list_empty(&pvfs2_request_list)) {
		cur_op = list_entry(pvfs2_request_list.next,
				    struct pvfs2_kernel_op_s,
				    list);
		list_del(&cur_op->list);
		gossip_debug(GOSSIP_INIT_DEBUG,
			     "Freeing unhandled upcall request type %d\n",
			     cur_op->upcall.type);
		op_release(cur_op);
	}
	spin_unlock(&pvfs2_request_list_lock);

	/* release every op still parked in the in-progress hash table */
	for (i = 0; i < hash_table_size; i++) {
		while (!list_empty(&htable_ops_in_progress[i])) {
			cur_op = list_entry(htable_ops_in_progress[i].next,
					    struct pvfs2_kernel_op_s,
					    list);
			/*
			 * Unlink before releasing, as the request-list loop
			 * above does; otherwise the bucket never becomes
			 * empty and this loop would not terminate.
			 */
			list_del(&cur_op->list);
			op_release(cur_op);
		}
	}

	kiocb_cache_finalize();
	pvfs2_inode_cache_finalize();
	dev_req_cache_finalize();
	op_cache_finalize();

	kfree(htable_ops_in_progress);

	bdi_destroy(&pvfs2_backing_dev_info);

	pr_info("pvfs2: module version %s unloaded\n", PVFS2_VERSION);
}
|
||||
|
||||
/*
|
||||
* What we do in this function is to walk the list of operations
|
||||
* that are in progress in the hash table and mark them as purged as well.
|
||||
*/
|
||||
void purge_inprogress_ops(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < hash_table_size; i++) {
|
||||
struct pvfs2_kernel_op_s *op;
|
||||
struct pvfs2_kernel_op_s *next;
|
||||
|
||||
list_for_each_entry_safe(op,
|
||||
next,
|
||||
&htable_ops_in_progress[i],
|
||||
list) {
|
||||
spin_lock(&op->lock);
|
||||
gossip_debug(GOSSIP_INIT_DEBUG,
|
||||
"pvfs2-client-core: purging in-progress op tag "
|
||||
"%llu %s\n",
|
||||
llu(op->tag),
|
||||
get_opname_string(op));
|
||||
set_op_state_purged(op);
|
||||
spin_unlock(&op->lock);
|
||||
wake_up_interruptible(&op->waitq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Hook the init/exit routines defined above into module load and unload. */
module_init(pvfs2_init);
module_exit(pvfs2_exit);
|
Loading…
Reference in New Issue
Block a user