2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-23 04:34:11 +08:00
linux-next/fs/jfs/super.c

1085 lines
25 KiB
C
Raw Normal View History

/*
* Copyright (C) International Business Machines Corp., 2000-2004
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/parser.h>
#include <linux/completion.h>
#include <linux/vfs.h>
#include <linux/quotaops.h>
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patches modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deals with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respect I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently deprecated for compatibility (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
#include <linux/mount.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/posix_acl.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/crc32.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. 
This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/uaccess.h>
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patches modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deals with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respect I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently deprecated for compatibility (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
#include <linux/seq_file.h>
#include <linux/blkdev.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_inode.h"
#include "jfs_metapage.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
#include "jfs_imap.h"
#include "jfs_acl.h"
#include "jfs_debug.h"
#include "jfs_xattr.h"
#include "jfs_dinode.h"
/* Module metadata. */
MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
MODULE_LICENSE("GPL");
/* Slab cache from which jfs_alloc_inode() takes struct jfs_inode_info. */
static struct kmem_cache *jfs_inode_cachep;
/*
 * Declared here so code earlier in the file can refer to them; the
 * initialized definitions are not in this part of the file.
 */
static const struct super_operations jfs_super_operations;
static const struct export_operations jfs_export_operations;
static struct file_system_type jfs_fs_type;
/* Capacity of the jfsCommitThread[] table below. */
#define MAX_COMMIT_THREADS 64
/*
 * "commit_threads" module parameter.  The permission argument is 0, so the
 * parameter gets no sysfs entry and can only be given at load time.
 */
static int commit_threads;
module_param(commit_threads, int, 0);
MODULE_PARM_DESC(commit_threads, "Number of commit threads");
/* Task handles for the commit threads (at most MAX_COMMIT_THREADS). */
static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS];
/* Not static: these two task handles have external linkage. */
struct task_struct *jfsIOthread;
struct task_struct *jfsSyncThread;
#ifdef CONFIG_JFS_DEBUG
/*
 * Debug-build log level, defaulting to JFS_LOGLEVEL_WARN.  Mode 0644
 * exposes it in sysfs, readable by everyone and writable by root.
 */
int jfsloglevel = JFS_LOGLEVEL_WARN;
module_param(jfsloglevel, int, 0644);
MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)");
#endif
/*
 * Apply the mount's error policy after an error has been reported.
 *
 * A read-only mount is left untouched.  Otherwise the superblock is marked
 * dirty and then, depending on the flags in the JFS superblock info, the
 * kernel panics (JFS_ERR_PANIC) or the filesystem is switched to read-only
 * (JFS_ERR_REMOUNT_RO).  With neither flag set, marking the superblock
 * dirty is all that happens.
 */
static void jfs_handle_error(struct super_block *sb)
{
	struct jfs_sb_info *info = JFS_SBI(sb);

	if (sb->s_flags & MS_RDONLY)
		return;

	updateSuper(sb, FM_DIRTY);

	if (info->flag & JFS_ERR_PANIC) {
		panic("JFS (device %s): panic forced after error\n",
		      sb->s_id);
	} else if (info->flag & JFS_ERR_REMOUNT_RO) {
		jfs_err("ERROR: (device %s): remounting filesystem as read-only",
			sb->s_id);
		sb->s_flags |= MS_RDONLY;
	}
}
void jfs_error(struct super_block *sb, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
pr_err("ERROR: (device %s): %ps: %pV\n",
sb->s_id, __builtin_return_address(0), &vaf);
va_end(args);
jfs_handle_error(sb);
}
/*
 * Allocate a JFS in-core inode from jfs_inode_cachep (GFP_NOFS) and hand
 * back the VFS inode embedded in it.  Returns NULL if the allocation fails.
 * With CONFIG_QUOTA the i_dquot member is zeroed before the inode is used.
 */
static struct inode *jfs_alloc_inode(struct super_block *sb)
{
	struct jfs_inode_info *ji;

	ji = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS);
	if (ji == NULL)
		return NULL;

#ifdef CONFIG_QUOTA
	memset(&ji->i_dquot, 0, sizeof(ji->i_dquot));
#endif

	return &ji->vfs_inode;
}
2011-01-07 14:49:49 +08:00
static void jfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
struct jfs_inode_info *ji = JFS_IP(inode);
kmem_cache_free(jfs_inode_cachep, ji);
}
static void jfs_destroy_inode(struct inode *inode)
{
struct jfs_inode_info *ji = JFS_IP(inode);
BUG_ON(!list_empty(&ji->anon_inode_list));
spin_lock_irq(&ji->ag_lock);
if (ji->active_ag != -1) {
struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
atomic_dec(&bmap->db_active[ji->active_ag]);
ji->active_ag = -1;
}
spin_unlock_irq(&ji->ag_lock);
2011-01-07 14:49:49 +08:00
call_rcu(&inode->i_rcu, jfs_i_callback);
}
/*
 * Fill @buf with filesystem statistics for the superblock of @dentry.
 * Always returns 0.
 *
 * Block counts come straight from the block allocation map.  Inode counts
 * are an estimate rather than the raw im_numinos/im_numfree values:
 * reporting only the inodes that currently exist would make some
 * applications believe the filesystem is out of inodes, so the total is
 * the existing inodes plus what the free blocks could still hold, capped
 * at 0xffffffff.  The fsid is two CRC32s, one over each half of the uuid.
 */
static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
	struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap;
	s64 maxinodes;

	jfs_info("In jfs_statfs");

	buf->f_type = JFS_SUPER_MAGIC;
	buf->f_bsize = sbi->bsize;
	buf->f_namelen = JFS_NAME_MAX;

	/* Block accounting. */
	buf->f_blocks = sbi->bmap->db_mapsize;
	buf->f_bfree = sbi->bmap->db_nfree;
	buf->f_bavail = sbi->bmap->db_nfree;

	/* Inode accounting (estimated, see above). */
	maxinodes = min((s64) atomic_read(&imap->im_numinos) +
			((sbi->bmap->db_nfree >> imap->im_l2nbperiext)
			 << L2INOSPEREXT), (s64) 0xffffffffLL);
	buf->f_files = maxinodes;
	buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
				    atomic_read(&imap->im_numfree));

	/* Filesystem id: CRC32 of the first and second half of the uuid. */
	buf->f_fsid.val[0] = (u32)crc32_le(0, sbi->uuid,
					   sizeof(sbi->uuid)/2);
	buf->f_fsid.val[1] = (u32)crc32_le(0,
					   sbi->uuid + sizeof(sbi->uuid)/2,
					   sizeof(sbi->uuid)/2);

	return 0;
}
#ifdef CONFIG_QUOTA
/* Defined further down in the file; needed by the code just below. */
static int jfs_quota_off(struct super_block *sb, int type);
static int jfs_quota_on(struct super_block *sb, int type, int format_id,
			const struct path *path);

/* Call jfs_quota_off() for every quota type, 0 .. MAXQUOTAS - 1. */
static void jfs_quota_off_umount(struct super_block *sb)
{
	int qtype = 0;

	while (qtype < MAXQUOTAS) {
		jfs_quota_off(sb, qtype);
		qtype++;
	}
}

/*
 * quotactl operations: on/off use the JFS wrappers, everything else goes
 * to the generic dquot helpers.
 */
static const struct quotactl_ops jfs_quotactl_ops = {
	.quota_on	= jfs_quota_on,
	.quota_off	= jfs_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk,
	.get_nextdqblk	= dquot_get_next_dqblk,
};
#else
/* Without CONFIG_QUOTA there is nothing to turn off. */
static inline void jfs_quota_off_umount(struct super_block *sb)
{
}
#endif
/*
 * Release everything attached to @sb when it is unmounted: quotas are
 * turned off, the filesystem is unmounted through jfs_umount() (a failure
 * is only logged), the NLS table is unloaded, the direct inode's page
 * cache is truncated and the inode released, and finally the JFS
 * superblock info itself is freed.
 */
static void jfs_put_super(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	int err;

	jfs_info("In jfs_put_super");

	jfs_quota_off_umount(sb);

	err = jfs_umount(sb);
	if (err)
		jfs_err("jfs_umount failed with return code %d", err);

	unload_nls(sbi->nls_tab);

	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
	iput(sbi->direct_inode);

	kfree(sbi);
}
/*
 * Identifiers for the mount options matched through the tokens table and
 * dispatched on in parse_options().  Opt_err is the entry that terminates
 * the table.
 */
enum {
	Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
	Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
	Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
	Opt_discard, Opt_nodiscard, Opt_discard_minblk
};
static const match_table_t tokens = {
{Opt_integrity, "integrity"},
{Opt_nointegrity, "nointegrity"},
{Opt_iocharset, "iocharset=%s"},
{Opt_resize, "resize=%u"},
{Opt_resize_nosize, "resize"},
{Opt_errors, "errors=%s"},
{Opt_ignore, "noquota"},
{Opt_ignore, "quota"},
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
{Opt_usrquota, "usrquota"},
{Opt_grpquota, "grpquota"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_umask, "umask=%u"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_discard_minblk, "discard=%u"},
{Opt_err, NULL}
};
/*
 * Parse a comma-separated mount option string.
 *
 * @options:   option string, split in place by strsep(); may be NULL
 * @sb:        superblock being mounted or remounted
 * @newLVSize: out: size requested with "resize=N", or for a bare "resize"
 *             the block device size in filesystem blocks; 0 if neither
 *             option was given
 * @flag:      in/out: JFS mount flag word, updated as options are seen
 *
 * Returns 1 on success (also when @options is NULL) and 0 when an option
 * is unknown or its value is rejected.
 *
 * A charset chosen with iocharset= is installed in the superblock info
 * only after the whole string has parsed.  uid=, gid= and umask= are
 * stored only once their value has been validated, so a rejected value
 * never overwrites the setting currently in effect.  Values accepted
 * earlier in the string stay applied even if a later option fails.
 */
static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
			 int *flag)
{
	void *nls_map = (void *)-1;	/* -1: no change;  NULL: none */
	char *p;
	struct jfs_sb_info *sbi = JFS_SBI(sb);

	*newLVSize = 0;

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		substring_t args[MAX_OPT_ARGS];
		int token;

		/* Tolerate empty entries such as "a,,b". */
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_integrity:
			*flag &= ~JFS_NOINTEGRITY;
			break;
		case Opt_nointegrity:
			*flag |= JFS_NOINTEGRITY;
			break;
		case Opt_ignore:
			/* Accepted for compatibility, deliberately a no-op. */
			break;
		case Opt_iocharset:
			/* A later iocharset= replaces an earlier one. */
			if (nls_map && nls_map != (void *) -1)
				unload_nls(nls_map);
			if (!strcmp(args[0].from, "none"))
				nls_map = NULL;
			else {
				nls_map = load_nls(args[0].from);
				if (!nls_map) {
					pr_err("JFS: charset not found\n");
					goto cleanup;
				}
			}
			break;
		case Opt_resize:
		{
			char *resize = args[0].from;
			int rc = kstrtoll(resize, 0, newLVSize);

			if (rc)
				goto cleanup;
			break;
		}
		case Opt_resize_nosize:
		{
			/* No size given: use the whole block device. */
			*newLVSize = i_size_read(sb->s_bdev->bd_inode) >>
				sb->s_blocksize_bits;
			if (*newLVSize == 0)
				pr_err("JFS: Cannot determine volume size\n");
			break;
		}
		case Opt_errors:
		{
			char *errors = args[0].from;

			if (!errors || !*errors)
				goto cleanup;
			/* Exactly one of the three policy bits ends up set. */
			if (!strcmp(errors, "continue")) {
				*flag &= ~JFS_ERR_REMOUNT_RO;
				*flag &= ~JFS_ERR_PANIC;
				*flag |= JFS_ERR_CONTINUE;
			} else if (!strcmp(errors, "remount-ro")) {
				*flag &= ~JFS_ERR_CONTINUE;
				*flag &= ~JFS_ERR_PANIC;
				*flag |= JFS_ERR_REMOUNT_RO;
			} else if (!strcmp(errors, "panic")) {
				*flag &= ~JFS_ERR_CONTINUE;
				*flag &= ~JFS_ERR_REMOUNT_RO;
				*flag |= JFS_ERR_PANIC;
			} else {
				pr_err("JFS: %s is an invalid error handler\n",
				       errors);
				goto cleanup;
			}
			break;
		}

#ifdef CONFIG_QUOTA
		case Opt_quota:
		case Opt_usrquota:
			*flag |= JFS_USRQUOTA;
			break;
		case Opt_grpquota:
			*flag |= JFS_GRPQUOTA;
			break;
#else
		case Opt_usrquota:
		case Opt_grpquota:
		case Opt_quota:
			/* Warn, but do not fail the mount. */
			pr_err("JFS: quota operations not supported\n");
			break;
#endif
		case Opt_uid:
		{
			char *uid = args[0].from;
			uid_t val;
			kuid_t kuid;
			int rc = kstrtouint(uid, 0, &val);

			if (rc)
				goto cleanup;
			/* Validate first; only a valid id reaches sbi. */
			kuid = make_kuid(current_user_ns(), val);
			if (!uid_valid(kuid))
				goto cleanup;
			sbi->uid = kuid;
			break;
		}

		case Opt_gid:
		{
			char *gid = args[0].from;
			gid_t val;
			kgid_t kgid;
			int rc = kstrtouint(gid, 0, &val);

			if (rc)
				goto cleanup;
			/* Validate first; only a valid id reaches sbi. */
			kgid = make_kgid(current_user_ns(), val);
			if (!gid_valid(kgid))
				goto cleanup;
			sbi->gid = kgid;
			break;
		}

		case Opt_umask:
		{
			char *umask = args[0].from;
			unsigned int val;
			int rc = kstrtouint(umask, 8, &val);

			if (rc)
				goto cleanup;
			/* Only the permission bits may be set. */
			if (val & ~0777) {
				pr_err("JFS: Invalid value of umask\n");
				goto cleanup;
			}
			sbi->umask = val;
			break;
		}

		case Opt_discard:
		{
			struct request_queue *q = bdev_get_queue(sb->s_bdev);
			/* if set to 1, even copying files will cause
			 * trimming :O
			 * -> user has more control over the online trimming
			 */
			sbi->minblks_trim = 64;
			if (blk_queue_discard(q))
				*flag |= JFS_DISCARD;
			else
				pr_err("JFS: discard option not supported on device\n");
			break;
		}

		case Opt_nodiscard:
			*flag &= ~JFS_DISCARD;
			break;

		case Opt_discard_minblk:
		{
			struct request_queue *q = bdev_get_queue(sb->s_bdev);
			char *minblks_trim = args[0].from;
			int rc;

			if (blk_queue_discard(q)) {
				*flag |= JFS_DISCARD;
				rc = kstrtouint(minblks_trim, 0,
						&sbi->minblks_trim);
				if (rc)
					goto cleanup;
			} else
				pr_err("JFS: discard option not supported on device\n");
			break;
		}

		default:
			pr_err("jfs: Unrecognized mount option \"%s\" or missing value\n",
			       p);
			goto cleanup;
		}
	}

	if (nls_map != (void *) -1) {
		/* Discard old (if remount) */
		unload_nls(sbi->nls_tab);
		sbi->nls_tab = nls_map;
	}
	return 1;

cleanup:
	/* Drop a charset that was loaded but never installed. */
	if (nls_map && nls_map != (void *) -1)
		unload_nls(nls_map);
	return 0;
}
static int jfs_remount(struct super_block *sb, int *flags, char *data)
{
s64 newLVSize = 0;
int rc = 0;
int flag = JFS_SBI(sb)->flag;
int ret;
fs: push sync_filesystem() down to the file system's remount_fs() Previously, the no-op "mount -o mount /dev/xxx" operation when the file system is already mounted read-write causes an implied, unconditional syncfs(). This seems pretty stupid, and it's certainly documented or guaraunteed to do this, nor is it particularly useful, except in the case where the file system was mounted rw and is getting remounted read-only. However, it's possible that there might be some file systems that are actually depending on this behavior. In most file systems, it's probably fine to only call sync_filesystem() when transitioning from read-write to read-only, and there are some file systems where this is not needed at all (for example, for a pseudo-filesystem or something like romfs). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig <hch@infradead.org> Cc: Artem Bityutskiy <dedekind1@gmail.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Evgeniy Dushistov <dushistov@mail.ru> Cc: Jan Kara <jack@suse.cz> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Anders Larsen <al@alarsen.net> Cc: Phillip Lougher <phillip@squashfs.org.uk> Cc: Kees Cook <keescook@chromium.org> Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> Cc: Petr Vandrovec <petr@vandrovec.name> Cc: xfs@oss.sgi.com Cc: linux-btrfs@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: codalist@coda.cs.cmu.edu Cc: linux-ext4@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: fuse-devel@lists.sourceforge.net Cc: cluster-devel@redhat.com Cc: linux-mtd@lists.infradead.org Cc: jfs-discussion@lists.sourceforge.net Cc: linux-nfs@vger.kernel.org Cc: linux-nilfs@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Cc: reiserfs-devel@vger.kernel.org
2014-03-13 22:14:33 +08:00
sync_filesystem(sb);
if (!parse_options(data, sb, &newLVSize, &flag))
return -EINVAL;
if (newLVSize) {
if (sb->s_flags & MS_RDONLY) {
pr_err("JFS: resize requires volume to be mounted read-write\n");
return -EROFS;
}
rc = jfs_extendfs(sb, newLVSize, 0);
if (rc)
return rc;
}
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
/*
* Invalidate any previously read metadata. fsck may have
* changed the on-disk data since we mounted r/o
*/
truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
JFS_SBI(sb)->flag = flag;
ret = jfs_mount_rw(sb, 1);
/* mark the fs r/w for quota activity */
sb->s_flags &= ~MS_RDONLY;
dquot_resume(sb, -1);
return ret;
}
if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
rc = dquot_suspend(sb, -1);
if (rc < 0)
return rc;
rc = jfs_umount_rw(sb);
JFS_SBI(sb)->flag = flag;
return rc;
}
if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
if (!(sb->s_flags & MS_RDONLY)) {
rc = jfs_umount_rw(sb);
if (rc)
return rc;
JFS_SBI(sb)->flag = flag;
ret = jfs_mount_rw(sb, 1);
return ret;
}
JFS_SBI(sb)->flag = flag;
return 0;
}
/*
 * jfs_fill_super - initialise a super block for a JFS volume at mount time
 * @sb:     super block being set up
 * @data:   mount option string, handed to parse_options()
 * @silent: when non-zero, suppress the jfs_err() messages for a failed
 *          jfs_mount()/jfs_mount_rw()
 *
 * Allocates the per-mount jfs_sb_info, parses the mount options, creates
 * the direct-mapping inode used for metadata, mounts the volume (and its
 * log unless the mount is read-only) and instantiates the root dentry.
 *
 * Returns 0 on success.  On failure returns -ENOMEM when an allocation
 * fails, the error from jfs_iget() when the root inode cannot be read,
 * and -EINVAL for every other failure.
 */
static int jfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct jfs_sb_info *sbi;
	struct inode *inode;
	int rc;
	s64 newLVSize = 0;
	int flag, ret = -EINVAL;

	jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags);

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	sb->s_fs_info = sbi;
	sb->s_max_links = JFS_LINK_MAX;
	sbi->sb = sb;
	sbi->uid = INVALID_UID;
	sbi->gid = INVALID_GID;
	sbi->umask = -1;

	/* initialize the mount flag and determine the default error handler */
	flag = JFS_ERR_REMOUNT_RO;

	/*
	 * On failure parse_options() has already released any NLS table it
	 * loaded and has not stored one in sbi, so only sbi needs freeing.
	 */
	if (!parse_options((char *) data, sb, &newLVSize, &flag))
		goto out_kfree;
	sbi->flag = flag;

#ifdef CONFIG_JFS_POSIX_ACL
	sb->s_flags |= MS_POSIXACL;
#endif

	if (newLVSize) {
		pr_err("resize option for remount only\n");
		/*
		 * parse_options() succeeded, so sbi->nls_tab may now hold a
		 * loaded NLS table (iocharset=); it must be unloaded before
		 * sbi is freed or the table reference is leaked.
		 */
		goto out_unload;
	}

	/*
	 * Initialize blocksize to 4K.
	 */
	sb_set_blocksize(sb, PSIZE);

	/*
	 * Set method vectors.
	 */
	sb->s_op = &jfs_super_operations;
	sb->s_export_op = &jfs_export_operations;
	sb->s_xattr = jfs_xattr_handlers;
#ifdef CONFIG_QUOTA
	sb->dq_op = &dquot_operations;
	sb->s_qcop = &jfs_quotactl_ops;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
#endif

	/*
	 * Initialize direct-mapping inode/address-space
	 */
	inode = new_inode(sb);
	if (inode == NULL) {
		ret = -ENOMEM;
		goto out_unload;
	}
	inode->i_ino = 0;
	inode->i_size = i_size_read(sb->s_bdev->bd_inode);
	inode->i_mapping->a_ops = &jfs_metapage_aops;
	hlist_add_fake(&inode->i_hash);
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

	sbi->direct_inode = inode;

	rc = jfs_mount(sb);
	if (rc) {
		if (!silent)
			jfs_err("jfs_mount failed w/return code = %d", rc);
		goto out_mount_failed;
	}
	if (sb->s_flags & MS_RDONLY)
		sbi->log = NULL;
	else {
		rc = jfs_mount_rw(sb, 0);
		if (rc) {
			if (!silent) {
				jfs_err("jfs_mount_rw failed, return code = %d",
					rc);
			}
			goto out_no_rw;
		}
	}

	sb->s_magic = JFS_SUPER_MAGIC;

	if (sbi->mntflag & JFS_OS2)
		sb->s_d_op = &jfs_ci_dentry_operations;

	inode = jfs_iget(sb, ROOT_I);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto out_no_rw;
	}
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		goto out_no_root;

	/* logical blocks are represented by 40 bits in pxd_t, etc. */
	sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
#if BITS_PER_LONG == 32
	/*
	 * Page cache is indexed by long.
	 * I would use MAX_LFS_FILESIZE, but it's only half as big
	 */
	sb->s_maxbytes = min(((u64) PAGE_SIZE << 32) - 1,
			     (u64)sb->s_maxbytes);
#endif
	sb->s_time_gran = 1;
	return 0;

	/* Error unwinding: each label undoes one more step of the setup. */
out_no_root:
	jfs_err("jfs_read_super: get root dentry failed");

out_no_rw:
	rc = jfs_umount(sb);
	if (rc)
		jfs_err("jfs_umount failed with return code %d", rc);
out_mount_failed:
	filemap_write_and_wait(sbi->direct_inode->i_mapping);
	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
	make_bad_inode(sbi->direct_inode);
	iput(sbi->direct_inode);
	sbi->direct_inode = NULL;
out_unload:
	unload_nls(sbi->nls_tab);
out_kfree:
	kfree(sbi);
	return ret;
}
filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many case, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem. So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operation freeze_fs and unfreeze_fs to avoid a confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep a current behavior. 
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com> Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com> Cc: <xfs-masters@oss.sgi.com> Cc: <linux-ext4@vger.kernel.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Alasdair G Kergon <agk@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-10 08:40:58 +08:00
static int jfs_freeze(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
int rc = 0;
if (!(sb->s_flags & MS_RDONLY)) {
txQuiesce(sb);
rc = lmLogShutdown(log);
if (rc) {
jfs_error(sb, "lmLogShutdown failed\n");
/* let operations fail rather than hang */
txResume(sb);
return rc;
}
rc = updateSuper(sb, FM_CLEAN);
if (rc) {
jfs_err("jfs_freeze: updateSuper failed");
/*
* Don't fail here. Everything succeeded except
* marking the superblock clean, so there's really
* no harm in leaving it frozen for now.
*/
}
}
filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many case, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem. So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operation freeze_fs and unfreeze_fs to avoid a confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep a current behavior. 
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com> Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com> Cc: <xfs-masters@oss.sgi.com> Cc: <linux-ext4@vger.kernel.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Alasdair G Kergon <agk@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-10 08:40:58 +08:00
return 0;
}
filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many case, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem. So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operation freeze_fs and unfreeze_fs to avoid a confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep a current behavior. 
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com> Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com> Cc: <xfs-masters@oss.sgi.com> Cc: <linux-ext4@vger.kernel.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Alasdair G Kergon <agk@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-10 08:40:58 +08:00
static int jfs_unfreeze(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
int rc = 0;
if (!(sb->s_flags & MS_RDONLY)) {
rc = updateSuper(sb, FM_MOUNT);
if (rc) {
jfs_error(sb, "updateSuper failed\n");
goto out;
}
rc = lmLogInit(log);
if (rc)
jfs_error(sb, "lmLogInit failed\n");
out:
txResume(sb);
}
return rc;
}
/*
 * jfs_do_mount - mount entry point for JFS
 *
 * Delegates to mount_bdev(), passing jfs_fill_super() as the callback
 * that fills in the super block, and returns its result unchanged.
 */
static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	struct dentry *root;

	root = mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
	return root;
}
/*
 * jfs_sync_fs - write back quota data and flush the journal
 * @sb:   super block of the file system
 * @wait: passed through to jfs_flush_journal()
 *
 * Always returns 0.
 */
static int jfs_sync_fs(struct super_block *sb, int wait)
{
	struct jfs_log *log = JFS_SBI(sb)->log;

	/* log == NULL indicates read-only mount */
	if (!log)
		return 0;

	/*
	 * Write quota structures to quota file, sync_blockdev() will
	 * write them to disk later
	 */
	dquot_writeback_dquots(sb, -1);
	jfs_flush_journal(log, wait);
	jfs_syncpt(log, 0);

	return 0;
}
static int jfs_show_options(struct seq_file *seq, struct dentry *root)
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
{
struct jfs_sb_info *sbi = JFS_SBI(root->d_sb);
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
if (uid_valid(sbi->uid))
seq_printf(seq, ",uid=%d", from_kuid(&init_user_ns, sbi->uid));
if (gid_valid(sbi->gid))
seq_printf(seq, ",gid=%d", from_kgid(&init_user_ns, sbi->gid));
if (sbi->umask != -1)
seq_printf(seq, ",umask=%03o", sbi->umask);
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
if (sbi->flag & JFS_NOINTEGRITY)
seq_puts(seq, ",nointegrity");
if (sbi->flag & JFS_DISCARD)
seq_printf(seq, ",discard=%u", sbi->minblks_trim);
if (sbi->nls_tab)
seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset);
if (sbi->flag & JFS_ERR_CONTINUE)
seq_printf(seq, ",errors=continue");
if (sbi->flag & JFS_ERR_PANIC)
seq_printf(seq, ",errors=panic");
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have the a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respection I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently depricated for compatability (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
#ifdef CONFIG_QUOTA
[PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patches modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deals with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have a show_options function to show those options - and the quota echoing becomes virtually negligible. 
Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respect I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently deprecated for compatibility (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g. usrjquota and grpquota) Signed-off-by: Mark Bellon <mbellon@mvista.com> Acked-by: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Jan Kara <jack@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-07 06:16:54 +08:00
if (sbi->flag & JFS_USRQUOTA)
seq_puts(seq, ",usrquota");
if (sbi->flag & JFS_GRPQUOTA)
seq_puts(seq, ",grpquota");
#endif
return 0;
}
#ifdef CONFIG_QUOTA
/*
 * jfs_quota_read - copy bytes out of a quota file
 *
 * Reads up to @len bytes starting at byte offset @off from the quota file of
 * the given @type into @data.  The page cache is bypassed on purpose: we
 * cannot afford to take its locks here.  Quota files are never truncated and
 * the quota code serializes its own operations (nobody else should touch the
 * files), so races are not a concern.
 *
 * A request starting beyond the end of the file reads nothing; a request
 * running past the end is clamped to the file size.  Unmapped blocks (holes)
 * read back as zeroes.
 *
 * Returns the (possibly clamped) byte count, the error from jfs_get_block(),
 * or -EIO when a block cannot be read.
 */
static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
			      size_t len, loff_t off)
{
	struct inode *quota_inode = sb_dqopt(sb)->files[type];
	sector_t lblock = off >> sb->s_blocksize_bits;
	int in_block = off & (sb->s_blocksize - 1);
	loff_t file_size = i_size_read(quota_inode);
	struct buffer_head map_bh;
	size_t remaining;

	if (off > file_size)
		return 0;
	if (off + len > file_size)
		len = file_size - off;

	for (remaining = len; remaining > 0; lblock++) {
		struct buffer_head *bh;
		int chunk;
		int rc;

		/* Never cross a block boundary within one step. */
		chunk = sb->s_blocksize - in_block < remaining ?
				sb->s_blocksize - in_block : remaining;

		map_bh.b_state = 0;
		map_bh.b_size = i_blocksize(quota_inode);
		rc = jfs_get_block(quota_inode, lblock, &map_bh, 0);
		if (rc)
			return rc;

		if (buffer_mapped(&map_bh)) {
			bh = sb_bread(sb, map_bh.b_blocknr);
			if (!bh)
				return -EIO;
			memcpy(data, bh->b_data + in_block, chunk);
			brelse(bh);
		} else {
			/* Unmapped block: a hole, which reads as zeroes. */
			memset(data, 0, chunk);
		}

		/* Only the first block can start mid-block. */
		in_block = 0;
		remaining -= chunk;
		data += chunk;
	}

	return len;
}
/*
 * Write to quotafile
 *
 * Copies @len bytes from @data into the quota file of the given @type,
 * starting at byte offset @off.  The quota inode is taken from
 * sb_dqopt(sb)->files[type] and stays locked for the whole operation.
 *
 * Returns the number of bytes written.  If nothing at all was written the
 * value of err is returned instead: 0 for a zero-length request, otherwise
 * the error from jfs_get_block() or -EIO.  When an error occurs after some
 * data has already been written, the short byte count is returned and the
 * error code itself is not reported.
 */
static ssize_t jfs_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off)
{
struct inode *inode = sb_dqopt(sb)->files[type];
sector_t blk = off >> sb->s_blocksize_bits;
int err = 0;
int offset = off & (sb->s_blocksize - 1);
int tocopy;
size_t towrite = len;
struct buffer_head tmp_bh;
struct buffer_head *bh;
inode_lock(inode);
while (towrite > 0) {
/* Copy no further than the end of the current block. */
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
tmp_bh.b_size = i_blocksize(inode);
/*
 * Map the file block.  The last argument is 1 here while the read path
 * passes 0 - presumably a "create" flag; confirm against jfs_get_block().
 */
err = jfs_get_block(inode, blk, &tmp_bh, 1);
if (err)
goto out;
/*
 * A partial-block update goes through sb_bread(), a whole-block overwrite
 * through sb_getblk() - presumably because only the former needs the
 * block's existing contents.
 */
if (offset || tocopy != sb->s_blocksize)
bh = sb_bread(sb, tmp_bh.b_blocknr);
else
bh = sb_getblk(sb, tmp_bh.b_blocknr);
if (!bh) {
err = -EIO;
goto out;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, tocopy);
flush_dcache_page(bh->b_page);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
unlock_buffer(bh);
brelse(bh);
/* Only the first block can start at a non-zero offset. */
offset = 0;
towrite -= tocopy;
data += tocopy;
blk++;
}
out:
/* Nothing was written: return err without touching the inode. */
if (len == towrite) {
inode_unlock(inode);
return err;
}
/* Grow i_size if the bytes written reach past the current end of file. */
if (inode->i_size < off+len-towrite)
i_size_write(inode, off+len-towrite);
inode->i_version++;
inode->i_mtime = inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
inode_unlock(inode);
return len - towrite;
}
/*
 * jfs_get_dquots - return the dquot pointers kept in the JFS-private part
 * of @inode (the i_dquot member reached through JFS_IP()).
 */
static struct dquot **jfs_get_dquots(struct inode *inode)
{
	struct dquot **dquots = JFS_IP(inode)->i_dquot;

	return dquots;
}
/*
 * jfs_quota_on - enable quotas of @type using the quota file at @path
 *
 * Delegates the actual work to dquot_quota_on().  On success the quota file's
 * inode is flagged no-atime and immutable, both in the JFS mode2 bits and in
 * the VFS inode flags, and then marked dirty.
 *
 * Returns 0 on success or the error from dquot_quota_on().
 */
static int jfs_quota_on(struct super_block *sb, int type, int format_id,
			const struct path *path)
{
	struct inode *quota_inode;
	int rc = dquot_quota_on(sb, type, format_id, path);

	if (rc)
		return rc;

	quota_inode = d_inode(path->dentry);

	/* Update the JFS and VFS flags together under the inode lock. */
	inode_lock(quota_inode);
	JFS_IP(quota_inode)->mode2 |= JFS_NOATIME_FL | JFS_IMMUTABLE_FL;
	inode_set_flags(quota_inode, S_NOATIME | S_IMMUTABLE,
			S_NOATIME | S_IMMUTABLE);
	inode_unlock(quota_inode);

	mark_inode_dirty(quota_inode);
	return 0;
}
/*
 * jfs_quota_off - disable quotas of @type
 *
 * The quota file inode is looked up and pinned with igrab() before quotas
 * are switched off, so that its no-atime/immutable flags can be cleared
 * afterwards.  If there is no inode, or it cannot be pinned, quotas are
 * simply turned off without touching any flags.
 *
 * Returns the result of dquot_quota_off().
 */
static int jfs_quota_off(struct super_block *sb, int type)
{
	struct inode *quota_inode = sb_dqopt(sb)->files[type];
	int rc;

	if (!quota_inode || !igrab(quota_inode))
		return dquot_quota_off(sb, type);

	rc = dquot_quota_off(sb, type);
	if (!rc) {
		/* Undo the flag changes made when quotas were turned on. */
		inode_lock(quota_inode);
		JFS_IP(quota_inode)->mode2 &=
				~(JFS_NOATIME_FL | JFS_IMMUTABLE_FL);
		inode_set_flags(quota_inode, 0, S_NOATIME | S_IMMUTABLE);
		inode_unlock(quota_inode);
		mark_inode_dirty(quota_inode);
	}

	/* Drop the reference taken by igrab() above. */
	iput(quota_inode);
	return rc;
}
#endif
/*
 * Superblock operations table for JFS: inode allocation/teardown, inode
 * write-back, unmount, sync, freeze/unfreeze, statfs, remount and mount
 * option display.  The quota read/write/get_dquots hooks are only present
 * when CONFIG_QUOTA is enabled.
 */
static const struct super_operations jfs_super_operations = {
.alloc_inode = jfs_alloc_inode,
.destroy_inode = jfs_destroy_inode,
.dirty_inode = jfs_dirty_inode,
.write_inode = jfs_write_inode,
.evict_inode = jfs_evict_inode,
.put_super = jfs_put_super,
.sync_fs = jfs_sync_fs,
filesystem freeze: add error handling of write_super_lockfs/unlockfs Currently, ext3 in mainline Linux doesn't have the freeze feature which suspends write requests. So, we cannot take a backup which keeps the filesystem's consistency with the storage device's features (snapshot and replication) while it is mounted. In many case, a commercial filesystem (e.g. VxFS) has the freeze feature and it would be used to get the consistent backup. If Linux's standard filesystem ext3 has the freeze feature, we can do it without a commercial filesystem. So I have implemented the ioctls of the freeze feature. I think we can take the consistent backup with the following steps. 1. Freeze the filesystem with the freeze ioctl. 2. Separate the replication volume or create the snapshot with the storage device's feature. 3. Unfreeze the filesystem with the unfreeze ioctl. 4. Take the backup from the separated replication volume or the snapshot. This patch: VFS: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they can return an error. Rename write_super_lockfs and unlockfs of the super block operation freeze_fs and unfreeze_fs to avoid a confusion. ext3, ext4, xfs, gfs2, jfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that write_super_lockfs returns an error if needed, and unlockfs always returns 0. reiserfs: Changed the type of write_super_lockfs and unlockfs from "void" to "int" so that they always return 0 (success) to keep a current behavior. 
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com> Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com> Cc: <xfs-masters@oss.sgi.com> Cc: <linux-ext4@vger.kernel.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Alasdair G Kergon <agk@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-10 08:40:58 +08:00
.freeze_fs = jfs_freeze,
.unfreeze_fs = jfs_unfreeze,
.statfs = jfs_statfs,
.remount_fs = jfs_remount,
.show_options = jfs_show_options,
#ifdef CONFIG_QUOTA
.quota_read = jfs_quota_read,
.quota_write = jfs_quota_write,
.get_dquots = jfs_get_dquots,
#endif
};
/*
 * Export operations table: the JFS callbacks for turning a file handle into
 * a dentry (or its parent's dentry) and for finding a directory's parent.
 */
static const struct export_operations jfs_export_operations = {
	.fh_to_dentry	= jfs_fh_to_dentry,
	.fh_to_parent	= jfs_fh_to_parent,
	.get_parent	= jfs_get_parent,
};
/*
 * Filesystem type descriptor for "jfs".  It is handed to
 * register_filesystem()/unregister_filesystem() by the module init and exit
 * routines; FS_REQUIRES_DEV is the only fs_flags bit set.
 */
static struct file_system_type jfs_fs_type = {
	/* identity */
	.owner		= THIS_MODULE,
	.name		= "jfs",
	.fs_flags	= FS_REQUIRES_DEV,

	/* mount / teardown entry points */
	.mount		= jfs_do_mount,
	.kill_sb	= kill_block_super,
};
fs: Limit sys_mount to only request filesystem modules. Modify the request_module to prefix the file system type with "fs-" and add aliases to all of the filesystems that can be built as modules to match. A common practice is to build all of the kernel code and leave code that is not commonly needed as modules, with the result that many users are exposed to any bug anywhere in the kernel. Looking for filesystems with a fs- prefix limits the pool of possible modules that can be loaded by mount to just filesystems trivially making things safer with no real cost. Using aliases means user space can control the policy of which filesystem modules are auto-loaded by editing /etc/modprobe.d/*.conf with blacklist and alias directives. Allowing simple, safe, well understood work-arounds to known problematic software. This also addresses a rare but unfortunate problem where the filesystem name is not the same as its module name and module auto-loading would not work. While writing this patch I saw a handful of such cases. The most significant being autofs that lives in the module autofs4. This is relevant to user namespaces because we can reach the request module in get_fs_type() without having any special permissions, and people get uncomfortable when a user specified string (in this case the filesystem type) goes all of the way to request_module. After having looked at this issue I don't think there is any particular reason to perform any filtering or permission checks beyond making it clear in the module request that we want a filesystem module. The common pattern in the kernel is to call request_module() without regard to the user's permissions. In general all a filesystem module does once loaded is call register_filesystem() and go to sleep. Which means there is not much attack surface exposed by loading a filesystem module unless the filesystem is mounted. 
In a user namespace filesystems are not mounted unless .fs_flags = FS_USERNS_MOUNT, which most filesystems do not set today. Acked-by: Serge Hallyn <serge.hallyn@canonical.com> Acked-by: Kees Cook <keescook@chromium.org> Reported-by: Kees Cook <keescook@google.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
2013-03-03 11:39:14 +08:00
/*
 * Filesystem module alias for "jfs" - presumably what lets an "fs-"
 * prefixed module request for this filesystem type resolve to this module.
 */
MODULE_ALIAS_FS("jfs");
/*
 * init_once - constructor for objects in the JFS inode slab cache
 *
 * @foo points at the struct jfs_inode_info being constructed.  The object is
 * zeroed, its list head and locks are initialised, active_ag is set to -1,
 * and the embedded VFS inode gets its one-time initialisation.
 */
static void init_once(void *foo)
{
	struct jfs_inode_info *ip = foo;

	memset(ip, 0, sizeof(*ip));

	INIT_LIST_HEAD(&ip->anon_inode_list);
	init_rwsem(&ip->rdwrlock);
	mutex_init(&ip->commit_mutex);
	init_rwsem(&ip->xattr_sem);
	spin_lock_init(&ip->ag_lock);
	ip->active_ag = -1;

	inode_init_once(&ip->vfs_inode);
}
/*
 * Module initialisation for JFS.
 *
 * Sets things up in this order: the "jfs_ip" inode slab cache (constructed
 * by init_once), the metapage layer, the transaction manager, the jfsIO
 * thread, the jfsCommit threads, the jfsSync thread, the optional /proc
 * support, and finally the filesystem registration.
 *
 * Returns 0 on success.  On any failure everything that was already set up
 * is torn down again via the labels at the end, and the error code of the
 * failing step is returned (-ENOMEM if the slab cache cannot be created).
 */
static int __init init_jfs_fs(void)
{
int i;
int rc;
jfs_inode_cachep =
kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
2016-01-15 07:18:21 +08:00
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
init_once);
if (jfs_inode_cachep == NULL)
return -ENOMEM;
/*
* Metapage initialization
*/
rc = metapage_init();
if (rc) {
jfs_err("metapage_init failed w/rc = %d", rc);
goto free_slab;
}
/*
* Transaction Manager initialization
*/
rc = txInit();
if (rc) {
jfs_err("txInit failed w/rc = %d", rc);
goto free_metapage;
}
/*
* I/O completion thread (endio)
*/
jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO");
if (IS_ERR(jfsIOthread)) {
rc = PTR_ERR(jfsIOthread);
jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
goto end_txmngr;
}
/*
 * Number of commit threads: one per online CPU when commit_threads is
 * below 1, and never more than MAX_COMMIT_THREADS.
 */
if (commit_threads < 1)
commit_threads = num_online_cpus();
if (commit_threads > MAX_COMMIT_THREADS)
commit_threads = MAX_COMMIT_THREADS;
for (i = 0; i < commit_threads; i++) {
jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL,
"jfsCommit");
if (IS_ERR(jfsCommitThread[i])) {
rc = PTR_ERR(jfsCommitThread[i]);
jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
/* Remember how many threads exist so only those are stopped below. */
commit_threads = i;
goto kill_committask;
}
}
jfsSyncThread = kthread_run(jfs_sync, NULL, "jfsSync");
if (IS_ERR(jfsSyncThread)) {
rc = PTR_ERR(jfsSyncThread);
jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
goto kill_committask;
}
#ifdef PROC_FS_JFS
jfs_proc_init();
#endif
rc = register_filesystem(&jfs_fs_type);
if (!rc)
return 0;
/* Registration failed: fall through the unwind path below. */
#ifdef PROC_FS_JFS
jfs_proc_clean();
#endif
kthread_stop(jfsSyncThread);
kill_committask:
for (i = 0; i < commit_threads; i++)
kthread_stop(jfsCommitThread[i]);
kthread_stop(jfsIOthread);
end_txmngr:
txExit();
free_metapage:
metapage_exit();
free_slab:
kmem_cache_destroy(jfs_inode_cachep);
return rc;
}
/*
 * Module teardown for JFS: shuts down the transaction manager and metapage
 * layer, stops the I/O, commit and sync threads, removes the optional /proc
 * support, unregisters the filesystem and finally destroys the inode cache.
 */
static void __exit exit_jfs_fs(void)
{
	int t = 0;

	jfs_info("exit_jfs_fs called");

	txExit();
	metapage_exit();

	kthread_stop(jfsIOthread);
	while (t < commit_threads)
		kthread_stop(jfsCommitThread[t++]);
	kthread_stop(jfsSyncThread);

#ifdef PROC_FS_JFS
	jfs_proc_clean();
#endif
	unregister_filesystem(&jfs_fs_type);

	/*
	 * Inodes may still be waiting to be freed via RCU; let those
	 * callbacks finish before the cache they free into goes away.
	 */
	rcu_barrier();
	kmem_cache_destroy(jfs_inode_cachep);
}
/* Hook the JFS setup and teardown routines into module load and unload. */
module_init(init_jfs_fs)
module_exit(exit_jfs_fs)