xfs: introduce inode log format object

We currently carry around and log an entire inode core in the
struct xfs_inode. A lot of the information in the inode core is
duplicated in the VFS inode, but we cannot remove this duplication
of infomration because the inode core is logged directly in
xfs_inode_item_format().

Add a new function xfs_inode_item_format_core() that copies the
inode core data into a struct xfs_icdinode that is pulled directly
from the log vector buffer. This means we no longer directly
copy the inode core, but copy the structures one member at a time.
This will be slightly less efficient than copying, but will allow us
to remove duplicate and unnecessary items from the struct xfs_inode.

To enable us to do this, call the new structure a xfs_log_dinode,
so that we know it's different to the physical xfs_dinode and the
in-core xfs_icdinode.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
Dave Chinner 2016-02-09 16:54:58 +11:00 committed by Dave Chinner
parent 36f90b0a2d
commit f8d55aa052
8 changed files with 218 additions and 44 deletions

View File

@ -196,8 +196,8 @@ xfs_imap_to_bp(
void
xfs_dinode_from_disk(
xfs_icdinode_t *to,
xfs_dinode_t *from)
struct xfs_icdinode *to,
struct xfs_dinode *from)
{
to->di_magic = be16_to_cpu(from->di_magic);
to->di_mode = be16_to_cpu(from->di_mode);
@ -243,8 +243,8 @@ xfs_dinode_from_disk(
void
xfs_dinode_to_disk(
xfs_dinode_t *to,
xfs_icdinode_t *from)
struct xfs_dinode *to,
struct xfs_icdinode *from)
{
to->di_magic = cpu_to_be16(from->di_magic);
to->di_mode = cpu_to_be16(from->di_mode);

View File

@ -20,7 +20,58 @@
struct xfs_inode;
struct xfs_dinode;
struct xfs_icdinode;
/*
* In memory representation of the XFS inode. This is held in the in-core
* struct xfs_inode to represent the on disk values, but no longer needs to be
* identical to the on-disk structure as it is always translated to on-disk
* format specific structures at the appropriate time.
*/
struct xfs_icdinode {
__uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
__uint16_t di_mode; /* mode and type of file */
__int8_t di_version; /* inode version */
__int8_t di_format; /* format of di_c data */
__uint16_t di_onlink; /* old number of links to file */
__uint32_t di_uid; /* owner's user id */
__uint32_t di_gid; /* owner's group id */
__uint32_t di_nlink; /* number of links to file */
__uint16_t di_projid_lo; /* lower part of owner's project id */
__uint16_t di_projid_hi; /* higher part of owner's project id */
__uint8_t di_pad[6]; /* unused, zeroed space */
__uint16_t di_flushiter; /* incremented on flush */
xfs_ictimestamp_t di_atime; /* time last accessed */
xfs_ictimestamp_t di_mtime; /* time last modified */
xfs_ictimestamp_t di_ctime; /* time created/inode modified */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
__uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
__int8_t di_aformat; /* format of attr fork's data */
__uint32_t di_dmevmask; /* DMIG event mask */
__uint16_t di_dmstate; /* DMIG state info */
__uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
__uint32_t di_gen; /* generation number */
/* di_next_unlinked is the only non-core field in the old dinode */
xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */
/* start of the extended dinode, writable fields */
__uint32_t di_crc; /* CRC of the inode */
__uint64_t di_changecount; /* number of attribute changes */
xfs_lsn_t di_lsn; /* flush sequence */
__uint64_t di_flags2; /* more random flags */
__uint8_t di_pad2[16]; /* more padding for future expansion */
/* fields only written to during inode creation */
xfs_ictimestamp_t di_crtime; /* time created */
xfs_ino_t di_ino; /* inode number */
uuid_t di_uuid; /* UUID of the filesystem */
/* structure must be padded to 64 bit alignment */
};
/*
* Inode location information. Stored in the inode and passed to

View File

@ -290,6 +290,7 @@ typedef struct xfs_inode_log_format_64 {
__int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_64_t;
/*
* Flags for xfs_trans_log_inode flags field.
*/
@ -360,10 +361,10 @@ typedef struct xfs_ictimestamp {
} xfs_ictimestamp_t;
/*
* NOTE: This structure must be kept identical to struct xfs_dinode
* except for the endianness annotations.
* Define the format of the inode core that is logged. This structure must be
* kept identical to struct xfs_dinode except for the endianness annotations.
*/
typedef struct xfs_icdinode {
struct xfs_log_dinode {
__uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
__uint16_t di_mode; /* mode and type of file */
__int8_t di_version; /* inode version */
@ -407,13 +408,13 @@ typedef struct xfs_icdinode {
uuid_t di_uuid; /* UUID of the filesystem */
/* structure must be padded to 64 bit alignment */
} xfs_icdinode_t;
};
static inline uint xfs_icdinode_size(int version)
static inline uint xfs_log_dinode_size(int version)
{
if (version == 3)
return sizeof(struct xfs_icdinode);
return offsetof(struct xfs_icdinode, di_next_unlinked);
return sizeof(struct xfs_log_dinode);
return offsetof(struct xfs_log_dinode, di_next_unlinked);
}
/*

View File

@ -79,7 +79,7 @@ xfs_inode_alloc(
memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
ip->i_flags = 0;
ip->i_delayed_blks = 0;
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
memset(&ip->i_d, 0, sizeof(ip->i_d));
return ip;
}

View File

@ -63,7 +63,7 @@ typedef struct xfs_inode {
unsigned long i_flags; /* see defined flags below */
unsigned int i_delayed_blks; /* count of delay alloc blks */
xfs_icdinode_t i_d; /* most of ondisk inode */
struct xfs_icdinode i_d; /* most of ondisk inode */
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */

View File

@ -135,7 +135,7 @@ xfs_inode_item_size(
*nvecs += 2;
*nbytes += sizeof(struct xfs_inode_log_format) +
xfs_icdinode_size(ip->i_d.di_version);
xfs_log_dinode_size(ip->i_d.di_version);
xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
if (XFS_IFORK_Q(ip))
@ -322,6 +322,127 @@ xfs_inode_item_format_attr_fork(
}
}
static void
xfs_icdinode_to_log_dinode(
struct xfs_icdinode *from,
struct xfs_log_dinode *to)
{
to->di_magic = from->di_magic;
to->di_mode = from->di_mode;
to->di_version = from->di_version;
to->di_format = from->di_format;
to->di_onlink = from->di_onlink;
to->di_uid = from->di_uid;
to->di_gid = from->di_gid;
to->di_nlink = from->di_nlink;
to->di_projid_lo = from->di_projid_lo;
to->di_projid_hi = from->di_projid_hi;
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
to->di_atime.t_sec = from->di_atime.t_sec;
to->di_atime.t_nsec = from->di_atime.t_nsec;
to->di_mtime.t_sec = from->di_mtime.t_sec;
to->di_mtime.t_nsec = from->di_mtime.t_nsec;
to->di_ctime.t_sec = from->di_ctime.t_sec;
to->di_ctime.t_nsec = from->di_ctime.t_nsec;
to->di_size = from->di_size;
to->di_nblocks = from->di_nblocks;
to->di_extsize = from->di_extsize;
to->di_nextents = from->di_nextents;
to->di_anextents = from->di_anextents;
to->di_forkoff = from->di_forkoff;
to->di_aformat = from->di_aformat;
to->di_dmevmask = from->di_dmevmask;
to->di_dmstate = from->di_dmstate;
to->di_flags = from->di_flags;
to->di_gen = from->di_gen;
if (from->di_version == 3) {
to->di_changecount = from->di_changecount;
to->di_crtime.t_sec = from->di_crtime.t_sec;
to->di_crtime.t_nsec = from->di_crtime.t_nsec;
to->di_flags2 = from->di_flags2;
to->di_ino = from->di_ino;
to->di_lsn = from->di_lsn;
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
} else {
to->di_flushiter = from->di_flushiter;
}
}
/*
* Recovery needs to be able to convert a log dinode back to a real dinode
* for writeback we do that by converting a log dinode to a icdinode, and
* then passing that to the formatting function.
*/
void
xfs_log_dinode_to_icdinode(
struct xfs_log_dinode *from,
struct xfs_icdinode *to)
{
to->di_magic = from->di_magic;
to->di_mode = from->di_mode;
to->di_version = from->di_version;
to->di_format = from->di_format;
to->di_onlink = from->di_onlink;
to->di_uid = from->di_uid;
to->di_gid = from->di_gid;
to->di_nlink = from->di_nlink;
to->di_projid_lo = from->di_projid_lo;
to->di_projid_hi = from->di_projid_hi;
memset(to->di_pad, 0, sizeof(to->di_pad));
to->di_atime.t_sec = from->di_atime.t_sec;
to->di_atime.t_nsec = from->di_atime.t_nsec;
to->di_mtime.t_sec = from->di_mtime.t_sec;
to->di_mtime.t_nsec = from->di_mtime.t_nsec;
to->di_ctime.t_sec = from->di_ctime.t_sec;
to->di_ctime.t_nsec = from->di_ctime.t_nsec;
to->di_size = from->di_size;
to->di_nblocks = from->di_nblocks;
to->di_extsize = from->di_extsize;
to->di_nextents = from->di_nextents;
to->di_anextents = from->di_anextents;
to->di_forkoff = from->di_forkoff;
to->di_aformat = from->di_aformat;
to->di_dmevmask = from->di_dmevmask;
to->di_dmstate = from->di_dmstate;
to->di_flags = from->di_flags;
to->di_gen = from->di_gen;
if (from->di_version == 3) {
to->di_changecount = from->di_changecount;
to->di_crtime.t_sec = from->di_crtime.t_sec;
to->di_crtime.t_nsec = from->di_crtime.t_nsec;
to->di_flags2 = from->di_flags2;
to->di_ino = from->di_ino;
to->di_lsn = from->di_lsn;
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
} else {
to->di_flushiter = from->di_flushiter;
}
}
/*
* Format the inode core. Current timestamp data is only in the VFS inode
* fields, so we need to grab them from there. Hence rather than just copying
* the XFS inode core structure, format the fields directly into the iovec.
*/
static void
xfs_inode_item_format_core(
struct xfs_inode *ip,
struct xfs_log_vec *lv,
struct xfs_log_iovec **vecp)
{
struct xfs_log_dinode *dic;
dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
xfs_icdinode_to_log_dinode(&ip->i_d, dic);
xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
}
/*
* This is called to fill in the vector of log iovecs for the given inode
* log item. It fills the first item with an inode log format structure,
@ -351,10 +472,7 @@ xfs_inode_item_format(
ilf->ilf_size = 2; /* format + core */
xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE,
&ip->i_d,
xfs_icdinode_size(ip->i_d.di_version));
xfs_inode_item_format_core(ip, lv, &vecp);
xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
if (XFS_IFORK_Q(ip)) {
xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);

View File

@ -49,6 +49,8 @@ extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
extern void xfs_iflush_abort(struct xfs_inode *, bool);
extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
xfs_inode_log_format_t *);
extern void xfs_log_dinode_to_icdinode(struct xfs_log_dinode *from,
struct xfs_icdinode *to);
extern struct kmem_zone *xfs_ili_zone;

View File

@ -2839,7 +2839,8 @@ xlog_recover_inode_pass2(
int error;
int attr_index;
uint fields;
xfs_icdinode_t *dicp;
struct xfs_log_dinode *ldip;
struct xfs_icdinode icic;
uint isize;
int need_free = 0;
@ -2892,8 +2893,8 @@ xlog_recover_inode_pass2(
error = -EFSCORRUPTED;
goto out_release;
}
dicp = item->ri_buf[1].i_addr;
if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
ldip = item->ri_buf[1].i_addr;
if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
xfs_alert(mp,
"%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
__func__, item, in_f->ilf_ino);
@ -2929,13 +2930,13 @@ xlog_recover_inode_pass2(
* to skip replay when the on disk inode is newer than the log one
*/
if (!xfs_sb_version_hascrc(&mp->m_sb) &&
dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
/*
* Deal with the wrap case, DI_MAX_FLUSH is less
* than smaller numbers
*/
if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
/* do nothing */
} else {
trace_xfs_log_recover_inode_skip(log, in_f);
@ -2945,13 +2946,13 @@ xlog_recover_inode_pass2(
}
/* Take the opportunity to reset the flush iteration count */
dicp->di_flushiter = 0;
ldip->di_flushiter = 0;
if (unlikely(S_ISREG(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
if (unlikely(S_ISREG(ldip->di_mode))) {
if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
(ldip->di_format != XFS_DINODE_FMT_BTREE)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
XFS_ERRLEVEL_LOW, mp, dicp);
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad regular inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@ -2959,12 +2960,12 @@ xlog_recover_inode_pass2(
error = -EFSCORRUPTED;
goto out_release;
}
} else if (unlikely(S_ISDIR(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE) &&
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
} else if (unlikely(S_ISDIR(ldip->di_mode))) {
if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
(ldip->di_format != XFS_DINODE_FMT_BTREE) &&
(ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
XFS_ERRLEVEL_LOW, mp, dicp);
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad dir inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@ -2973,32 +2974,32 @@ xlog_recover_inode_pass2(
goto out_release;
}
}
if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
XFS_ERRLEVEL_LOW, mp, dicp);
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
"dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
__func__, item, dip, bp, in_f->ilf_ino,
dicp->di_nextents + dicp->di_anextents,
dicp->di_nblocks);
ldip->di_nextents + ldip->di_anextents,
ldip->di_nblocks);
error = -EFSCORRUPTED;
goto out_release;
}
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
XFS_ERRLEVEL_LOW, mp, dicp);
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
"dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
error = -EFSCORRUPTED;
goto out_release;
}
isize = xfs_icdinode_size(dicp->di_version);
isize = xfs_log_dinode_size(ldip->di_version);
if (unlikely(item->ri_buf[1].i_len > isize)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
XFS_ERRLEVEL_LOW, mp, dicp);
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad inode log record length %d, rec ptr 0x%p",
__func__, item->ri_buf[1].i_len, item);
@ -3007,7 +3008,8 @@ xlog_recover_inode_pass2(
}
/* The core is in in-core format */
xfs_dinode_to_disk(dip, dicp);
xfs_log_dinode_to_icdinode(ldip, &icic);
xfs_dinode_to_disk(dip, &icic);
/* the rest is in on-disk format */
if (item->ri_buf[1].i_len > isize) {