ceph: size handling in MClientRequest, cap updates and inode traces

For encrypted inodes, transmit a rounded-up size to the MDS as the
normal file size and send the real inode size in the fscrypt_file field.
Also, fix up creates and truncates so that they transmit fscrypt_file too.

When we get an inode trace from the MDS, grab the fscrypt_file field if
the inode is encrypted, and use it to populate the i_size field instead
of the regular inode size field.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Reviewed-and-tested-by: Luís Henriques <lhenriques@suse.de>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Jeff Layton 2022-08-25 09:31:06 -04:00 committed by Ilya Dryomov
parent 14e034a61c
commit 16be62fc8a
6 changed files with 73 additions and 25 deletions

View File

@ -1217,10 +1217,9 @@ struct cap_msg_args {
umode_t mode;
bool inline_data;
bool wake;
bool encrypted;
u32 fscrypt_auth_len;
u32 fscrypt_file_len;
u8 fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context
u8 fscrypt_file[sizeof(u64)]; // for size
};
/* Marshal up the cap msg to the MDS */
@ -1255,6 +1254,12 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
fc->ino = cpu_to_le64(arg->ino);
fc->snap_follows = cpu_to_le64(arg->follows);
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
if (arg->encrypted)
fc->size = cpu_to_le64(round_up(arg->size,
CEPH_FSCRYPT_BLOCK_SIZE));
else
#endif
fc->size = cpu_to_le64(arg->size);
fc->max_size = cpu_to_le64(arg->max_size);
ceph_encode_timespec64(&fc->mtime, &arg->mtime);
@ -1315,11 +1320,17 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
ceph_encode_64(&p, 0);
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
/* fscrypt_auth and fscrypt_file (version 12) */
/*
* fscrypt_auth and fscrypt_file (version 12)
*
* fscrypt_auth holds the crypto context (if any). fscrypt_file
* tracks the real i_size as an __le64 field (and we use a rounded-up
* i_size in the traditional size field).
*/
ceph_encode_32(&p, arg->fscrypt_auth_len);
ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len);
ceph_encode_32(&p, arg->fscrypt_file_len);
ceph_encode_copy(&p, arg->fscrypt_file, arg->fscrypt_file_len);
ceph_encode_32(&p, sizeof(__le64));
ceph_encode_64(&p, arg->size);
#else /* CONFIG_FS_ENCRYPTION */
ceph_encode_32(&p, 0);
ceph_encode_32(&p, 0);
@ -1391,7 +1402,6 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
arg->follows = flushing ? ci->i_head_snapc->seq : 0;
arg->flush_tid = flush_tid;
arg->oldest_flush_tid = oldest_flush_tid;
arg->size = i_size_read(inode);
ci->i_reported_size = arg->size;
arg->max_size = ci->i_wanted_max_size;
@ -1445,6 +1455,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
}
}
arg->flags = flags;
arg->encrypted = IS_ENCRYPTED(inode);
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
if (ci->fscrypt_auth_len &&
WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) {
@ -1456,21 +1467,21 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
min_t(size_t, ci->fscrypt_auth_len,
sizeof(arg->fscrypt_auth)));
}
/* FIXME: use this to track "real" size */
arg->fscrypt_file_len = 0;
#endif /* CONFIG_FS_ENCRYPTION */
}
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4 + 8)
static inline int cap_msg_size(struct cap_msg_args *arg)
{
return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len;
}
#else
#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4)
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
static inline int cap_msg_size(struct cap_msg_args *arg)
{
return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len +
arg->fscrypt_file_len;
}
#else
static inline int cap_msg_size(struct cap_msg_args *arg)
{
return CAP_MSG_FIXED_FIELDS;
@ -1550,13 +1561,10 @@ static inline int __send_flush_snap(struct inode *inode,
arg.inline_data = capsnap->inline_data;
arg.flags = 0;
arg.wake = false;
arg.encrypted = IS_ENCRYPTED(inode);
/*
* No fscrypt_auth changes from a capsnap. It will need
* to update fscrypt_file on size changes (TODO).
*/
/* No fscrypt_auth changes from a capsnap.*/
arg.fscrypt_auth_len = 0;
arg.fscrypt_file_len = 0;
msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg),
GFP_NOFS, false);

View File

@ -915,6 +915,9 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
goto out_req;
}
if (S_ISREG(mode) && IS_ENCRYPTED(dir))
set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_parent = dir;

View File

@ -790,6 +790,7 @@ retry:
req->r_parent = dir;
ihold(dir);
if (IS_ENCRYPTED(dir)) {
set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
if (!fscrypt_has_encryption_key(dir)) {
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_NOKEY_NAME;

View File

@ -1028,6 +1028,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
if (new_version ||
(new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
u64 size = le64_to_cpu(info->size);
s64 old_pool = ci->i_layout.pool_id;
struct ceph_string *old_ns;
@ -1041,10 +1042,22 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
pool_ns = old_ns;
/*
 * For an encrypted inode the regular size field carries a size rounded
 * up to CEPH_FSCRYPT_BLOCK_SIZE, and fscrypt_file carries the real
 * i_size as an __le64. Only use fscrypt_file when rounding it up
 * reproduces the regular size; otherwise keep the regular size.
 */
if (IS_ENCRYPTED(inode) && size &&
    iinfo->fscrypt_file_len == sizeof(__le64)) {
	u64 fsize = __le64_to_cpu(*(__le64 *)iinfo->fscrypt_file);

	if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) {
		size = fsize;
	} else {
		/*
		 * Print both values in CPU byte order: "size" is the
		 * already-converted info->size, "fsize" is the decoded
		 * fscrypt_file value that is being discarded.
		 */
		pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n",
			size, fsize);
	}
}
queue_trunc = ceph_fill_file_size(inode, issued,
le32_to_cpu(info->truncate_seq),
le64_to_cpu(info->truncate_size),
le64_to_cpu(info->size));
size);
/* only update max_size on auth cap */
if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
ci->i_max_size != le64_to_cpu(info->max_size)) {
@ -2388,11 +2401,25 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr,
}
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
attr->ia_size != isize) {
req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
req->r_args.setattr.old_size = cpu_to_le64(isize);
mask |= CEPH_SETATTR_SIZE;
release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
if (IS_ENCRYPTED(inode) && attr->ia_size) {
set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
mask |= CEPH_SETATTR_FSCRYPT_FILE;
req->r_args.setattr.size =
cpu_to_le64(round_up(attr->ia_size,
CEPH_FSCRYPT_BLOCK_SIZE));
req->r_args.setattr.old_size =
cpu_to_le64(round_up(isize,
CEPH_FSCRYPT_BLOCK_SIZE));
req->r_fscrypt_file = attr->ia_size;
/* FIXME: client must zero out any partial blocks! */
} else {
req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
req->r_args.setattr.old_size = cpu_to_le64(isize);
req->r_fscrypt_file = 0;
}
}
}
if (ia_valid & ATTR_MTIME) {

View File

@ -2832,7 +2832,12 @@ static void encode_mclientrequest_tail(void **p,
} else {
ceph_encode_32(p, 0);
}
ceph_encode_32(p, 0); // fscrypt_file for now
if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags)) {
ceph_encode_32(p, sizeof(__le64));
ceph_encode_64(p, req->r_fscrypt_file);
} else {
ceph_encode_32(p, 0);
}
}
/*
@ -2922,6 +2927,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
/* fscrypt_file */
len += sizeof(u32);
if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags))
len += sizeof(__le64);
msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
if (!msg) {

View File

@ -282,6 +282,7 @@ struct ceph_mds_request {
#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */
#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */
#define CEPH_MDS_R_ASYNC (8) /* async request */
#define CEPH_MDS_R_FSCRYPT_FILE (9) /* must marshal fscrypt_file field */
unsigned long r_req_flags;
struct mutex r_fill_mutex;
@ -289,6 +290,7 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args;
struct ceph_fscrypt_auth *r_fscrypt_auth;
u64 r_fscrypt_file;
u8 *r_altname; /* fscrypt binary crypttext for long filenames */
u32 r_altname_len; /* length of r_altname */