A fix for a potential data corruption in differential backup and
snapshot-based mirroring scenarios in RBD and a reference counting
fixup to avoid use-after-free in CephFS, all marked for stable.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmSDUh4THGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzi0guB/4l7wOFnFvC+Dz5Y0KKuq2zFGQ64eZM
 hVpKEANsV/py/MTOdCzhW5cNcNj5/g8+1eozGxA8IzckzWf+25ziIn+BNWOO7DK1
 eO1U0wdiFnkXzr3nKSqNqm+hrUupAUd4Rb6644I4FwWKRu1WQydRjmvFVE+gw86O
 eeXujr3IlhhDF/VqO0sekCx9MaFPQaCaoscM3gU04meKAG84jt3oezueOlRqYFTX
 batwJ33wzVtLSh1NJIhC0iBMuBgvnuqQ9R8bHTdSNkR8Ov4V3B4DQGL4lmYnBxbv
 L3fMcz+sdZu3bDptUta4ZgdS4LkxUUUUEK07XeoBhAjZ3qPrMiD/gXay
 =S7Tu
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-6.4-rc6' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A fix for a potential data corruption in differential backup and
  snapshot-based mirroring scenarios in RBD and a reference counting
  fixup to avoid use-after-free in CephFS, all marked for stable"

* tag 'ceph-for-6.4-rc6' of https://github.com/ceph/ceph-client:
  ceph: fix use-after-free bug for inodes when flushing capsnaps
  rbd: get snapshot context after exclusive lock is ensured to be held
  rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting
This commit is contained in:
Linus Torvalds 2023-06-09 10:53:58 -07:00
commit 7e8c948b3f
3 changed files with 53 additions and 19 deletions

View File

@@ -1334,14 +1334,30 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
/*
* Must be called after rbd_obj_calc_img_extents().
*/
static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
{
if (!obj_req->num_img_extents ||
(rbd_obj_is_entire(obj_req) &&
!obj_req->img_request->snapc->num_snaps))
return false;
rbd_assert(obj_req->img_request->snapc);
return true;
if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
dout("%s %p objno %llu discard\n", __func__, obj_req,
obj_req->ex.oe_objno);
return;
}
if (!obj_req->num_img_extents) {
dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
obj_req->ex.oe_objno);
return;
}
if (rbd_obj_is_entire(obj_req) &&
!obj_req->img_request->snapc->num_snaps) {
dout("%s %p objno %llu entire\n", __func__, obj_req,
obj_req->ex.oe_objno);
return;
}
obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
}
static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
@@ -1442,6 +1458,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
static struct ceph_osd_request *
rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
{
rbd_assert(obj_req->img_request->snapc);
return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
num_ops);
}
@@ -1578,15 +1595,18 @@ static void rbd_img_request_init(struct rbd_img_request *img_request,
mutex_init(&img_request->state_mutex);
}
/*
* Only snap_id is captured here, for reads. For writes, snapshot
* context is captured in rbd_img_object_requests() after exclusive
* lock is ensured to be held.
*/
static void rbd_img_capture_header(struct rbd_img_request *img_req)
{
struct rbd_device *rbd_dev = img_req->rbd_dev;
lockdep_assert_held(&rbd_dev->header_rwsem);
if (rbd_img_is_write(img_req))
img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
else
if (!rbd_img_is_write(img_req))
img_req->snap_id = rbd_dev->spec->snap_id;
if (rbd_dev_parent_get(rbd_dev))
@@ -2233,9 +2253,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
if (ret)
return ret;
if (rbd_obj_copyup_enabled(obj_req))
obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
obj_req->write_state = RBD_OBJ_WRITE_START;
return 0;
}
@@ -2341,8 +2358,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
if (ret)
return ret;
if (rbd_obj_copyup_enabled(obj_req))
obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
if (!obj_req->num_img_extents) {
obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
if (rbd_obj_is_entire(obj_req))
@@ -3286,6 +3301,7 @@ again:
case RBD_OBJ_WRITE_START:
rbd_assert(!*result);
rbd_obj_set_copyup_enabled(obj_req);
if (rbd_obj_write_is_noop(obj_req))
return true;
@@ -3472,9 +3488,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
static void rbd_img_object_requests(struct rbd_img_request *img_req)
{
struct rbd_device *rbd_dev = img_req->rbd_dev;
struct rbd_obj_request *obj_req;
rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
rbd_assert(!need_exclusive_lock(img_req) ||
__rbd_is_lock_owner(rbd_dev));
if (rbd_img_is_write(img_req)) {
rbd_assert(!img_req->snapc);
down_read(&rbd_dev->header_rwsem);
img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
up_read(&rbd_dev->header_rwsem);
}
for_each_obj_request(img_req, obj_req) {
int result = 0;
@@ -3492,7 +3518,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
{
struct rbd_device *rbd_dev = img_req->rbd_dev;
int ret;
again:
@@ -3513,9 +3538,6 @@ again:
if (*result)
return true;
rbd_assert(!need_exclusive_lock(img_req) ||
__rbd_is_lock_owner(rbd_dev));
rbd_img_object_requests(img_req);
if (!img_req->pending.num_pending) {
*result = img_req->pending.result;
@@ -3977,6 +3999,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
{
int ret;
ret = rbd_dev_refresh(rbd_dev);
if (ret)
return ret;
if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
ret = rbd_object_map_open(rbd_dev);
if (ret)

View File

@@ -1627,6 +1627,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_session *session = NULL;
bool need_put = false;
int mds;
dout("ceph_flush_snaps %p\n", inode);
@@ -1671,8 +1672,13 @@ out:
ceph_put_mds_session(session);
/* we flushed them all; remove this inode from the queue */
spin_lock(&mdsc->snap_flush_lock);
if (!list_empty(&ci->i_snap_flush_item))
need_put = true;
list_del_init(&ci->i_snap_flush_item);
spin_unlock(&mdsc->snap_flush_lock);
if (need_put)
iput(inode);
}
/*

View File

@@ -693,8 +693,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
if (list_empty(&ci->i_snap_flush_item))
if (list_empty(&ci->i_snap_flush_item)) {
ihold(inode);
list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
}
spin_unlock(&mdsc->snap_flush_lock);
return 1; /* caller may want to ceph_flush_snaps */
}