mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-25 13:43:55 +08:00
rbd: introduce OWN_BVECS data type
If the layout is "fancy", we need to be able to rearrange the provided bio_vecs in stripe unit chunks to make it possible for the messenger to read/write directly from/to the provided data buffer, without employing a temporary data buffer for assembling the result.

Higher-level bio_vec arrays are generally immutable, so this requires copying into a private array. Only the bio_vecs themselves are shuffled around, not the actual data. OWN_BVECS doesn't own any pages.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
e93aca0abb
commit
afb978884c
@ -215,6 +215,7 @@ enum obj_request_type {
|
|||||||
OBJ_REQUEST_NODATA = 1,
|
OBJ_REQUEST_NODATA = 1,
|
||||||
OBJ_REQUEST_BIO, /* pointer into provided bio (list) */
|
OBJ_REQUEST_BIO, /* pointer into provided bio (list) */
|
||||||
OBJ_REQUEST_BVECS, /* pointer into provided bio_vec array */
|
OBJ_REQUEST_BVECS, /* pointer into provided bio_vec array */
|
||||||
|
OBJ_REQUEST_OWN_BVECS, /* private bio_vec array, doesn't own pages */
|
||||||
};
|
};
|
||||||
|
|
||||||
enum obj_operation_type {
|
enum obj_operation_type {
|
||||||
@ -261,6 +262,7 @@ struct rbd_obj_request {
|
|||||||
struct {
|
struct {
|
||||||
struct ceph_bvec_iter bvec_pos;
|
struct ceph_bvec_iter bvec_pos;
|
||||||
u32 bvec_count;
|
u32 bvec_count;
|
||||||
|
u32 bvec_idx;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
struct bio_vec *copyup_bvecs;
|
struct bio_vec *copyup_bvecs;
|
||||||
@ -1238,7 +1240,7 @@ static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Zero a range in @obj_req data buffer defined by a bio (list) or
|
* Zero a range in @obj_req data buffer defined by a bio (list) or
|
||||||
* bio_vec array.
|
* (private) bio_vec array.
|
||||||
*
|
*
|
||||||
* @off is relative to the start of the data buffer.
|
* @off is relative to the start of the data buffer.
|
||||||
*/
|
*/
|
||||||
@ -1250,6 +1252,7 @@ static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
|
|||||||
zero_bios(&obj_req->bio_pos, off, bytes);
|
zero_bios(&obj_req->bio_pos, off, bytes);
|
||||||
break;
|
break;
|
||||||
case OBJ_REQUEST_BVECS:
|
case OBJ_REQUEST_BVECS:
|
||||||
|
case OBJ_REQUEST_OWN_BVECS:
|
||||||
zero_bvecs(&obj_req->bvec_pos, off, bytes);
|
zero_bvecs(&obj_req->bvec_pos, off, bytes);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -1485,6 +1488,9 @@ static void rbd_obj_request_destroy(struct kref *kref)
|
|||||||
case OBJ_REQUEST_BIO:
|
case OBJ_REQUEST_BIO:
|
||||||
case OBJ_REQUEST_BVECS:
|
case OBJ_REQUEST_BVECS:
|
||||||
break; /* Nothing to do */
|
break; /* Nothing to do */
|
||||||
|
case OBJ_REQUEST_OWN_BVECS:
|
||||||
|
kfree(obj_request->bvec_pos.bvecs);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
rbd_assert(0);
|
rbd_assert(0);
|
||||||
}
|
}
|
||||||
@ -1679,8 +1685,10 @@ static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
|
|||||||
obj_req->ex.oe_len);
|
obj_req->ex.oe_len);
|
||||||
break;
|
break;
|
||||||
case OBJ_REQUEST_BVECS:
|
case OBJ_REQUEST_BVECS:
|
||||||
|
case OBJ_REQUEST_OWN_BVECS:
|
||||||
rbd_assert(obj_req->bvec_pos.iter.bi_size ==
|
rbd_assert(obj_req->bvec_pos.iter.bi_size ==
|
||||||
obj_req->ex.oe_len);
|
obj_req->ex.oe_len);
|
||||||
|
rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
|
||||||
osd_req_op_extent_osd_data_bvec_pos(obj_req->osd_req, which,
|
osd_req_op_extent_osd_data_bvec_pos(obj_req->osd_req, which,
|
||||||
&obj_req->bvec_pos);
|
&obj_req->bvec_pos);
|
||||||
break;
|
break;
|
||||||
@ -1893,6 +1901,8 @@ struct rbd_img_fill_ctx {
|
|||||||
union rbd_img_fill_iter *pos;
|
union rbd_img_fill_iter *pos;
|
||||||
union rbd_img_fill_iter iter;
|
union rbd_img_fill_iter iter;
|
||||||
ceph_object_extent_fn_t set_pos_fn;
|
ceph_object_extent_fn_t set_pos_fn;
|
||||||
|
ceph_object_extent_fn_t count_fn;
|
||||||
|
ceph_object_extent_fn_t copy_fn;
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct ceph_object_extent *alloc_object_extent(void *arg)
|
static struct ceph_object_extent *alloc_object_extent(void *arg)
|
||||||
@ -1909,18 +1919,21 @@ static struct ceph_object_extent *alloc_object_extent(void *arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Map a list of image extents to a list of object extents, create the
|
* While su != os && sc == 1 is technically not fancy (it's the same
|
||||||
* corresponding object requests (normally each to a different object,
|
* layout as su == os && sc == 1), we can't use the nocopy path for it
|
||||||
* but not always) and add them to @img_req. For each object request,
|
* because ->set_pos_fn() should be called only once per object.
|
||||||
* set up its data descriptor to point to the corresponding chunk of
|
* ceph_file_to_extents() invokes action_fn once per stripe unit, so
|
||||||
* @fctx->pos data buffer.
|
* treat su != os && sc == 1 as fancy.
|
||||||
*
|
|
||||||
* @fctx->pos data buffer is assumed to be large enough.
|
|
||||||
*/
|
*/
|
||||||
static int rbd_img_fill_request(struct rbd_img_request *img_req,
|
static bool rbd_layout_is_fancy(struct ceph_file_layout *l)
|
||||||
struct ceph_file_extent *img_extents,
|
{
|
||||||
u32 num_img_extents,
|
return l->stripe_unit != l->object_size;
|
||||||
struct rbd_img_fill_ctx *fctx)
|
}
|
||||||
|
|
||||||
|
static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
|
||||||
|
struct ceph_file_extent *img_extents,
|
||||||
|
u32 num_img_extents,
|
||||||
|
struct rbd_img_fill_ctx *fctx)
|
||||||
{
|
{
|
||||||
u32 i;
|
u32 i;
|
||||||
int ret;
|
int ret;
|
||||||
@ -1946,6 +1959,81 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
|
|||||||
return __rbd_img_fill_request(img_req);
|
return __rbd_img_fill_request(img_req);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Map a list of image extents to a list of object extents, create the
|
||||||
|
* corresponding object requests (normally each to a different object,
|
||||||
|
* but not always) and add them to @img_req. For each object request,
|
||||||
|
* set up its data descriptor to point to the corresponding chunk(s) of
|
||||||
|
* @fctx->pos data buffer.
|
||||||
|
*
|
||||||
|
* Because ceph_file_to_extents() will merge adjacent object extents
|
||||||
|
* together, each object request's data descriptor may point to multiple
|
||||||
|
* different chunks of @fctx->pos data buffer.
|
||||||
|
*
|
||||||
|
* @fctx->pos data buffer is assumed to be large enough.
|
||||||
|
*/
|
||||||
|
static int rbd_img_fill_request(struct rbd_img_request *img_req,
|
||||||
|
struct ceph_file_extent *img_extents,
|
||||||
|
u32 num_img_extents,
|
||||||
|
struct rbd_img_fill_ctx *fctx)
|
||||||
|
{
|
||||||
|
struct rbd_device *rbd_dev = img_req->rbd_dev;
|
||||||
|
struct rbd_obj_request *obj_req;
|
||||||
|
u32 i;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (fctx->pos_type == OBJ_REQUEST_NODATA ||
|
||||||
|
!rbd_layout_is_fancy(&rbd_dev->layout))
|
||||||
|
return rbd_img_fill_request_nocopy(img_req, img_extents,
|
||||||
|
num_img_extents, fctx);
|
||||||
|
|
||||||
|
img_req->data_type = OBJ_REQUEST_OWN_BVECS;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create object requests and determine ->bvec_count for each object
|
||||||
|
* request. Note that ->bvec_count sum over all object requests may
|
||||||
|
* be greater than the number of bio_vecs in the provided bio (list)
|
||||||
|
* or bio_vec array because when mapped, those bio_vecs can straddle
|
||||||
|
* stripe unit boundaries.
|
||||||
|
*/
|
||||||
|
fctx->iter = *fctx->pos;
|
||||||
|
for (i = 0; i < num_img_extents; i++) {
|
||||||
|
ret = ceph_file_to_extents(&rbd_dev->layout,
|
||||||
|
img_extents[i].fe_off,
|
||||||
|
img_extents[i].fe_len,
|
||||||
|
&img_req->object_extents,
|
||||||
|
alloc_object_extent, img_req,
|
||||||
|
fctx->count_fn, &fctx->iter);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
for_each_obj_request(img_req, obj_req) {
|
||||||
|
obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
|
||||||
|
sizeof(*obj_req->bvec_pos.bvecs),
|
||||||
|
GFP_NOIO);
|
||||||
|
if (!obj_req->bvec_pos.bvecs)
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fill in each object request's private bio_vec array, splitting and
|
||||||
|
* rearranging the provided bio_vecs in stripe unit chunks as needed.
|
||||||
|
*/
|
||||||
|
fctx->iter = *fctx->pos;
|
||||||
|
for (i = 0; i < num_img_extents; i++) {
|
||||||
|
ret = ceph_iterate_extents(&rbd_dev->layout,
|
||||||
|
img_extents[i].fe_off,
|
||||||
|
img_extents[i].fe_len,
|
||||||
|
&img_req->object_extents,
|
||||||
|
fctx->copy_fn, &fctx->iter);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
return __rbd_img_fill_request(img_req);
|
||||||
|
}
|
||||||
|
|
||||||
static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
|
static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
|
||||||
u64 off, u64 len)
|
u64 off, u64 len)
|
||||||
{
|
{
|
||||||
@ -1970,6 +2058,32 @@ static void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
|
|||||||
ceph_bio_iter_advance(it, bytes);
|
ceph_bio_iter_advance(it, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * ->count_fn for bio-backed image requests.
 *
 * Advances the bio iterator passed in @arg by @bytes and increments the
 * ->bvec_count of the object request embedding @ex once for every step
 * that ceph_bio_iter_advance_step() executes.
 */
static void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
{
	struct ceph_bio_iter *bio_it = arg;
	struct rbd_obj_request *obj_req;

	obj_req = container_of(ex, struct rbd_obj_request, ex);
	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);

	ceph_bio_iter_advance_step(bio_it, bytes, ({
		obj_req->bvec_count++;
	}));
}
|
||||||
|
|
||||||
|
/*
 * ->copy_fn for bio-backed image requests.
 *
 * Advances the bio iterator passed in @arg by @bytes.  For every step,
 * the current bio_vec is appended to the private array of the object
 * request embedding @ex at ->bvec_idx, and its length is added to the
 * request's ->bvec_pos.iter.bi_size.
 */
static void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
{
	struct ceph_bio_iter *bio_it = arg;
	struct rbd_obj_request *obj_req;

	obj_req = container_of(ex, struct rbd_obj_request, ex);
	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);

	/* bv is not declared here; presumably the step macro provides it. */
	ceph_bio_iter_advance_step(bio_it, bytes, ({
		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
		obj_req->bvec_pos.bvecs[obj_req->bvec_idx] = bv;
		obj_req->bvec_idx++;
	}));
}
|
||||||
|
|
||||||
static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
|
static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
|
||||||
struct ceph_file_extent *img_extents,
|
struct ceph_file_extent *img_extents,
|
||||||
u32 num_img_extents,
|
u32 num_img_extents,
|
||||||
@ -1979,6 +2093,8 @@ static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
|
|||||||
.pos_type = OBJ_REQUEST_BIO,
|
.pos_type = OBJ_REQUEST_BIO,
|
||||||
.pos = (union rbd_img_fill_iter *)bio_pos,
|
.pos = (union rbd_img_fill_iter *)bio_pos,
|
||||||
.set_pos_fn = set_bio_pos,
|
.set_pos_fn = set_bio_pos,
|
||||||
|
.count_fn = count_bio_bvecs,
|
||||||
|
.copy_fn = copy_bio_bvecs,
|
||||||
};
|
};
|
||||||
|
|
||||||
return rbd_img_fill_request(img_req, img_extents, num_img_extents,
|
return rbd_img_fill_request(img_req, img_extents, num_img_extents,
|
||||||
@ -2005,6 +2121,29 @@ static void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
|
|||||||
ceph_bvec_iter_advance(it, bytes);
|
ceph_bvec_iter_advance(it, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * ->count_fn for image requests backed by a bio_vec array.
 *
 * Advances the bvec iterator passed in @arg by @bytes and increments the
 * ->bvec_count of the object request embedding @ex once for every step
 * that ceph_bvec_iter_advance_step() executes.
 */
static void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
{
	struct ceph_bvec_iter *bvec_it = arg;
	struct rbd_obj_request *obj_req;

	obj_req = container_of(ex, struct rbd_obj_request, ex);

	ceph_bvec_iter_advance_step(bvec_it, bytes, ({
		obj_req->bvec_count++;
	}));
}
|
||||||
|
|
||||||
|
/*
 * ->copy_fn for image requests backed by a bio_vec array.
 *
 * Advances the bvec iterator passed in @arg by @bytes.  For every step,
 * the current bio_vec is appended to the private array of the object
 * request embedding @ex at ->bvec_idx, and its length is added to the
 * request's ->bvec_pos.iter.bi_size.
 */
static void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
{
	struct ceph_bvec_iter *bvec_it = arg;
	struct rbd_obj_request *obj_req;

	obj_req = container_of(ex, struct rbd_obj_request, ex);

	/* bv is not declared here; presumably the step macro provides it. */
	ceph_bvec_iter_advance_step(bvec_it, bytes, ({
		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
		obj_req->bvec_pos.bvecs[obj_req->bvec_idx] = bv;
		obj_req->bvec_idx++;
	}));
}
|
||||||
|
|
||||||
static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
|
static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
|
||||||
struct ceph_file_extent *img_extents,
|
struct ceph_file_extent *img_extents,
|
||||||
u32 num_img_extents,
|
u32 num_img_extents,
|
||||||
@ -2014,6 +2153,8 @@ static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
|
|||||||
.pos_type = OBJ_REQUEST_BVECS,
|
.pos_type = OBJ_REQUEST_BVECS,
|
||||||
.pos = (union rbd_img_fill_iter *)bvec_pos,
|
.pos = (union rbd_img_fill_iter *)bvec_pos,
|
||||||
.set_pos_fn = set_bvec_pos,
|
.set_pos_fn = set_bvec_pos,
|
||||||
|
.count_fn = count_bvecs,
|
||||||
|
.copy_fn = copy_bvecs,
|
||||||
};
|
};
|
||||||
|
|
||||||
return rbd_img_fill_request(img_req, img_extents, num_img_extents,
|
return rbd_img_fill_request(img_req, img_extents, num_img_extents,
|
||||||
@ -2071,6 +2212,7 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
|
|||||||
&obj_req->bio_pos);
|
&obj_req->bio_pos);
|
||||||
break;
|
break;
|
||||||
case OBJ_REQUEST_BVECS:
|
case OBJ_REQUEST_BVECS:
|
||||||
|
case OBJ_REQUEST_OWN_BVECS:
|
||||||
ret = __rbd_img_fill_from_bvecs(child_img_req,
|
ret = __rbd_img_fill_from_bvecs(child_img_req,
|
||||||
obj_req->img_extents,
|
obj_req->img_extents,
|
||||||
obj_req->num_img_extents,
|
obj_req->num_img_extents,
|
||||||
|
Loading…
Reference in New Issue
Block a user