From daeda1cca91d58bb6c8e45f6734f021bab9c28b7 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Tue, 3 May 2011 15:00:55 +0200
Subject: [PATCH] drbd: RCU for disk_conf

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/drbd_int.h      |  54 +++++++++--
 drivers/block/drbd/drbd_main.c     |  25 +++--
 drivers/block/drbd/drbd_nl.c       | 144 +++++++++++++++++++----------
 drivers/block/drbd/drbd_receiver.c | 140 ++++++++++++++++++----------
 drivers/block/drbd/drbd_state.c    |  16 +++-
 drivers/block/drbd/drbd_worker.c   |  38 +++++---
 6 files changed, 283 insertions(+), 134 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f8d0ac386858..cd77dd497b94 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -777,7 +777,7 @@ struct drbd_backing_dev {
 	struct block_device *backing_bdev;
 	struct block_device *md_bdev;
 	struct drbd_md md;
-	struct disk_conf dc; /* The user provided config... */
+	struct disk_conf *disk_conf; /* RCU, for updates: mdev->tconn->conf_update */
 	sector_t known_size; /* last known size of that backing device */
 };
 
@@ -1644,8 +1644,13 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev)
 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
 static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
 {
-	switch (mdev->ldev->dc.on_io_error) {
-	case EP_PASS_ON:
+	enum drbd_io_error_p ep;
+
+	rcu_read_lock();
+	ep = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
+	rcu_read_unlock();
+	switch (ep) {
+	case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */
 		if (!forcedetach) {
 			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s.\n", where);
@@ -1694,9 +1699,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
  * BTW, for internal meta data, this happens to be the maximum capacity
  * we could agree upon with our peer node.
  */
-static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct drbd_backing_dev *bdev)
 {
-	switch (bdev->dc.meta_dev_idx) {
+	switch (meta_dev_idx) {
 	case DRBD_MD_INDEX_INTERNAL:
 	case DRBD_MD_INDEX_FLEX_INT:
 		return bdev->md.md_offset + bdev->md.bm_offset;
@@ -1706,13 +1711,30 @@ static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
 	}
 }
 
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+{
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	return _drbd_md_first_sector(meta_dev_idx, bdev);
+}
+
 /**
  * drbd_md_last_sector() - Return the last sector number of the meta data area
  * @bdev:	Meta data block device.
 */
 static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
 {
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	switch (meta_dev_idx) {
 	case DRBD_MD_INDEX_INTERNAL:
 	case DRBD_MD_INDEX_FLEX_INT:
 		return bdev->md.md_offset + MD_AL_OFFSET - 1;
@@ -1740,12 +1762,18 @@ static inline sector_t drbd_get_capacity(struct block_device *bdev)
 static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
 {
 	sector_t s;
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	switch (meta_dev_idx) {
 	case DRBD_MD_INDEX_INTERNAL:
 	case DRBD_MD_INDEX_FLEX_INT:
 		s = drbd_get_capacity(bdev->backing_bdev)
 			? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
-				drbd_md_first_sector(bdev))
+				_drbd_md_first_sector(meta_dev_idx, bdev))
 			: 0;
 		break;
 	case DRBD_MD_INDEX_FLEX_EXT:
@@ -1771,9 +1799,15 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
 static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
 				    struct drbd_backing_dev *bdev)
 {
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+	rcu_read_unlock();
+
+	switch (meta_dev_idx) {
 	default: /* external, some index */
-		return MD_RESERVED_SECT * bdev->dc.meta_dev_idx;
+		return MD_RESERVED_SECT * meta_dev_idx;
 	case DRBD_MD_INDEX_INTERNAL:
 		/* with drbd08, internal meta data is always "flexible" */
 	case DRBD_MD_INDEX_FLEX_INT:
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e37244485d72..de6afa75dec6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -866,6 +866,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev)
 	const int apv = mdev->tconn->agreed_pro_version;
 	enum drbd_packet cmd;
 	struct net_conf *nc;
+	struct disk_conf *dc;
 
 	sock = &mdev->tconn->data;
 	p = drbd_prepare_command(mdev, sock);
@@ -887,11 +888,12 @@ int drbd_send_sync_param(struct drbd_conf *mdev)
 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
 	if (get_ldev(mdev)) {
-		p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate);
-		p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead);
-		p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target);
-		p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target);
-		p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate);
+		dc = rcu_dereference(mdev->ldev->disk_conf);
+		p->rate = cpu_to_be32(dc->resync_rate);
+		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
+		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
+		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
+		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
 		put_ldev(mdev);
 	} else {
 		p->rate = cpu_to_be32(DRBD_RATE_DEF);
@@ -1056,7 +1058,9 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
 	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
 		D_ASSERT(mdev->ldev->backing_bdev);
 		d_size = drbd_get_max_capacity(mdev->ldev);
-		u_size = mdev->ldev->dc.disk_size;
+		rcu_read_lock();
+		u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+		rcu_read_unlock();
 		q_order_type = drbd_queue_order_type(mdev);
 		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
 		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
@@ -2889,7 +2893,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
 	bdev->md.flags = be32_to_cpu(buffer->flags);
-	bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents);
 	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
 
 	spin_lock_irq(&mdev->tconn->req_lock);
@@ -2901,8 +2904,12 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	}
 	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	if (bdev->dc.al_extents < 7)
-		bdev->dc.al_extents = 127;
+	mutex_lock(&mdev->tconn->conf_update);
+	/* This block wants to be removed... */
+	bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents);
+	if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
+		bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
+	mutex_unlock(&mdev->tconn->conf_update);
 
  err:
 	mutex_unlock(&mdev->md_io_mutex);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index b8ea4807c981..ea62838e0794 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -384,7 +384,8 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
 	rcu_read_lock();
 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		if (get_ldev_if_state(mdev, D_CONSISTENT)) {
-			fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing);
+			fp = max_t(enum drbd_fencing_p, fp,
+				   rcu_dereference(mdev->ldev->disk_conf)->fencing);
 			put_ldev(mdev);
 		}
 	}
@@ -678,7 +679,12 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 				       struct drbd_backing_dev *bdev)
 {
 	sector_t md_size_sect = 0;
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+
+	switch (meta_dev_idx) {
 	default:
 		/* v07 style fixed size indexed meta data */
 		bdev->md.md_size_sect = MD_RESERVED_SECT;
@@ -713,6 +719,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 		bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
 		break;
 	}
+	rcu_read_unlock();
 }
 
 /* input size is expected to be in KB */
@@ -803,7 +810,9 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags)
 	/* TODO: should only be some assert here, not (re)init... */
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
-	u_size = mdev->ldev->dc.disk_size;
+	rcu_read_lock();
+	u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+	rcu_read_unlock();
 	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
@@ -979,7 +988,9 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
 		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
 
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
-		max_segments = mdev->ldev->dc.max_bio_bvecs;
+		rcu_read_lock();
+		max_segments = rcu_dereference(mdev->ldev->disk_conf)->max_bio_bvecs;
+		rcu_read_unlock();
 		put_ldev(mdev);
 	}
 
@@ -1095,7 +1106,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
 	struct drbd_conf *mdev;
-	struct disk_conf *new_disk_conf;
+	struct disk_conf *new_disk_conf, *old_disk_conf;
 	int err, fifo_size;
 	int *rs_plan_s = NULL;
 
@@ -1114,19 +1125,15 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-/* FIXME freeze IO, cluster wide.
- *
- * We should make sure no-one uses
- * some half-updated struct when we
- * assign it later. */
-
-	new_disk_conf = kmalloc(sizeof(*new_disk_conf), GFP_KERNEL);
+	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
 	if (!new_disk_conf) {
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
 
-	memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf));
+	mutex_lock(&mdev->tconn->conf_update);
+	old_disk_conf = mdev->ldev->disk_conf;
+	*new_disk_conf = *old_disk_conf;
 	if (should_set_defaults(info))
 		set_disk_conf_defaults(new_disk_conf);
@@ -1151,7 +1158,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 		if (!rs_plan_s) {
 			dev_err(DEV, "kmalloc of fifo_buffer failed");
 			retcode = ERR_NOMEM;
-			goto fail;
+			goto fail_unlock;
 		}
 	}
 
@@ -1171,31 +1178,37 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	if (err) {
 		retcode = ERR_NOMEM;
-		goto fail;
+		goto fail_unlock;
 	}
 
-	/* FIXME
-	 * To avoid someone looking at a half-updated struct, we probably
-	 * should have a rw-semaphor on net_conf and disk_conf.
-	 */
 	write_lock_irq(&global_state_lock);
 	retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after);
 	if (retcode == NO_ERROR) {
-		mdev->ldev->dc = *new_disk_conf;
+		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
 		drbd_sync_after_changed(mdev);
 	}
 	write_unlock_irq(&global_state_lock);
 
-	drbd_md_sync(mdev);
+	if (retcode != NO_ERROR)
+		goto fail_unlock;
 
+	drbd_md_sync(mdev);
 	if (mdev->state.conn >= C_CONNECTED)
 		drbd_send_sync_param(mdev);
 
+	mutex_unlock(&mdev->tconn->conf_update);
+	synchronize_rcu();
+	kfree(old_disk_conf);
+	goto success;
+
+fail_unlock:
+	mutex_unlock(&mdev->tconn->conf_update);
 fail:
-	put_ldev(mdev);
 	kfree(new_disk_conf);
 	kfree(rs_plan_s);
+success:
+	put_ldev(mdev);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -1210,6 +1223,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	sector_t max_possible_sectors;
 	sector_t min_md_device_sectors;
 	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
+	struct disk_conf *new_disk_conf = NULL;
 	struct block_device *bdev;
 	struct lru_cache *resync_lru = NULL;
 	union drbd_state ns, os;
@@ -1243,17 +1257,22 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
+	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
+	if (!new_disk_conf) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+	nbc->disk_conf = new_disk_conf;
 
-	set_disk_conf_defaults(&nbc->dc);
-
-	err = disk_conf_from_attrs(&nbc->dc, info);
+	set_disk_conf_defaults(new_disk_conf);
+	err = disk_conf_from_attrs(new_disk_conf, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
@@ -1261,7 +1280,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	rcu_read_lock();
 	nc = rcu_dereference(mdev->tconn->net_conf);
 	if (nc) {
-		if (nbc->dc.fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
+		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
 			rcu_read_unlock();
 			retcode = ERR_STONITH_AND_PROT_A;
 			goto fail;
@@ -1269,10 +1288,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	}
 	rcu_read_unlock();
 
-	bdev = blkdev_get_by_path(nbc->dc.backing_dev,
+	bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
 	if (IS_ERR(bdev)) {
-		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
+		dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
 			PTR_ERR(bdev));
 		retcode = ERR_OPEN_DISK;
 		goto fail;
@@ -1287,12 +1306,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 * should check it for you already; but if you don't, or
 	 * someone fooled it, we need to double check here)
 	 */
-	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
+	bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				  (nbc->dc.meta_dev_idx < 0) ?
+				  (new_disk_conf->meta_dev_idx < 0) ?
 				  (void *)mdev : (void *)drbd_m_holder);
 	if (IS_ERR(bdev)) {
-		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
+		dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
 			PTR_ERR(bdev));
 		retcode = ERR_OPEN_MD_DISK;
 		goto fail;
@@ -1300,8 +1319,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	nbc->md_bdev = bdev;
 
 	if ((nbc->backing_bdev == nbc->md_bdev) !=
-	    (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
-	     nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
@@ -1317,21 +1336,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
 	drbd_md_set_sector_offsets(mdev, nbc);
 
-	if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
+	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
 		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
 			(unsigned long long) drbd_get_max_capacity(nbc),
-			(unsigned long long) nbc->dc.disk_size);
+			(unsigned long long) new_disk_conf->disk_size);
 		retcode = ERR_DISK_TO_SMALL;
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < 0) {
+	if (new_disk_conf->meta_dev_idx < 0) {
 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
 		/* at least one MB, otherwise it does not make sense */
 		min_md_device_sectors = (2<<10);
 	} else {
 		max_possible_sectors = DRBD_MAX_SECTORS;
-		min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
+		min_md_device_sectors = MD_RESERVED_SECT * (new_disk_conf->meta_dev_idx + 1);
 	}
 
 	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
@@ -1356,7 +1375,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		dev_warn(DEV, "==> truncating very big lower level device "
 			"to currently maximum possible %llu sectors <==\n",
 			(unsigned long long) max_possible_sectors);
-		if (nbc->dc.meta_dev_idx >= 0)
+		if (new_disk_conf->meta_dev_idx >= 0)
 			dev_warn(DEV, "==>> using internal or flexible "
 				 "meta data may help <<==\n");
 	}
@@ -1399,14 +1418,14 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Since we are diskless, fix the activity log first... */
-	if (drbd_check_al_size(mdev, &nbc->dc)) {
+	if (drbd_check_al_size(mdev, new_disk_conf)) {
 		retcode = ERR_NOMEM;
 		goto force_diskless_dec;
 	}
 
 	/* Prevent shrinking of consistent devices ! */
 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
-	    drbd_new_dev_size(mdev, nbc, nbc->dc.disk_size, 0) < nbc->md.la_size_sect) {
+	    drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
 		dev_warn(DEV, "refusing to truncate a consistent device\n");
 		retcode = ERR_DISK_TO_SMALL;
 		goto force_diskless_dec;
 	}
@@ -1419,11 +1438,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	/* Reset the "barriers don't work" bits here, then force meta data to
 	 * be written, to ensure we determine if barriers are supported. */
-	if (nbc->dc.no_md_flush)
+	if (new_disk_conf->no_md_flush)
 		set_bit(MD_NO_FUA, &mdev->flags);
 	else
 		clear_bit(MD_NO_FUA, &mdev->flags);
 
+	/* FIXME Missing stuff: rs_plan_s, clip al range */
+
 	/* Point of no return reached.
 	 * Devices and memory are no longer released by error cleanup below.
 	 * now mdev takes over responsibility, and the state engine should
@@ -1433,6 +1454,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	mdev->resync = resync_lru;
 	nbc = NULL;
 	resync_lru = NULL;
+	new_disk_conf = NULL;
 
 	mdev->write_ordering = WO_bdev_flush;
 	drbd_bump_write_ordering(mdev, WO_bdev_flush);
@@ -1530,9 +1552,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
 		ns.pdsk = D_OUTDATED;
 
-	if ( ns.disk == D_CONSISTENT &&
-	    (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
+	rcu_read_lock();
+	if (ns.disk == D_CONSISTENT &&
+	    (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE))
 		ns.disk = D_UP_TO_DATE;
+	rcu_read_unlock();
 
 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
 	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
@@ -1589,6 +1613,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 				FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 		kfree(nbc);
 	}
+	kfree(new_disk_conf);
 	lc_destroy(resync_lru);
 
  finish:
@@ -1691,7 +1716,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct net_conf *new_conf)
 	idr_for_each_entry(&tconn->volumes, mdev, i) {
 		if (get_ldev(mdev)) {
-			enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
+			enum drbd_fencing_p fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
 			put_ldev(mdev);
 			if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
 				return ERR_STONITH_AND_PROT_A;
@@ -2159,11 +2184,13 @@ void resync_after_online_grow(struct drbd_conf *mdev)
 
 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 {
+	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
 	struct resize_parms rs;
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	enum dds_flags ddsf;
+	sector_t u_size;
 	int err;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
@@ -2204,10 +2231,31 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
+	rcu_read_lock();
+	u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+	rcu_read_unlock();
+	if (u_size != (sector_t)rs.resize_size) {
+		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
+		if (!new_disk_conf) {
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
-	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
+	if (new_disk_conf) {
+		mutex_lock(&mdev->tconn->conf_update);
+		old_disk_conf = mdev->ldev->disk_conf;
+		*new_disk_conf = *old_disk_conf;
+		new_disk_conf->disk_size = (sector_t)rs.resize_size;
+		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
+		mutex_unlock(&mdev->tconn->conf_update);
+		synchronize_rcu();
+		kfree(old_disk_conf);
+	}
+
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
 	dd = drbd_determine_dev_size(mdev, ddsf);
 	drbd_md_sync(mdev);
@@ -2501,11 +2549,11 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
 		goto nla_put_failure;
 
+	rcu_read_lock();
 	if (got_ldev)
-		if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
+		if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
 			goto nla_put_failure;
 
-	rcu_read_lock();
 	nc = rcu_dereference(mdev->tconn->net_conf);
 	if (nc)
 		err = net_conf_to_skb(skb, nc, exclude_sensitive);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index bba0050f836b..add41764ec54 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1166,6 +1166,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
  */
 void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
 {
+	struct disk_conf *dc;
 	enum write_ordering_e pwo;
 	static char *write_ordering_str[] = {
 		[WO_none] = "none",
@@ -1175,10 +1176,14 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
 
 	pwo = mdev->write_ordering;
 	wo = min(pwo, wo);
-	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
+	rcu_read_lock();
+	dc = rcu_dereference(mdev->ldev->disk_conf);
+
+	if (wo == WO_bdev_flush && dc->no_disk_flush)
 		wo = WO_drain_io;
-	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
+	if (wo == WO_drain_io && dc->no_disk_drain)
 		wo = WO_none;
+	rcu_read_unlock();
 	mdev->write_ordering = wo;
 	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
 		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
@@ -2190,9 +2195,14 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 	struct lc_element *tmp;
 	int curr_events;
 	int throttle = 0;
+	unsigned int c_min_rate;
+
+	rcu_read_lock();
+	c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
+	rcu_read_unlock();
 
 	/* feature disabled? */
-	if (mdev->ldev->dc.c_min_rate == 0)
+	if (c_min_rate == 0)
 		return 0;
 
 	spin_lock_irq(&mdev->al_lock);
@@ -2232,7 +2242,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 		db = mdev->rs_mark_left[i] - rs_left;
 		dbdt = Bit2KB(db/dt);
 
-		if (dbdt > mdev->ldev->dc.c_min_rate)
+		if (dbdt > c_min_rate)
 			throttle = 1;
 	}
 	return throttle;
@@ -3147,6 +3157,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 	struct crypto_hash *verify_tfm = NULL;
 	struct crypto_hash *csums_tfm = NULL;
 	struct net_conf *old_net_conf, *new_net_conf = NULL;
+	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
 	const int apv = tconn->agreed_pro_version;
 	int *rs_plan_s = NULL;
 	int fifo_size = 0;
@@ -3189,24 +3200,34 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 	if (err)
 		return err;
 
-	if (get_ldev(mdev)) {
-		mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
-		put_ldev(mdev);
+	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
+	if (!new_disk_conf) {
+		dev_err(DEV, "Allocation of new disk_conf failed\n");
+		return -ENOMEM;
 	}
 
+	mutex_lock(&mdev->tconn->conf_update);
+	old_net_conf = mdev->tconn->net_conf;
+	old_disk_conf = mdev->ldev->disk_conf;
+	*new_disk_conf = *old_disk_conf;
+
+	new_disk_conf->resync_rate = be32_to_cpu(p->rate);
+
 	if (apv >= 88) {
 		if (apv == 88) {
 			if (data_size > SHARED_SECRET_MAX) {
 				dev_err(DEV, "verify-alg too long, "
 					"peer wants %u, accepting only %u byte\n",
 					data_size, SHARED_SECRET_MAX);
+				mutex_unlock(&mdev->tconn->conf_update);
 				return -EIO;
 			}
 			err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
-			if (err)
+			if (err) {
+				mutex_unlock(&mdev->tconn->conf_update);
 				return err;
-
+			}
 			/* we expect NUL terminated string */
 			/* but just in case someone tries to be evil */
 			D_ASSERT(p->verify_alg[data_size-1] == 0);
@@ -3221,9 +3242,6 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
 		}
 
-		mutex_lock(&mdev->tconn->conf_update);
-		old_net_conf = mdev->tconn->net_conf;
-
 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
@@ -3252,14 +3270,13 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 		}
 	}
 
-	if (apv > 94 && get_ldev(mdev)) {
-		mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
-		mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
-		mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
-		mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
-		mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);
+	if (apv > 94) {
+		new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
+		new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
+		new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
+		new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
 
-		fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+		fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
 		if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
 			rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
 			if (!rs_plan_s) {
@@ -3268,7 +3285,6 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 				goto disconnect;
 			}
 		}
-		put_ldev(mdev);
 	}
 
 	if (verify_tfm || csums_tfm) {
@@ -3296,21 +3312,24 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 			}
 			rcu_assign_pointer(tconn->net_conf, new_net_conf);
 		}
-		mutex_unlock(&mdev->tconn->conf_update);
-		if (new_net_conf) {
-			synchronize_rcu();
-			kfree(old_net_conf);
-		}
-
-		spin_lock(&mdev->peer_seq_lock);
-		if (fifo_size != mdev->rs_plan_s.size) {
-			kfree(mdev->rs_plan_s.values);
-			mdev->rs_plan_s.values = rs_plan_s;
-			mdev->rs_plan_s.size = fifo_size;
-			mdev->rs_planed = 0;
-		}
-		spin_unlock(&mdev->peer_seq_lock);
 	}
+
+	rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
+	spin_lock(&mdev->peer_seq_lock);
+	if (rs_plan_s) {
+		kfree(mdev->rs_plan_s.values);
+		mdev->rs_plan_s.values = rs_plan_s;
+		mdev->rs_plan_s.size = fifo_size;
+		mdev->rs_planed = 0;
+	}
+	spin_unlock(&mdev->peer_seq_lock);
+
+	mutex_unlock(&mdev->tconn->conf_update);
+	synchronize_rcu();
+	if (new_net_conf)
+		kfree(old_net_conf);
+	kfree(old_disk_conf);
+
 	return 0;
 
 disconnect:
@@ -3358,37 +3377,56 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 	mdev->p_size = p_size;
 
 	if (get_ldev(mdev)) {
+		rcu_read_lock();
+		my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+		rcu_read_unlock();
+
 		warn_if_differ_considerably(mdev, "lower level device sizes",
 			   p_size, drbd_get_max_capacity(mdev->ldev));
 		warn_if_differ_considerably(mdev, "user requested size",
-					    p_usize, mdev->ldev->dc.disk_size);
+					    p_usize, my_usize);
 
 		/* if this is the first connect, or an otherwise expected
 		 * param exchange, choose the minimum */
 		if (mdev->state.conn == C_WF_REPORT_PARAMS)
-			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
-					       p_usize);
-
-		my_usize = mdev->ldev->dc.disk_size;
-
-		if (mdev->ldev->dc.disk_size != p_usize) {
-			mdev->ldev->dc.disk_size = p_usize;
-			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
-				 (unsigned long)mdev->ldev->dc.disk_size);
-		}
+			p_usize = min_not_zero(my_usize, p_usize);
 
 		/* Never shrink a device with usable data during connect.
 		   But allow online shrinking if we are connected. */
 		if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
-		   drbd_get_capacity(mdev->this_bdev) &&
-		   mdev->state.disk >= D_OUTDATED &&
-		   mdev->state.conn < C_CONNECTED) {
+		    drbd_get_capacity(mdev->this_bdev) &&
+		    mdev->state.disk >= D_OUTDATED &&
+		    mdev->state.conn < C_CONNECTED) {
 			dev_err(DEV, "The peer's disk size is too small!\n");
 			conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
-			mdev->ldev->dc.disk_size = my_usize;
 			put_ldev(mdev);
 			return -EIO;
 		}
+
+		if (my_usize != p_usize) {
+			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
+
+			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
+			if (!new_disk_conf) {
+				dev_err(DEV, "Allocation of new disk_conf failed\n");
+				put_ldev(mdev);
+				return -ENOMEM;
+			}
+
+			mutex_lock(&mdev->tconn->conf_update);
+			old_disk_conf = mdev->ldev->disk_conf;
+			*new_disk_conf = *old_disk_conf;
+			new_disk_conf->disk_size = p_usize;
+
+			rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
+			mutex_unlock(&mdev->tconn->conf_update);
+			synchronize_rcu();
+			kfree(old_disk_conf);
+
+			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
+				 (unsigned long)my_usize);
+		}
+
 		put_ldev(mdev);
 	}
 
@@ -4268,7 +4306,9 @@ static int drbd_disconnected(int vnr, void *p, void *data)
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
+		rcu_read_lock();
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+		rcu_read_unlock();
 		put_ldev(mdev);
 	}
 
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 332781cfb556..cd55f46d5c55 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -483,13 +483,13 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 	enum drbd_state_rv rv = SS_SUCCESS;
 	struct net_conf *nc;
 
+	rcu_read_lock();
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
 		put_ldev(mdev);
 	}
 
-	rcu_read_lock();
 	nc = rcu_dereference(mdev->tconn->net_conf);
 	if (nc) {
 		if (!nc->two_primaries && ns.role == R_PRIMARY) {
@@ -674,7 +674,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
+		rcu_read_lock();
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+		rcu_read_unlock();
 		put_ldev(mdev);
 	}
 
@@ -1132,7 +1134,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
+		rcu_read_lock();
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+		rcu_read_unlock();
 		put_ldev(mdev);
 	}
 
@@ -1287,7 +1291,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		/* corresponding get_ldev was in __drbd_set_state, to serialize
 		 * our cleanup here with the transition to D_DISKLESS,
 		 * so it is safe to dreference ldev here. */
-		eh = mdev->ldev->dc.on_io_error;
+		rcu_read_lock();
+		eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
+		rcu_read_unlock();
 		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
 
 		/* current state still has to be D_FAILED,
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 5b645e107080..4f45f75173ac 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -436,6 +436,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value)
 
 static int drbd_rs_controller(struct drbd_conf *mdev)
 {
+	struct disk_conf *dc;
 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
 	unsigned int want;     /* The number of sectors we want in the proxy */
 	int req_sect;          /* Number of sectors to request in this turn */
@@ -449,14 +450,16 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 	mdev->rs_in_flight -= sect_in;
 
 	spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
+	rcu_read_lock();
+	dc = rcu_dereference(mdev->ldev->disk_conf);
 
-	steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
+	steps = mdev->rs_plan_s.size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
 
 	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
-		want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
+		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
 	} else { /* normal path */
-		want = mdev->ldev->dc.c_fill_target ? mdev->ldev->dc.c_fill_target :
-			sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10);
+		want = dc->c_fill_target ? dc->c_fill_target :
+			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
 	}
 
 	correction = want - mdev->rs_in_flight - mdev->rs_planed;
@@ -468,14 +471,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 
 	/* What we do in this step */
 	curr_corr = fifo_push(&mdev->rs_plan_s, 0);
-	spin_unlock(&mdev->peer_seq_lock);
 	mdev->rs_planed -= curr_corr;
 
 	req_sect = sect_in + curr_corr;
 	if (req_sect < 0)
 		req_sect = 0;
 
-	max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ;
+	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
 	if (req_sect > max_sect)
 		req_sect = max_sect;
 
@@ -484,6 +486,8 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 		 sect_in, mdev->rs_in_flight, want, correction,
 		 steps, cps, mdev->rs_planed, curr_corr, req_sect);
 	*/
+	rcu_read_unlock();
+	spin_unlock(&mdev->peer_seq_lock);
 
 	return req_sect;
 }
@@ -491,11 +495,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 static int drbd_rs_number_requests(struct drbd_conf *mdev)
 {
 	int number;
-	if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */
+	if (mdev->rs_plan_s.size) { /* rcu_dereference(mdev->ldev->disk_conf)->c_plan_ahead */
 		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
 		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
 	} else {
-		mdev->c_sync_rate = mdev->ldev->dc.resync_rate;
+		rcu_read_lock();
+		mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
+		rcu_read_unlock();
 		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
 	}
 
@@ -1320,13 +1326,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
 static int _drbd_may_sync_now(struct drbd_conf *mdev)
 {
 	struct drbd_conf *odev = mdev;
+	int ra;
 
 	while (1) {
 		if (!odev->ldev)
 			return 1;
-		if (odev->ldev->dc.resync_after == -1)
+		rcu_read_lock();
+		ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		rcu_read_unlock();
+		if (ra == -1)
 			return 1;
-		odev = minor_to_mdev(odev->ldev->dc.resync_after);
+		odev = minor_to_mdev(ra);
 		if (!expect(odev))
 			return 1;
 		if ((odev->state.conn >= C_SYNC_SOURCE &&
@@ -1405,6 +1415,7 @@ void suspend_other_sg(struct drbd_conf *mdev)
 enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor)
 {
 	struct drbd_conf *odev;
+	int ra;
 
 	if (o_minor == -1)
 		return NO_ERROR;
@@ -1417,12 +1428,15 @@ enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor)
 		if (odev == mdev)
 			return ERR_SYNC_AFTER_CYCLE;
 
+		rcu_read_lock();
+		ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		rcu_read_unlock();
 		/* dependency chain ends here, no cycles. */
-		if (odev->ldev->dc.resync_after == -1)
+		if (ra == -1)
 			return NO_ERROR;
 
 		/* follow the dependency chain */
-		odev = minor_to_mdev(odev->ldev->dc.resync_after);
+		odev = minor_to_mdev(ra);
 	}
 }
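
Not part of the patch itself: a minimal sketch of the locking discipline the patch establishes for mdev->ldev->disk_conf. The helper names my_read_option() and my_change_option() are hypothetical; the primitives (rcu_read_lock/rcu_dereference, the tconn->conf_update mutex, rcu_assign_pointer, synchronize_rcu) are the ones the hunks above actually use. Readers dereference the pointer inside a short RCU read-side section instead of copying the whole struct; updaters build a complete new disk_conf under the mutex, publish it, and free the old copy only after a grace period.

#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/slab.h>

/* Reader side: same pattern as __drbd_chk_io_error_() above. */
static int my_read_option(struct drbd_conf *mdev)
{
	int on_io_error;

	rcu_read_lock();
	on_io_error = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
	rcu_read_unlock();

	return on_io_error;
}

/* Updater side: same pattern as drbd_adm_disk_opts() above. */
static int my_change_option(struct drbd_conf *mdev, int new_on_io_error)
{
	struct disk_conf *old_disk_conf, *new_disk_conf;

	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf)
		return -ENOMEM;

	mutex_lock(&mdev->tconn->conf_update);
	old_disk_conf = mdev->ldev->disk_conf;
	*new_disk_conf = *old_disk_conf;		/* start from the current settings */
	new_disk_conf->on_io_error = new_on_io_error;
	rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
	mutex_unlock(&mdev->tconn->conf_update);

	synchronize_rcu();				/* wait out readers of the old copy */
	kfree(old_disk_conf);
	return 0;
}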