2019-05-22 15:51:37 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2009-09-26 07:07:19 +08:00
|
|
|
/*
|
|
|
|
drbd_req.h
|
|
|
|
|
|
|
|
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
|
|
|
|
|
|
|
|
Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
|
|
|
|
Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
|
|
|
|
Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _DRBD_REQ_H
|
|
|
|
#define _DRBD_REQ_H
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/drbd.h>
|
|
|
|
#include "drbd_int.h"
|
|
|
|
|
|
|
|
/* The request callbacks will be called in irq context by the IDE drivers,
|
|
|
|
and in Softirqs/Tasklets/BH context by the SCSI drivers,
|
|
|
|
and by the receiver and worker in kernel-thread context.
|
|
|
|
Try to get the locking right :) */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Objects of type struct drbd_request only exist on an R_PRIMARY node, and are
|
|
|
|
* associated with IO requests originating from the block layer above us.
|
|
|
|
*
|
|
|
|
* There are quite a few things that may happen to a drbd request
|
|
|
|
* during its lifetime.
|
|
|
|
*
|
|
|
|
* It will be created.
|
|
|
|
* It will be marked with the intention to be
|
|
|
|
* submitted to local disk and/or
|
|
|
|
* sent via the network.
|
|
|
|
*
|
|
|
|
* It has to be placed on the transfer log and other housekeeping lists,
|
|
|
|
* In case we have a network connection.
|
|
|
|
*
|
|
|
|
* It may be identified as a concurrent (write) request
|
|
|
|
* and be handled accordingly.
|
|
|
|
*
|
|
|
|
* It may be handed over to the local disk subsystem.
|
|
|
|
* It may be completed by the local disk subsystem,
|
tree-wide: Assorted spelling fixes
In particular, several occurances of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.
Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Joe Perches <joe@perches.com>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2010-02-03 08:01:28 +08:00
|
|
|
* either successfully or with io-error.
|
2009-09-26 07:07:19 +08:00
|
|
|
* In case it is a READ request, and it failed locally,
|
|
|
|
* it may be retried remotely.
|
|
|
|
*
|
|
|
|
* It may be queued for sending.
|
|
|
|
* It may be handed over to the network stack,
|
|
|
|
* which may fail.
|
|
|
|
* It may be acknowledged by the "peer" according to the wire_protocol in use.
|
|
|
|
* this may be a negative ack.
|
|
|
|
* It may receive a faked ack when the network connection is lost and the
|
|
|
|
* transfer log is cleaned up.
|
|
|
|
* Sending may be canceled due to network connection loss.
|
|
|
|
* When it finally has outlived its time,
|
|
|
|
* corresponding dirty bits in the resync-bitmap may be cleared or set,
|
|
|
|
* it will be destroyed,
|
|
|
|
* and completion will be signalled to the originator,
|
|
|
|
* with or without "success".
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Everything that can be applied to a struct drbd_request; this is the type
 * of the "what" argument of __req_mod(), _req_mod(), req_mod(), tl_restart()
 * and _tl_restart().  No enumerator has an explicit value; they are numbered
 * implicitly in declaration order, starting at 0. */
enum drbd_req_event {
	CREATED,
	TO_BE_SENT,
	TO_BE_SUBMITTED,

	/* XXX yes, now I am inconsistent...
	 * these are not "events" but "actions"
	 * oh, well... */
	QUEUE_FOR_NET_WRITE,
	QUEUE_FOR_NET_READ,
	QUEUE_FOR_SEND_OOS,

	/* An empty flush is queued as P_BARRIER,
	 * which will cause it to complete "successfully",
	 * even if the local disk flush failed.
	 *
	 * Just like "real" requests, empty flushes (blkdev_issue_flush()) will
	 * only see an error if neither local nor remote data is reachable. */
	QUEUE_AS_DRBD_BARRIER,

	SEND_CANCELED,
	SEND_FAILED,
	HANDED_OVER_TO_NETWORK,
	OOS_HANDED_TO_NETWORK,
	CONNECTION_LOST_WHILE_PENDING,
	READ_RETRY_REMOTE_CANCELED,
	RECV_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
	CONFLICT_RESOLVED,
	POSTPONE_WRITE,
	NEG_ACKED,
	BARRIER_ACKED, /* in protocol A and B */
	DATA_RECEIVED, /* (remote read) */

	COMPLETED_OK,
	READ_COMPLETED_WITH_ERROR,
	READ_AHEAD_COMPLETED_WITH_ERROR,
	WRITE_COMPLETED_WITH_ERROR,
	DISCARD_COMPLETED_NOTSUPP,
	DISCARD_COMPLETED_WITH_ERROR,

	ABORT_DISK_IO,
	RESEND,
	FAIL_FROZEN_DISK_IO,
	RESTART_FROZEN_DISK_IO,
	NOTHING,
};
|
|
|
|
|
|
|
|
/* encoding of request states for now. we don't actually need that many bits.
|
|
|
|
* we don't need to do atomic bit operations either, since most of the time we
|
|
|
|
* need to look at the connection state and/or manipulate some lists at the
|
|
|
|
* same time, so we should hold the request lock anyways.
|
|
|
|
*/
|
|
|
|
/* Bit numbers (not masks) of the request state flags.  The enumerators are
 * numbered implicitly from 0 in declaration order; the RQ_* macros in this
 * header turn each of them into the mask (1UL << bit). */
enum drbd_req_state_bits {
	/* 3210
	 * 0000: no local possible
	 * 0001: to be submitted
	 *    UNUSED, we could map: 011: submitted, completion still pending
	 * 0110: completed ok
	 * 0010: completed with error
	 * 1001: Aborted (before completion)
	 * 1x10: Aborted and completed -> free
	 */
	__RQ_LOCAL_PENDING,
	__RQ_LOCAL_COMPLETED,
	__RQ_LOCAL_OK,
	__RQ_LOCAL_ABORTED,

	/* 87654
	 * 00000: no network possible
	 * 00001: to be sent
	 * 00011: to be sent, on worker queue
	 * 00101: sent, expecting recv_ack (B) or write_ack (C)
	 * 11101: sent,
	 *        recv_ack (B) or implicit "ack" (A),
	 *        still waiting for the barrier ack.
	 *        master_bio may already be completed and invalidated.
	 * 11100: write acked (C),
	 *        data received (for remote read, any protocol)
	 *        or finally the barrier ack has arrived (B,A)...
	 *        request can be freed
	 * 01100: neg-acked (write, protocol C)
	 *        or neg-d-acked (read, any protocol)
	 *        or killed from the transfer log
	 *        during cleanup after connection loss
	 *        request can be freed
	 * 01000: canceled or send failed...
	 *        request can be freed
	 */

	/* if "SENT" is not set, yet, this can still fail or be canceled.
	 * if "SENT" is set already, we still wait for an Ack packet.
	 * when cleared, the master_bio may be completed.
	 * in (B,A) the request object may still linger on the transaction log
	 * until the corresponding barrier ack comes in */
	__RQ_NET_PENDING,

	/* If it is QUEUED, and it is a WRITE, it is also registered in the
	 * transfer log. Currently we need this flag to avoid conflicts between
	 * worker canceling the request and tl_clear_barrier killing it from
	 * transfer log.  We should restructure the code so this conflict does
	 * no longer occur. */
	__RQ_NET_QUEUED,

	/* well, actually only "handed over to the network stack".
	 *
	 * TODO can potentially be dropped because of the similar meaning
	 * of RQ_NET_SENT and ~RQ_NET_QUEUED.
	 * however it is not exactly the same. before we drop it
	 * we must ensure that we can tell a request with network part
	 * from a request without, regardless of what happens to it. */
	__RQ_NET_SENT,

	/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
	 * basically this means the corresponding P_BARRIER_ACK was received */
	__RQ_NET_DONE,

	/* whether or not we know (C) or pretend (B,A) that the write
	 * was successfully written on the peer.
	 */
	__RQ_NET_OK,

	/* peer called drbd_set_in_sync() for this write */
	__RQ_NET_SIS,

	/* keep this last, it's for the RQ_NET_MASK */
	__RQ_NET_MAX,

	/* Set when this is a write, clear for a read */
	__RQ_WRITE,
	/* NOTE(review): the next three look like write sub-type flags
	 * (write-same, discard/unmap, write-zeroes) -- confirm against the
	 * users of RQ_WSAME / RQ_UNMAP / RQ_ZEROES. */
	__RQ_WSAME,
	__RQ_UNMAP,
	__RQ_ZEROES,

	/* Should call drbd_al_complete_io() for this request... */
	__RQ_IN_ACT_LOG,

	/* This was the most recent request during some blk_finish_plug()
	 * or its implicit from-schedule equivalent.
	 * We may use it as hint to send a P_UNPLUG_REMOTE */
	__RQ_UNPLUG,

	/* The peer has sent a retry ACK */
	__RQ_POSTPONED,

	/* would have been completed,
	 * but was not, because of drbd_suspended() */
	__RQ_COMPLETION_SUSP,

	/* We expect a receive ACK (wire proto B) */
	__RQ_EXP_RECEIVE_ACK,

	/* We expect a write ACK (wire proto C) */
	__RQ_EXP_WRITE_ACK,

	/* waiting for a barrier ack, did an extra kref_get */
	__RQ_EXP_BARR_ACK,
};
|
|
|
|
|
|
|
|
/* Mask form of the bit numbers in enum drbd_req_state_bits:
 * RQ_FOO is (1UL << __RQ_FOO). */

/* local disk part of the request state */
#define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
#define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED)

/* every bit up to and including RQ_LOCAL_ABORTED */
#define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1)-1)

/* network part of the request state */
#define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
#define RQ_NET_SENT        (1UL << __RQ_NET_SENT)
#define RQ_NET_DONE        (1UL << __RQ_NET_DONE)
#define RQ_NET_OK          (1UL << __RQ_NET_OK)
#define RQ_NET_SIS         (1UL << __RQ_NET_SIS)

/* every bit below __RQ_NET_MAX that is not part of RQ_LOCAL_MASK */
#define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)

/* remaining flags, one mask per bit */
#define RQ_WRITE           (1UL << __RQ_WRITE)
#define RQ_WSAME           (1UL << __RQ_WSAME)
#define RQ_UNMAP           (1UL << __RQ_UNMAP)
#define RQ_ZEROES          (1UL << __RQ_ZEROES)
#define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
#define RQ_UNPLUG          (1UL << __RQ_UNPLUG)
#define RQ_POSTPONED       (1UL << __RQ_POSTPONED)
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
#define RQ_EXP_WRITE_ACK   (1UL << __RQ_EXP_WRITE_ACK)
#define RQ_EXP_BARR_ACK    (1UL << __RQ_EXP_BARR_ACK)
|
2010-05-27 21:07:43 +08:00
|
|
|
|
2010-05-12 23:08:26 +08:00
|
|
|
/* For waking up the frozen transfer log, mod_req() has to return whether
 * the request should be counted in the epoch object. */
#define MR_WRITE       1
#define MR_READ        2
|
2010-05-12 23:08:26 +08:00
|
|
|
|
2009-09-26 07:07:19 +08:00
|
|
|
/* Short lived temporary struct on the stack.
 * We could squirrel the error to be returned into
 * bio->bi_iter.bi_size, or similar. But that would be too ugly.
 *
 * _req_mod() and req_mod() pass one of these to __req_mod() and, when
 * ->bio comes back non-NULL, hand it on to complete_master_bio(). */
struct bio_and_error {
	struct bio *bio;	/* bio to complete, or NULL if there is none */
	int error;		/* error to go with that bio */
};
|
|
|
|
|
2011-05-30 22:32:41 +08:00
|
|
|
/* Request state machine and transfer log entry points declared for users of
 * this header.
 * NOTE(review): none of these are defined here; presumably they live in
 * drbd_req.c unless noted otherwise -- confirm. */
extern void start_new_tl_epoch(struct drbd_connection *connection);
extern void drbd_req_destroy(struct kref *kref);
extern void _req_may_be_done(struct drbd_request *req,
		struct bio_and_error *m);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct bio_and_error *m);
extern void complete_master_bio(struct drbd_device *device,
		struct bio_and_error *m);
extern void request_timer_fn(struct timer_list *t);
extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void tl_abort_disk_io(struct drbd_device *device);

/* this is in drbd_main.c */
extern void drbd_restart_request(struct drbd_request *req);
|
drbd: fix potential deadlock during "restart" of conflicting writes
w_restart_write(), run from worker context, calls __drbd_make_request()
and further drbd_al_begin_io(, delegate=true), which then
potentially deadlocks. The previous patch moved a BUG_ON to expose
such call paths, which would now be triggered.
Also, if we call __drbd_make_request() from resource worker context,
like w_restart_write() did, and that should block for whatever reason
(!drbd_state_is_stable(), resource suspended, ...),
we potentially deadlock the whole resource, as the worker
is needed for state changes and other things.
Create a dedicated retry workqueue for this instead.
Also make sure that inc_ap_bio()/dec_ap_bio() are properly paired,
even if do_retry() needs to retry itself,
in case __drbd_make_request() returns != 0.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
2011-11-24 17:36:25 +08:00
|
|
|
|
2009-09-26 07:07:19 +08:00
|
|
|
/* use this if you don't want to deal with calling complete_master_bio()
|
|
|
|
* outside the spinlock, e.g. when walking some list on cleanup. */
|
2010-06-09 20:07:43 +08:00
|
|
|
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
|
2009-09-26 07:07:19 +08:00
|
|
|
{
|
2011-07-28 21:27:51 +08:00
|
|
|
struct drbd_device *device = req->device;
|
2009-09-26 07:07:19 +08:00
|
|
|
struct bio_and_error m;
|
2010-06-09 20:07:43 +08:00
|
|
|
int rv;
|
2009-09-26 07:07:19 +08:00
|
|
|
|
|
|
|
/* __req_mod possibly frees req, do not touch req after that! */
|
2010-06-09 20:07:43 +08:00
|
|
|
rv = __req_mod(req, what, &m);
|
2009-09-26 07:07:19 +08:00
|
|
|
if (m.bio)
|
2011-07-03 19:26:43 +08:00
|
|
|
complete_master_bio(device, &m);
|
2010-06-09 20:07:43 +08:00
|
|
|
|
|
|
|
return rv;
|
2009-09-26 07:07:19 +08:00
|
|
|
}
|
|
|
|
|
2010-10-26 22:02:27 +08:00
|
|
|
/* completion of master bio is outside of our spinlock.
|
|
|
|
* We still may or may not be inside some irqs disabled section
|
|
|
|
* of the lower level driver completion callback, so we need to
|
|
|
|
* spin_lock_irqsave here. */
|
2010-06-09 20:07:43 +08:00
|
|
|
static inline int req_mod(struct drbd_request *req,
|
2009-09-26 07:07:19 +08:00
|
|
|
enum drbd_req_event what)
|
|
|
|
{
|
2010-10-26 22:02:27 +08:00
|
|
|
unsigned long flags;
|
2011-07-28 21:27:51 +08:00
|
|
|
struct drbd_device *device = req->device;
|
2009-09-26 07:07:19 +08:00
|
|
|
struct bio_and_error m;
|
2010-06-09 20:07:43 +08:00
|
|
|
int rv;
|
|
|
|
|
2011-07-07 20:19:42 +08:00
|
|
|
spin_lock_irqsave(&device->resource->req_lock, flags);
|
2010-06-09 20:07:43 +08:00
|
|
|
rv = __req_mod(req, what, &m);
|
2011-07-07 20:19:42 +08:00
|
|
|
spin_unlock_irqrestore(&device->resource->req_lock, flags);
|
2009-09-26 07:07:19 +08:00
|
|
|
|
|
|
|
if (m.bio)
|
2011-07-03 19:26:43 +08:00
|
|
|
complete_master_bio(device, &m);
|
2010-06-09 20:07:43 +08:00
|
|
|
|
|
|
|
return rv;
|
2009-09-26 07:07:19 +08:00
|
|
|
}
|
2011-01-18 03:27:30 +08:00
|
|
|
|
2014-08-08 23:48:00 +08:00
|
|
|
/* Takes a union drbd_dev_state by value and returns a bool.
 * NOTE(review): not defined in this header; presumably implemented in
 * drbd_req.c -- confirm. */
extern bool drbd_should_do_remote(union drbd_dev_state);
|
2011-01-18 03:27:30 +08:00
|
|
|
|
2009-09-26 07:07:19 +08:00
|
|
|
#endif
|