summary refs log tree commit diff
path: root/drivers/block/drbd/drbd_req.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-10-23 04:03:12 +0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-10-23 04:03:12 +0400
commit 8abfc6e7a45eb74e51904bbae676fae008b11366 (patch)
tree 57d0a24558c0693e3a52e8e756616f6c72def1e9 /drivers/block/drbd/drbd_req.c
parent e9dd2b6837e26fe202708cce5ea4bb4ee3e3482e (diff)
parent 6362beea8914cbd4630ccde3617d944aeca2d48f (diff)
download linux-8abfc6e7a45eb74e51904bbae676fae008b11366.tar.xz
Merge branch 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block: (95 commits) cciss: fix PCI IDs for new Smart Array controllers drbd: add race-breaker to drbd_go_diskless drbd: use dynamic_dev_dbg to optionally log uuid changes dynamic_debug.h: Fix dynamic_dev_dbg() macro if CONFIG_DYNAMIC_DEBUG not set drbd: cleanup: change "<= 0" to "== 0" drbd: relax the grace period of the md_sync timer again drbd: add some more explicit drbd_md_sync drbd: drop wrong debug asserts, fix recently introduced race drbd: cleanup useless leftover warn/error printk's drbd: add explicit drbd_md_sync to drbd_resync_finished drbd: Do not log an ASSERT for P_OV_REQUEST packets while C_CONNECTED drbd: fix for possible deadlock on IO error during resync drbd: fix unlikely access after free and list corruption drbd: fix for spurious fullsync (uuids rotated too fast) drbd: allow for explicit resync-finished notifications drbd: preparation commit, using full state in receive_state() drbd: drbd_send_ack_dp must not rely on header information drbd: Fix regression in recv_bm_rle_bits (compressed bitmap) drbd: Fixed a stupid copy and paste error drbd: Allow larger values for c-fill-target. ... Fix up trivial conflict in drivers/block/ataflop.c due to BKL removal
Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r-- drivers/block/drbd/drbd_req.c 165
1 files changed, 99 insertions, 66 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f761d98a4e90..9e91a2545fc8 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -59,17 +59,19 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
{
const unsigned long s = req->rq_state;
+
+ /* remove it from the transfer log.
+ * well, only if it had been there in the first
+ * place... if it had not (local only or conflicting
+ * and never sent), it should still be "empty" as
+ * initialized in drbd_req_new(), so we can list_del() it
+ * here unconditionally */
+ list_del(&req->tl_requests);
+
/* if it was a write, we may have to set the corresponding
* bit(s) out-of-sync first. If it had a local part, we need to
* release the reference to the activity log. */
if (rw == WRITE) {
- /* remove it from the transfer log.
- * well, only if it had been there in the first
- * place... if it had not (local only or conflicting
- * and never sent), it should still be "empty" as
- * initialized in drbd_req_new(), so we can list_del() it
- * here unconditionally */
- list_del(&req->tl_requests);
/* Set out-of-sync unless both OK flags are set
* (local only or remote failed).
* Other places where we set out-of-sync:
@@ -92,7 +94,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
*/
if (s & RQ_LOCAL_MASK) {
if (get_ldev_if_state(mdev, D_FAILED)) {
- drbd_al_complete_io(mdev, req->sector);
+ if (s & RQ_IN_ACT_LOG)
+ drbd_al_complete_io(mdev, req->sector);
put_ldev(mdev);
} else if (__ratelimit(&drbd_ratelimit_state)) {
dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
@@ -280,6 +283,14 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
* protocol A or B, barrier ack still pending... */
}
+static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
+{
+ struct drbd_conf *mdev = req->mdev;
+
+ if (!is_susp(mdev->state)) /* forward to _req_may_be_done() only when is_susp() is false for the device state */
+ _req_may_be_done(req, m);
+}
+
/*
* checks whether there was an overlapping request
* or ee already registered.
@@ -380,10 +391,11 @@ out_conflict:
* and it enforces that we have to think in a very structured manner
* about the "events" that may happen to a request during its life time ...
*/
-void __req_mod(struct drbd_request *req, enum drbd_req_event what,
+int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m)
{
struct drbd_conf *mdev = req->mdev;
+ int rv = 0;
m->bio = NULL;
switch (what) {
@@ -420,7 +432,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
req->rq_state &= ~RQ_LOCAL_PENDING;
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
@@ -429,7 +441,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~RQ_LOCAL_PENDING;
__drbd_chk_io_error(mdev, FALSE);
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
@@ -437,7 +449,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* it is legal to fail READA */
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
@@ -455,7 +467,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* no point in retrying if there is no good remote data,
* or we have no connection. */
if (mdev->state.pdsk != D_UP_TO_DATE) {
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
break;
}
@@ -517,11 +529,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
req->epoch = mdev->newest_tle->br_number;
- list_add_tail(&req->tl_requests,
- &mdev->newest_tle->requests);
/* increment size of current epoch */
- mdev->newest_tle->n_req++;
+ mdev->newest_tle->n_writes++;
/* queue work item to send data */
D_ASSERT(req->rq_state & RQ_NET_PENDING);
@@ -530,7 +540,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
drbd_queue_work(&mdev->data.work, &req->w);
/* close the epoch, in case it outgrew the limit */
- if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size)
+ if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size)
queue_barrier(mdev);
break;
@@ -543,7 +553,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~RQ_NET_QUEUED;
/* if we did it right, tl_clear should be scheduled only after
* this, so this should not be necessary! */
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
break;
case handed_over_to_network:
@@ -568,7 +578,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
* "completed_ok" events came in, once we return from
* _drbd_send_zc_bio (drbd_send_dblock), we have to check
* whether it is done already, and end it. */
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
break;
case read_retry_remote_canceled:
@@ -584,7 +594,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* if it is still queued, we may not complete it here.
* it will be canceled soon. */
if (!(req->rq_state & RQ_NET_QUEUED))
- _req_may_be_done(req, m);
+ _req_may_be_done(req, m); /* Allowed while state.susp */
break;
case write_acked_by_peer_and_sis:
@@ -619,7 +629,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(req->rq_state & RQ_NET_PENDING);
dec_ap_pending(mdev);
req->rq_state &= ~RQ_NET_PENDING;
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
break;
case neg_acked:
@@ -629,11 +639,50 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
req->rq_state |= RQ_NET_DONE;
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
/* else: done by handed_over_to_network */
break;
+ case fail_frozen_disk_io:
+ if (!(req->rq_state & RQ_LOCAL_COMPLETED))
+ break;
+
+ _req_may_be_done(req, m); /* Allowed while state.susp */
+ break;
+
+ case restart_frozen_disk_io:
+ if (!(req->rq_state & RQ_LOCAL_COMPLETED))
+ break;
+
+ req->rq_state &= ~RQ_LOCAL_COMPLETED;
+
+ rv = MR_READ;
+ if (bio_data_dir(req->master_bio) == WRITE)
+ rv = MR_WRITE;
+
+ get_ldev(mdev);
+ req->w.cb = w_restart_disk_io;
+ drbd_queue_work(&mdev->data.work, &req->w);
+ break;
+
+ case resend:
+ /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
+ before the connection loss (B&C only); only P_BARRIER_ACK was missing.
+ Throwing them out of the TL here by pretending we got a BARRIER_ACK.
+ We ensure that the peer was not rebooted. */
+ if (!(req->rq_state & RQ_NET_OK)) {
+ if (req->w.cb) {
+ drbd_queue_work(&mdev->data.work, &req->w);
+ rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
+ }
+ break;
+ }
+ /* else, fall through to barrier_acked */
+
case barrier_acked:
+ if (!(req->rq_state & RQ_WRITE))
+ break;
+
if (req->rq_state & RQ_NET_PENDING) {
/* barrier came in before all requests have been acked.
* this is bad, because if the connection is lost now,
@@ -643,7 +692,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
}
D_ASSERT(req->rq_state & RQ_NET_SENT);
req->rq_state |= RQ_NET_DONE;
- _req_may_be_done(req, m);
+ _req_may_be_done(req, m); /* Allowed while state.susp */
break;
case data_received:
@@ -651,9 +700,11 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
dec_ap_pending(mdev);
req->rq_state &= ~RQ_NET_PENDING;
req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
- _req_may_be_done(req, m);
+ _req_may_be_done_not_susp(req, m);
break;
};
+
+ return rv;
}
/* we may do a local read if:
@@ -752,14 +803,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
* resync extent to finish, and, if necessary, pulls in the target
* extent into the activity log, which involves further disk io because
* of transactional on-disk meta data updates. */
- if (rw == WRITE && local)
+ if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+ req->rq_state |= RQ_IN_ACT_LOG;
drbd_al_begin_io(mdev, sector);
+ }
remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
- if (!(local || remote) && !mdev->state.susp) {
+ if (!(local || remote) && !is_susp(mdev->state)) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
goto fail_free_complete;
}
@@ -785,7 +838,7 @@ allocate_barrier:
/* GOOD, everything prepared, grab the spin_lock */
spin_lock_irq(&mdev->req_lock);
- if (mdev->state.susp) {
+ if (is_susp(mdev->state)) {
/* If we got suspended, use the retry mechanism of
generic_make_request() to restart processing of this
bio. In the next call to drbd_make_request_26
@@ -867,30 +920,10 @@ allocate_barrier:
/* check this request on the collision detection hash tables.
* if we have a conflict, just complete it here.
* THINK do we want to check reads, too? (I don't think so...) */
- if (rw == WRITE && _req_conflicts(req)) {
- /* this is a conflicting request.
- * even though it may have been only _partially_
- * overlapping with one of the currently pending requests,
- * without even submitting or sending it, we will
- * pretend that it was successfully served right now.
- */
- if (local) {
- bio_put(req->private_bio);
- req->private_bio = NULL;
- drbd_al_complete_io(mdev, req->sector);
- put_ldev(mdev);
- local = 0;
- }
- if (remote)
- dec_ap_pending(mdev);
- _drbd_end_io_acct(mdev, req);
- /* THINK: do we want to fail it (-EIO), or pretend success? */
- bio_endio(req->master_bio, 0);
- req->master_bio = NULL;
- dec_ap_bio(mdev);
- drbd_req_free(req);
- remote = 0;
- }
+ if (rw == WRITE && _req_conflicts(req))
+ goto fail_conflicting;
+
+ list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
/* NOTE remote first: to get the concurrent write detection right,
* we must register the request before start of local IO. */
@@ -923,6 +956,21 @@ allocate_barrier:
return 0;
+fail_conflicting:
+ /* this is a conflicting request.
+ * even though it may have been only _partially_
+ * overlapping with one of the currently pending requests,
+ * without even submitting or sending it, we will
+ * pretend that it was successfully served right now.
+ */
+ _drbd_end_io_acct(mdev, req);
+ spin_unlock_irq(&mdev->req_lock);
+ if (remote)
+ dec_ap_pending(mdev);
+ /* THINK: do we want to fail it (-EIO), or pretend success?
+ * this pretends success. */
+ err = 0;
+
fail_free_complete:
if (rw == WRITE && local)
drbd_al_complete_io(mdev, sector);
@@ -961,21 +1009,6 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
return 1;
}
- /*
- * Paranoia: we might have been primary, but sync target, or
- * even diskless, then lost the connection.
- * This should have been handled (panic? suspend?) somewhere
- * else. But maybe it was not, so check again here.
- * Caution: as long as we do not have a read/write lock on mdev,
- * to serialize state changes, this is racy, since we may lose
- * the connection *after* we test for the cstate.
- */
- if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) {
- if (__ratelimit(&drbd_ratelimit_state))
- dev_err(DEV, "Sorry, I have no access to good data anymore.\n");
- return 1;
- }
-
return 0;
}