Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 316
1 file changed, 66 insertions(+), 250 deletions(-)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7e37149684e4..8c6c48e363cd 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -188,147 +188,75 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
 #endif
 
 /**
- * DOC: The transfer log
- *
- * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
- * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail
- * of the list. There is always at least one &struct drbd_tl_epoch object.
- *
- * Each &struct drbd_tl_epoch has a circular double linked list of requests
- * attached.
- */
-static int tl_init(struct drbd_tconn *tconn)
-{
-	struct drbd_tl_epoch *b;
-
-	/* during device minor initialization, we may well use GFP_KERNEL */
-	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
-	if (!b)
-		return 0;
-	INIT_LIST_HEAD(&b->requests);
-	INIT_LIST_HEAD(&b->w.list);
-	b->next = NULL;
-	b->br_number = atomic_inc_return(&tconn->current_tle_nr);
-	b->n_writes = 0;
-	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-
-	tconn->oldest_tle = b;
-	tconn->newest_tle = b;
-	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);
-	INIT_LIST_HEAD(&tconn->barrier_acked_requests);
-
-	return 1;
-}
-
-static void tl_cleanup(struct drbd_tconn *tconn)
-{
-	if (tconn->oldest_tle != tconn->newest_tle)
-		conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n");
-	if (!list_empty(&tconn->out_of_sequence_requests))
-		conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n");
-	kfree(tconn->oldest_tle);
-	tconn->oldest_tle = NULL;
-	kfree(tconn->unused_spare_tle);
-	tconn->unused_spare_tle = NULL;
-}
-
-/**
- * _tl_add_barrier() - Adds a barrier to the transfer log
- * @mdev:	DRBD device.
- * @new:	Barrier to be added before the current head of the TL.
- *
- * The caller must hold the req_lock.
- */
-void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
-{
-	INIT_LIST_HEAD(&new->requests);
-	INIT_LIST_HEAD(&new->w.list);
-	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-	new->next = NULL;
-	new->n_writes = 0;
-
-	new->br_number = atomic_inc_return(&tconn->current_tle_nr);
-	if (tconn->newest_tle != new) {
-		tconn->newest_tle->next = new;
-		tconn->newest_tle = new;
-	}
-}
-
-/**
- * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
- * @mdev:	DRBD device.
+ * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch
+ * @tconn:	DRBD connection.
  * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
  * @set_size:	Expected number of requests before that barrier.
  *
  * In case the passed barrier_nr or set_size does not match the oldest
- * &struct drbd_tl_epoch objects this function will cause a termination
- * of the connection.
+ * epoch of not yet barrier-acked requests, this function will cause a
+ * termination of the connection.
  */
 void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
 		unsigned int set_size)
 {
-	struct drbd_conf *mdev;
-	struct drbd_tl_epoch *b, *nob; /* next old barrier */
-	struct list_head *le, *tle;
 	struct drbd_request *r;
+	struct drbd_request *req = NULL;
+	int expect_epoch = 0;
+	int expect_size = 0;
 
 	spin_lock_irq(&tconn->req_lock);
 
-	b = tconn->oldest_tle;
+	/* find latest not yet barrier-acked write request,
+	 * count writes in its epoch. */
+	list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
+		const unsigned long s = r->rq_state;
+		if (!req) {
+			if (!(s & RQ_WRITE))
+				continue;
+			if (!(s & RQ_NET_MASK))
+				continue;
+			if (s & RQ_NET_DONE)
+				continue;
+			req = r;
+			expect_epoch = req->epoch;
+			expect_size ++;
+		} else {
+			if (r->epoch != expect_epoch)
+				break;
+			if (!(s & RQ_WRITE))
+				continue;
+			/* if (s & RQ_DONE): not expected */
+			/* if (!(s & RQ_NET_MASK)): not expected */
+			expect_size++;
+		}
+	}
 
 	/* first some paranoia code */
-	if (b == NULL) {
+	if (req == NULL) {
 		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
 			 barrier_nr);
 		goto bail;
 	}
-	if (b->br_number != barrier_nr) {
+	if (expect_epoch != barrier_nr) {
 		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
-			 barrier_nr, b->br_number);
+			 barrier_nr, expect_epoch);
 		goto bail;
 	}
-	if (b->n_writes != set_size) {
+
+	if (expect_size != set_size) {
 		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
-			 barrier_nr, set_size, b->n_writes);
+			 barrier_nr, set_size, expect_size);
 		goto bail;
 	}
 
 	/* Clean up list of requests processed during current epoch */
-	list_for_each_safe(le, tle, &b->requests) {
-		r = list_entry(le, struct drbd_request, tl_requests);
-		_req_mod(r, BARRIER_ACKED);
-	}
-	/* There could be requests on the list waiting for completion
-	   of the write to the local disk. To avoid corruptions of
-	   slab's data structures we have to remove the lists head.
-
-	   Also there could have been a barrier ack out of sequence, overtaking
-	   the write acks - which would be a bug and violating write ordering.
-	   To not deadlock in case we lose connection while such requests are
-	   still pending, we need some way to find them for the
-	   _req_mode(CONNECTION_LOST_WHILE_PENDING).
-
-	   These have been list_move'd to the out_of_sequence_requests list in
-	   _req_mod(, BARRIER_ACKED) above.
-	   */
-	list_splice_init(&b->requests, &tconn->barrier_acked_requests);
-	mdev = b->w.mdev;
-
-	nob = b->next;
-	if (test_and_clear_bit(CREATE_BARRIER, &tconn->flags)) {
-		_tl_add_barrier(tconn, b);
-		if (nob)
-			tconn->oldest_tle = nob;
-		/* if nob == NULL b was the only barrier, and becomes the new
-		   barrier. Therefore tconn->oldest_tle points already to b */
-	} else {
-		D_ASSERT(nob != NULL);
-		tconn->oldest_tle = nob;
-		kfree(b);
+	list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
+		if (req->epoch != expect_epoch)
+			break;
+		_req_mod(req, BARRIER_ACKED);
 	}
-
 	spin_unlock_irq(&tconn->req_lock);
-	dec_ap_pending(mdev);
 
 	return;
 
@@ -346,91 +274,20 @@ bail:
  * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
  * RESTART_FROZEN_DISK_IO.
  */
+/* must hold resource->req_lock */
 void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 {
-	struct drbd_tl_epoch *b, *tmp, **pn;
-	struct list_head *le, *tle, carry_reads;
-	struct drbd_request *req;
-	int rv, n_writes, n_reads;
-
-	b = tconn->oldest_tle;
-	pn = &tconn->oldest_tle;
-	while (b) {
-		n_writes = 0;
-		n_reads = 0;
-		INIT_LIST_HEAD(&carry_reads);
-		list_for_each_safe(le, tle, &b->requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			rv = _req_mod(req, what);
-
-			if (rv & MR_WRITE)
-				n_writes++;
-			if (rv & MR_READ)
-				n_reads++;
-		}
-		tmp = b->next;
-
-		if (n_writes) {
-			if (what == RESEND) {
-				b->n_writes = n_writes;
-				if (b->w.cb == NULL) {
-					b->w.cb = w_send_barrier;
-					inc_ap_pending(b->w.mdev);
-					set_bit(CREATE_BARRIER, &tconn->flags);
-				}
-
-				drbd_queue_work(&tconn->sender_work, &b->w);
-			}
-			pn = &b->next;
-		} else {
-			if (n_reads)
-				list_add(&carry_reads, &b->requests);
-			/* there could still be requests on that ring list,
-			 * in case local io is still pending */
-			list_del(&b->requests);
-
-			/* dec_ap_pending corresponding to queue_barrier.
-			 * the newest barrier may not have been queued yet,
-			 * in which case w.cb is still NULL. */
-			if (b->w.cb != NULL)
-				dec_ap_pending(b->w.mdev);
-
-			if (b == tconn->newest_tle) {
-				/* recycle, but reinit! */
-				if (tmp != NULL)
-					conn_err(tconn, "ASSERT FAILED tmp == NULL");
-				INIT_LIST_HEAD(&b->requests);
-				list_splice(&carry_reads, &b->requests);
-				INIT_LIST_HEAD(&b->w.list);
-				b->w.cb = NULL;
-				b->br_number = atomic_inc_return(&tconn->current_tle_nr);
-				b->n_writes = 0;
-
-				*pn = b;
-				break;
-			}
-			*pn = tmp;
-			kfree(b);
-		}
-		b = tmp;
-		list_splice(&carry_reads, &b->requests);
-	}
-
-	/* Actions operating on the disk state, also want to work on
-	   requests that got barrier acked. */
-	switch (what) {
-	case FAIL_FROZEN_DISK_IO:
-	case RESTART_FROZEN_DISK_IO:
-		list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			_req_mod(req, what);
-		}
-	case CONNECTION_LOST_WHILE_PENDING:
-	case RESEND:
-		break;
-	default:
-		conn_err(tconn, "what = %d in _tl_restart()\n", what);
-	}
+	struct drbd_request *req, *r;
+
+	list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests)
+		_req_mod(req, what);
+}
+
+void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
+{
+	spin_lock_irq(&tconn->req_lock);
+	_tl_restart(tconn, what);
+	spin_unlock_irq(&tconn->req_lock);
 }
 
 /**
@@ -443,36 +300,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
  */
 void tl_clear(struct drbd_tconn *tconn)
 {
-	struct list_head *le, *tle;
-	struct drbd_request *r;
-
-	spin_lock_irq(&tconn->req_lock);
-
-	_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
-
-	/* we expect this list to be empty. */
-	if (!list_empty(&tconn->out_of_sequence_requests))
-		conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");
-
-	/* but just in case, clean it up anyways! */
-	list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
-		r = list_entry(le, struct drbd_request, tl_requests);
-		/* It would be nice to complete outside of spinlock.
-		 * But this is easier for now. */
-		_req_mod(r, CONNECTION_LOST_WHILE_PENDING);
-	}
-
-	/* ensure bit indicating barrier is required is clear */
-	clear_bit(CREATE_BARRIER, &tconn->flags);
-
-	spin_unlock_irq(&tconn->req_lock);
-}
-
-void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
-{
-	spin_lock_irq(&tconn->req_lock);
-	_tl_restart(tconn, what);
-	spin_unlock_irq(&tconn->req_lock);
+	tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
 }
 
 /**
@@ -482,31 +310,16 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 void tl_abort_disk_io(struct drbd_conf *mdev)
 {
 	struct drbd_tconn *tconn = mdev->tconn;
-	struct drbd_tl_epoch *b;
-	struct list_head *le, *tle;
-	struct drbd_request *req;
+	struct drbd_request *req, *r;
 
 	spin_lock_irq(&tconn->req_lock);
-	b = tconn->oldest_tle;
-	while (b) {
-		list_for_each_safe(le, tle, &b->requests) {
-			req = list_entry(le, struct drbd_request, tl_requests);
-			if (!(req->rq_state & RQ_LOCAL_PENDING))
-				continue;
-			if (req->w.mdev == mdev)
-				_req_mod(req, ABORT_DISK_IO);
-		}
-		b = b->next;
-	}
-
-	list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
-		req = list_entry(le, struct drbd_request, tl_requests);
+	list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
 		if (!(req->rq_state & RQ_LOCAL_PENDING))
 			continue;
-		if (req->w.mdev == mdev)
-			_req_mod(req, ABORT_DISK_IO);
+		if (req->w.mdev != mdev)
+			continue;
+		_req_mod(req, ABORT_DISK_IO);
 	}
-
 	spin_unlock_irq(&tconn->req_lock);
 }
 
@@ -2680,17 +2493,21 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
 	if (set_resource_options(tconn, res_opts))
 		goto fail;
 
-	if (!tl_init(tconn))
-		goto fail;
-
 	tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
 	if (!tconn->current_epoch)
 		goto fail;
+
+	INIT_LIST_HEAD(&tconn->transfer_log);
+
 	INIT_LIST_HEAD(&tconn->current_epoch->list);
 	tconn->epochs = 1;
 	spin_lock_init(&tconn->epoch_lock);
 	tconn->write_ordering = WO_bdev_flush;
 
+	tconn->send.seen_any_write_yet = false;
+	tconn->send.current_epoch_nr = 0;
+	tconn->send.current_epoch_writes = 0;
+
 	tconn->cstate = C_STANDALONE;
 	mutex_init(&tconn->cstate_mutex);
 	spin_lock_init(&tconn->req_lock);
@@ -2713,7 +2530,6 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
 
 fail:
 	kfree(tconn->current_epoch);
-	tl_cleanup(tconn);
 	free_cpumask_var(tconn->cpu_mask);
 	drbd_free_socket(&tconn->meta);
 	drbd_free_socket(&tconn->data);
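The core of this rework is visible in the new tl_release(): instead of popping a per-barrier drbd_tl_epoch object, one pass over the flat transfer log finds the oldest write that has not yet been barrier-acked, counts the writes of its epoch, and validates both against the peer's barrier ack. Below is a minimal userspace sketch of that walk; the array, flag bits, and function name are illustrative stand-ins, not the kernel's.

/* Sketch: validate a barrier ack against a flat, ordered transfer log.
 * Stand-in flags; the kernel's real RQ_* bits live in drbd_req.h. */
#include <stdio.h>

#define RQ_WRITE    (1u << 0)
#define RQ_NET_MASK (1u << 1)	/* request was handed to the network layer */
#define RQ_NET_DONE (1u << 2)	/* network is done with it (already acked) */

struct request {
	unsigned rq_state;
	int epoch;
};

/* Returns 0 when barrier_nr/set_size match the oldest pending epoch. */
static int check_barrier_ack(const struct request *log, int n,
			     int barrier_nr, int set_size)
{
	int expect_epoch = 0, expect_size = 0, found = 0;

	for (int i = 0; i < n; i++) {
		const unsigned s = log[i].rq_state;

		if (!found) {
			/* skip reads, purely local writes, finished writes */
			if (!(s & RQ_WRITE) || !(s & RQ_NET_MASK) || (s & RQ_NET_DONE))
				continue;
			found = 1;
			expect_epoch = log[i].epoch;
			expect_size++;
		} else {
			if (log[i].epoch != expect_epoch)
				break;	/* next epoch starts; stop counting */
			if (s & RQ_WRITE)
				expect_size++;
		}
	}

	if (!found || expect_epoch != barrier_nr || expect_size != set_size) {
		fprintf(stderr, "BAD barrier ack #%d (expected #%d, n_writes=%d)\n",
			barrier_nr, expect_epoch, expect_size);
		return -1;
	}
	return 0;
}

int main(void)
{
	const struct request log[] = {
		{ RQ_WRITE | RQ_NET_MASK | RQ_NET_DONE, 6 },	/* already acked */
		{ RQ_WRITE | RQ_NET_MASK, 7 },
		{ RQ_WRITE | RQ_NET_MASK, 7 },
		{ RQ_WRITE | RQ_NET_MASK, 8 },
	};
	/* epoch 7 has two pending writes, so ack #7 with set_size 2 passes */
	return check_barrier_ack(log, 4, 7, 2) ? 1 : 0;
}

The three error paths mirror the conn_err() "paranoia" checks in the patch: no pending epoch at all, a mismatched barrier number, or a mismatched write count.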
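All three rewritten walks (tl_release(), _tl_restart(), tl_abort_disk_io()) use list_for_each_entry_safe() because _req_mod() may unlink and free the entry currently under iteration; the "safe" variant caches the next pointer before the loop body runs. A toy userspace re-implementation of the idiom, with deliberately simplified versions of the <linux/list.h> helpers:

/* Sketch: why the cleanup loops need the "_safe" list iterator. */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* "safe" traversal: n caches e->next, so the body may unlink and free e */
#define list_for_each_safe(e, n, head) \
	for (e = (head)->next, n = e->next; e != (head); e = n, n = e->next)

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
	entry->prev = head->prev;
	entry->next = head;
	head->prev->next = entry;
	head->prev = entry;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

struct request { int epoch; struct list_head tl_requests; };

int main(void)
{
	struct list_head transfer_log;
	struct list_head *e, *n;

	list_init(&transfer_log);
	for (int i = 0; i < 3; i++) {
		struct request *r = malloc(sizeof(*r));
		if (!r)
			return 1;
		r->epoch = 7;
		list_add_tail(&r->tl_requests, &transfer_log);
	}

	list_for_each_safe(e, n, &transfer_log) {
		struct request *r = container_of(e, struct request, tl_requests);
		printf("completing request in epoch %d\n", r->epoch);
		list_del(e);	/* what _req_mod() may do internally */
		free(r);	/* a plain list_for_each would now read freed memory */
	}
	return 0;
}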
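The patch also settles on the kernel's usual locked/unlocked pairing: _tl_restart() now carries the comment "must hold resource->req_lock", tl_restart() becomes the wrapper that takes and drops the lock, and tl_clear() collapses to tl_restart(CONNECTION_LOST_WHILE_PENDING). A pthread-based analogue of that pattern (the _demo names are hypothetical):

/* Sketch: the locked/unlocked function pairing, pthread edition. */
#include <pthread.h>

static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static int last_event;

/* must hold req_lock -- mirrors the comment added to _tl_restart() */
static void _tl_restart_demo(int what)
{
	last_event = what;	/* the real code walks the transfer log here */
}

/* self-locking wrapper, like tl_restart() */
static void tl_restart_demo(int what)
{
	pthread_mutex_lock(&req_lock);
	_tl_restart_demo(what);
	pthread_mutex_unlock(&req_lock);
}

int main(void)
{
	tl_restart_demo(1);	/* e.g. CONNECTION_LOST_WHILE_PENDING */
	return last_event != 1;
}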