diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2014-06-08 10:24:07 +0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2014-06-08 10:24:07 +0400 |
commit | a292241cccb7e20e8b997a9a44177e7c98141859 (patch) | |
tree | a0b0bb95e7dce3233a2d8b203f9e326cdec7a00e /drivers/block | |
parent | d49cb7aeebb974713f9f7ab2991352d3050b095b (diff) | |
parent | 68807a0c2015cb40df4869e16651f0ce5cc14d52 (diff) | |
download | linux-a292241cccb7e20e8b997a9a44177e7c98141859.tar.xz |
Merge branch 'next' into for-linus
Prepare input updates for 3.16.
Diffstat (limited to 'drivers/block')
41 files changed, 7328 insertions, 5997 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index eb3950113e42..125d84505738 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -6411,12 +6411,12 @@ static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller, .ScatterGatherSegments[0] .SegmentByteCount = CommandMailbox->ControllerInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - while (Controller->V2.NewControllerInformation->PhysicalScanActive) - { - DAC960_ExecuteCommand(Command); - sleep_on_timeout(&Controller->CommandWaitQueue, HZ); - } + while (1) { + DAC960_ExecuteCommand(Command); + if (!Controller->V2.NewControllerInformation->PhysicalScanActive) + break; + msleep(1000); + } DAC960_UserCritical("Discovery Completed\n", Controller); } } @@ -7035,18 +7035,16 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, ErrorCode = -EFAULT; break; } - while (Controller->V2.HealthStatusBuffer->StatusChangeCounter - == HealthStatusBuffer.StatusChangeCounter && - Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - == HealthStatusBuffer.NextEventSequenceNumber) - { - interruptible_sleep_on_timeout(&Controller->HealthStatusWaitQueue, - DAC960_MonitoringTimerInterval); - if (signal_pending(current)) { - ErrorCode = -EINTR; - break; - } - } + ErrorCode = wait_event_interruptible_timeout(Controller->HealthStatusWaitQueue, + !(Controller->V2.HealthStatusBuffer->StatusChangeCounter + == HealthStatusBuffer.StatusChangeCounter && + Controller->V2.HealthStatusBuffer->NextEventSequenceNumber + == HealthStatusBuffer.NextEventSequenceNumber), + DAC960_MonitoringTimerInterval); + if (ErrorCode == -ERESTARTSYS) { + ErrorCode = -EINTR; + break; + } if (copy_to_user(GetHealthStatus.HealthStatusBuffer, Controller->V2.HealthStatusBuffer, sizeof(DAC960_V2_HealthStatusBuffer_T))) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 8184451b57c0..422b7d84f686 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ 
-874,7 +874,7 @@ bio_pageinc(struct bio *bio) /* Non-zero page count for non-head members of * compound pages is no longer allowed by the kernel. */ - page = compound_trans_head(bv.bv_page); + page = compound_head(bv.bv_page); atomic_inc(&page->_count); } } @@ -887,7 +887,7 @@ bio_pagedec(struct bio *bio) struct bvec_iter iter; bio_for_each_segment(bv, bio, iter) { - page = compound_trans_head(bv.bv_page); + page = compound_head(bv.bv_page); atomic_dec(&page->_count); } } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 0e30c6e5492a..96b629e1f0c9 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -68,6 +68,8 @@ #include <linux/init.h> #include <linux/blkdev.h> #include <linux/mutex.h> +#include <linux/completion.h> +#include <linux/wait.h> #include <asm/atafd.h> #include <asm/atafdreg.h> @@ -301,7 +303,7 @@ module_param_array(UserSteprate, int, NULL, 0); /* Synchronization of FDC access. */ static volatile int fdc_busy = 0; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); -static DECLARE_WAIT_QUEUE_HEAD(format_wait); +static DECLARE_COMPLETION(format_wait); static unsigned long changed_floppies = 0xff, fake_change = 0; #define CHECK_CHANGE_DELAY HZ/2 @@ -608,7 +610,7 @@ static void fd_error( void ) if (IsFormatting) { IsFormatting = 0; FormatError = 1; - wake_up( &format_wait ); + complete(&format_wait); return; } @@ -650,9 +652,8 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) DPRINT(("do_format( dr=%d tr=%d he=%d offs=%d )\n", drive, desc->track, desc->head, desc->sect_offset )); + wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0); local_irq_save(flags); - while( fdc_busy ) sleep_on( &fdc_wait ); - fdc_busy = 1; stdma_lock(floppy_irq, NULL); atari_turnon_irq( IRQ_MFP_FDC ); /* should be already, just to be sure */ local_irq_restore(flags); @@ -706,7 +707,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) ReqSide = desc->head; do_fd_action( drive ); - sleep_on( 
&format_wait ); + wait_for_completion(&format_wait); redo_fd_request(); return( FormatError ? -EIO : 0 ); @@ -1229,7 +1230,7 @@ static void fd_writetrack_done( int status ) goto err_end; } - wake_up( &format_wait ); + complete(&format_wait); return; err_end: @@ -1497,8 +1498,7 @@ repeat: void do_fd_request(struct request_queue * q) { DPRINT(("do_fd_request for pid %d\n",current->pid)); - while( fdc_busy ) sleep_on( &fdc_wait ); - fdc_busy = 1; + wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0); stdma_lock(floppy_irq, NULL); atari_disable_irq( IRQ_MFP_FDC ); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 036e8ab86c71..73894ca33956 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4092,11 +4092,9 @@ static void cciss_interrupt_mode(ctlr_info_t *h) if (err > 0) { dev_warn(&h->pdev->dev, "only %d MSI-X vectors available\n", err); - goto default_int_mode; } else { dev_warn(&h->pdev->dev, "MSI-X init failed %d\n", err); - goto default_int_mode; } } if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) { diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index a9b13f2cc420..90ae4ba8f9ee 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -95,34 +95,36 @@ struct __packed al_transaction_on_disk { struct update_odbm_work { struct drbd_work w; + struct drbd_device *device; unsigned int enr; }; struct update_al_work { struct drbd_work w; + struct drbd_device *device; struct completion event; int err; }; -void *drbd_md_get_buffer(struct drbd_conf *mdev) +void *drbd_md_get_buffer(struct drbd_device *device) { int r; - wait_event(mdev->misc_wait, - (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || - mdev->state.disk <= D_FAILED); + wait_event(device->misc_wait, + (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 || + device->state.disk <= D_FAILED); - return r ? NULL : page_address(mdev->md_io_page); + return r ? 
NULL : page_address(device->md_io_page); } -void drbd_md_put_buffer(struct drbd_conf *mdev) +void drbd_md_put_buffer(struct drbd_device *device) { - if (atomic_dec_and_test(&mdev->md_io_in_use)) - wake_up(&mdev->misc_wait); + if (atomic_dec_and_test(&device->md_io_in_use)) + wake_up(&device->misc_wait); } -void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int *done) { long dt; @@ -134,15 +136,15 @@ void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backi if (dt == 0) dt = MAX_SCHEDULE_TIMEOUT; - dt = wait_event_timeout(mdev->misc_wait, - *done || test_bit(FORCE_DETACH, &mdev->flags), dt); + dt = wait_event_timeout(device->misc_wait, + *done || test_bit(FORCE_DETACH, &device->flags), dt); if (dt == 0) { - dev_err(DEV, "meta-data IO operation timed out\n"); - drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH); + drbd_err(device, "meta-data IO operation timed out\n"); + drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH); } } -static int _drbd_md_sync_page_io(struct drbd_conf *mdev, +static int _drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) @@ -150,10 +152,10 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct bio *bio; int err; - mdev->md_io.done = 0; - mdev->md_io.error = -ENODEV; + device->md_io.done = 0; + device->md_io.error = -ENODEV; - if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) + if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags)) rw |= REQ_FUA | REQ_FLUSH; rw |= REQ_SYNC; @@ -163,69 +165,69 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, err = -EIO; if (bio_add_page(bio, page, size, 0) != size) goto out; - bio->bi_private = &mdev->md_io; + bio->bi_private = &device->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; - if (!(rw & WRITE) && 
mdev->state.disk == D_DISKLESS && mdev->ldev == NULL) + if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL) /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */ ; - else if (!get_ldev_if_state(mdev, D_ATTACHING)) { + else if (!get_ldev_if_state(device, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ - dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); err = -ENODEV; goto out; } bio_get(bio); /* one bio_put() is in the completion handler */ - atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ - if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) + atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ + if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done); + wait_until_done_or_force_detached(device, bdev, &device->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) - err = mdev->md_io.error; + err = device->md_io.error; out: bio_put(bio); return err; } -int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, sector_t sector, int rw) { int err; - struct page *iop = mdev->md_io_page; + struct page *iop = device->md_io_page; - D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); + D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1); BUG_ON(!bdev->md_bdev); - dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", + drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ", (void*)_RET_IP_ ); if (sector < drbd_md_first_sector(bdev) || sector + 7 > drbd_md_last_sector(bdev)) - dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", + drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); /* we do all our meta data IO in aligned 4k blocks. */ - err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, 4096); + err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096); if (err) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", + drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); } return err; } -static struct bm_extent *find_active_resync_extent(struct drbd_conf *mdev, unsigned int enr) +static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr) { struct lc_element *tmp; - tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); + tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT); if (unlikely(tmp != NULL)) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); if (test_bit(BME_NO_WRITES, &bm_ext->flags)) @@ -234,47 +236,48 @@ static struct bm_extent *find_active_resync_extent(struct drbd_conf *mdev, unsig return NULL; } -static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr, bool nonblock) +static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock) { struct lc_element *al_ext; struct bm_extent *bm_ext; int wake; - spin_lock_irq(&mdev->al_lock); - bm_ext = find_active_resync_extent(mdev, enr); + spin_lock_irq(&device->al_lock); + bm_ext = find_active_resync_extent(device, enr); if (bm_ext) { wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); - spin_unlock_irq(&mdev->al_lock); + spin_unlock_irq(&device->al_lock); if (wake) - wake_up(&mdev->al_wait); + 
wake_up(&device->al_wait); return NULL; } if (nonblock) - al_ext = lc_try_get(mdev->act_log, enr); + al_ext = lc_try_get(device->act_log, enr); else - al_ext = lc_get(mdev->act_log, enr); - spin_unlock_irq(&mdev->al_lock); + al_ext = lc_get(device->act_log, enr); + spin_unlock_irq(&device->al_lock); return al_ext; } -bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i) +bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - D_ASSERT((unsigned)(last - first) <= 1); - D_ASSERT(atomic_read(&mdev->local_cnt) > 0); + D_ASSERT(device, (unsigned)(last - first) <= 1); + D_ASSERT(device, atomic_read(&device->local_cnt) > 0); /* FIXME figure out a fast path for bios crossing AL extent boundaries */ if (first != last) return false; - return _al_get(mdev, first, true); + return _al_get(device, first, true); } -bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i) +static +bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ @@ -283,20 +286,20 @@ bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i) unsigned enr; bool need_transaction = false; - D_ASSERT(first <= last); - D_ASSERT(atomic_read(&mdev->local_cnt) > 0); + D_ASSERT(device, first <= last); + D_ASSERT(device, atomic_read(&device->local_cnt) > 0); for (enr = first; enr <= last; enr++) { struct lc_element *al_ext; - wait_event(mdev->al_wait, - (al_ext = _al_get(mdev, enr, false)) != NULL); + wait_event(device->al_wait, + (al_ext = _al_get(device, enr, false)) != NULL); if (al_ext->lc_number != enr) need_transaction = true; } return 
need_transaction; } -static int al_write_transaction(struct drbd_conf *mdev, bool delegate); +static int al_write_transaction(struct drbd_device *device, bool delegate); /* When called through generic_make_request(), we must delegate * activity log I/O to the worker thread: a further request @@ -310,58 +313,58 @@ static int al_write_transaction(struct drbd_conf *mdev, bool delegate); /* * @delegate: delegate activity log I/O to the worker thread */ -void drbd_al_begin_io_commit(struct drbd_conf *mdev, bool delegate) +void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate) { bool locked = false; - BUG_ON(delegate && current == mdev->tconn->worker.task); + BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task); /* Serialize multiple transactions. * This uses test_and_set_bit, memory barrier is implicit. */ - wait_event(mdev->al_wait, - mdev->act_log->pending_changes == 0 || - (locked = lc_try_lock_for_transaction(mdev->act_log))); + wait_event(device->al_wait, + device->act_log->pending_changes == 0 || + (locked = lc_try_lock_for_transaction(device->act_log))); if (locked) { /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. 
*/ - if (mdev->act_log->pending_changes) { + if (device->act_log->pending_changes) { bool write_al_updates; rcu_read_lock(); - write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; rcu_read_unlock(); if (write_al_updates) - al_write_transaction(mdev, delegate); - spin_lock_irq(&mdev->al_lock); + al_write_transaction(device, delegate); + spin_lock_irq(&device->al_lock); /* FIXME if (err) we need an "lc_cancel" here; */ - lc_committed(mdev->act_log); - spin_unlock_irq(&mdev->al_lock); + lc_committed(device->act_log); + spin_unlock_irq(&device->al_lock); } - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); + lc_unlock(device->act_log); + wake_up(&device->al_wait); } } /* * @delegate: delegate activity log I/O to the worker thread */ -void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate) +void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate) { - BUG_ON(delegate && current == mdev->tconn->worker.task); + BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task); - if (drbd_al_begin_io_prepare(mdev, i)) - drbd_al_begin_io_commit(mdev, delegate); + if (drbd_al_begin_io_prepare(device, i)) + drbd_al_begin_io_commit(device, delegate); } -int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i) +int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) { - struct lru_cache *al = mdev->act_log; + struct lru_cache *al = device->act_log; /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); @@ -370,7 +373,7 @@ int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i) unsigned available_update_slots; unsigned enr; - D_ASSERT(first <= last); + D_ASSERT(device, first <= last); nr_al_extents = 1 + last - first; /* worst case: all 
touched extends are cold. */ available_update_slots = min(al->nr_elements - al->used, @@ -385,7 +388,7 @@ int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i) /* Is resync active in this area? */ for (enr = first; enr <= last; enr++) { struct lc_element *tmp; - tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); + tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT); if (unlikely(tmp != NULL)) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { @@ -401,14 +404,14 @@ int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i) * this has to be successful. */ for (enr = first; enr <= last; enr++) { struct lc_element *al_ext; - al_ext = lc_get_cumulative(mdev->act_log, enr); + al_ext = lc_get_cumulative(device->act_log, enr); if (!al_ext) - dev_info(DEV, "LOGIC BUG for enr=%u\n", enr); + drbd_info(device, "LOGIC BUG for enr=%u\n", enr); } return 0; } -void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) +void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i) { /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ @@ -418,19 +421,19 @@ void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) struct lc_element *extent; unsigned long flags; - D_ASSERT(first <= last); - spin_lock_irqsave(&mdev->al_lock, flags); + D_ASSERT(device, first <= last); + spin_lock_irqsave(&device->al_lock, flags); for (enr = first; enr <= last; enr++) { - extent = lc_find(mdev->act_log, enr); + extent = lc_find(device->act_log, enr); if (!extent) { - dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); + drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr); continue; } - lc_put(mdev->act_log, extent); + lc_put(device->act_log, extent); } - spin_unlock_irqrestore(&mdev->al_lock, flags); - wake_up(&mdev->al_wait); + 
spin_unlock_irqrestore(&device->al_lock, flags); + wake_up(&device->al_wait); } #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) @@ -460,13 +463,13 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); } -static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev) +static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) { - const unsigned int stripes = mdev->ldev->md.al_stripes; - const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k; + const unsigned int stripes = device->ldev->md.al_stripes; + const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k; /* transaction number, modulo on-disk ring buffer wrap around */ - unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k); + unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k); /* ... to aligned 4k on disk block */ t = ((t % stripes) * stripe_size_4kB) + t/stripes; @@ -475,11 +478,11 @@ static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev) t *= 8; /* ... plus offset to the on disk position */ - return mdev->ldev->md.md_offset + mdev->ldev->md.al_offset + t; + return device->ldev->md.md_offset + device->ldev->md.al_offset + t; } static int -_al_write_transaction(struct drbd_conf *mdev) +_al_write_transaction(struct drbd_device *device) { struct al_transaction_on_disk *buffer; struct lc_element *e; @@ -489,31 +492,31 @@ _al_write_transaction(struct drbd_conf *mdev) unsigned crc = 0; int err = 0; - if (!get_ldev(mdev)) { - dev_err(DEV, "disk is %s, cannot start al transaction\n", - drbd_disk_str(mdev->state.disk)); + if (!get_ldev(device)) { + drbd_err(device, "disk is %s, cannot start al transaction\n", + drbd_disk_str(device->state.disk)); return -EIO; } /* The bitmap write may have failed, causing a state change. 
*/ - if (mdev->state.disk < D_INCONSISTENT) { - dev_err(DEV, + if (device->state.disk < D_INCONSISTENT) { + drbd_err(device, "disk is %s, cannot write al transaction\n", - drbd_disk_str(mdev->state.disk)); - put_ldev(mdev); + drbd_disk_str(device->state.disk)); + put_ldev(device); return -EIO; } - buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */ if (!buffer) { - dev_err(DEV, "disk failed while waiting for md_io buffer\n"); - put_ldev(mdev); + drbd_err(device, "disk failed while waiting for md_io buffer\n"); + put_ldev(device); return -ENODEV; } memset(buffer, 0, sizeof(*buffer)); buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); - buffer->tr_number = cpu_to_be32(mdev->al_tr_number); + buffer->tr_number = cpu_to_be32(device->al_tr_number); i = 0; @@ -521,8 +524,8 @@ _al_write_transaction(struct drbd_conf *mdev) * once we set the LC_LOCKED -- from drbd_al_begin_io(), * lc_try_lock_for_transaction() --, someone may still * be in the process of changing it. 
*/ - spin_lock_irq(&mdev->al_lock); - list_for_each_entry(e, &mdev->act_log->to_be_changed, list) { + spin_lock_irq(&device->al_lock); + list_for_each_entry(e, &device->act_log->to_be_changed, list) { if (i == AL_UPDATES_PER_TRANSACTION) { i++; break; @@ -530,11 +533,11 @@ _al_write_transaction(struct drbd_conf *mdev) buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); if (e->lc_number != LC_FREE) - drbd_bm_mark_for_writeout(mdev, + drbd_bm_mark_for_writeout(device, al_extent_to_bm_page(e->lc_number)); i++; } - spin_unlock_irq(&mdev->al_lock); + spin_unlock_irq(&device->al_lock); BUG_ON(i > AL_UPDATES_PER_TRANSACTION); buffer->n_updates = cpu_to_be16(i); @@ -543,48 +546,48 @@ _al_write_transaction(struct drbd_conf *mdev) buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); } - buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements); - buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle); + buffer->context_size = cpu_to_be16(device->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle); mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, - mdev->act_log->nr_elements - mdev->al_tr_cycle); + device->act_log->nr_elements - device->al_tr_cycle); for (i = 0; i < mx; i++) { - unsigned idx = mdev->al_tr_cycle + i; - extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; + unsigned idx = device->al_tr_cycle + i; + extent_nr = lc_element_by_index(device->act_log, idx)->lc_number; buffer->context[i] = cpu_to_be32(extent_nr); } for (; i < AL_CONTEXT_PER_TRANSACTION; i++) buffer->context[i] = cpu_to_be32(LC_FREE); - mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; - if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) - mdev->al_tr_cycle = 0; + device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; + if (device->al_tr_cycle >= device->act_log->nr_elements) + device->al_tr_cycle = 0; - sector = al_tr_number_to_on_disk_sector(mdev); + sector = 
al_tr_number_to_on_disk_sector(device); crc = crc32c(0, buffer, 4096); buffer->crc32c = cpu_to_be32(crc); - if (drbd_bm_write_hinted(mdev)) + if (drbd_bm_write_hinted(device)) err = -EIO; else { bool write_al_updates; rcu_read_lock(); - write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; rcu_read_unlock(); if (write_al_updates) { - if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { err = -EIO; - drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); + drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); } else { - mdev->al_tr_number++; - mdev->al_writ_cnt++; + device->al_tr_number++; + device->al_writ_cnt++; } } } - drbd_md_put_buffer(mdev); - put_ldev(mdev); + drbd_md_put_buffer(device); + put_ldev(device); return err; } @@ -593,10 +596,10 @@ _al_write_transaction(struct drbd_conf *mdev) static int w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = aw->device; int err; - err = _al_write_transaction(mdev); + err = _al_write_transaction(device); aw->err = err; complete(&aw->event); @@ -606,63 +609,64 @@ static int w_al_write_transaction(struct drbd_work *w, int unused) /* Calls from worker context (see w_restart_disk_io()) need to write the transaction directly. Others came through generic_make_request(), those need to delegate it to the worker. 
*/ -static int al_write_transaction(struct drbd_conf *mdev, bool delegate) +static int al_write_transaction(struct drbd_device *device, bool delegate) { if (delegate) { struct update_al_work al_work; init_completion(&al_work.event); al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w); + al_work.device = device; + drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, + &al_work.w); wait_for_completion(&al_work.event); return al_work.err; } else - return _al_write_transaction(mdev); + return _al_write_transaction(device); } -static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) +static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) { int rv; - spin_lock_irq(&mdev->al_lock); + spin_lock_irq(&device->al_lock); rv = (al_ext->refcnt == 0); if (likely(rv)) - lc_del(mdev->act_log, al_ext); - spin_unlock_irq(&mdev->al_lock); + lc_del(device->act_log, al_ext); + spin_unlock_irq(&device->al_lock); return rv; } /** * drbd_al_shrink() - Removes all active extents form the activity log - * @mdev: DRBD device. + * @device: DRBD device. * * Removes all active extents form the activity log, waiting until * the reference count of each entry dropped to 0 first, of course. 
* - * You need to lock mdev->act_log with lc_try_lock() / lc_unlock() + * You need to lock device->act_log with lc_try_lock() / lc_unlock() */ -void drbd_al_shrink(struct drbd_conf *mdev) +void drbd_al_shrink(struct drbd_device *device) { struct lc_element *al_ext; int i; - D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags)); + D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags)); - for (i = 0; i < mdev->act_log->nr_elements; i++) { - al_ext = lc_element_by_index(mdev->act_log, i); + for (i = 0; i < device->act_log->nr_elements; i++) { + al_ext = lc_element_by_index(device->act_log, i); if (al_ext->lc_number == LC_FREE) continue; - wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext)); + wait_event(device->al_wait, _try_lc_del(device, al_ext)); } - wake_up(&mdev->al_wait); + wake_up(&device->al_wait); } -int drbd_initialize_al(struct drbd_conf *mdev, void *buffer) +int drbd_initialize_al(struct drbd_device *device, void *buffer) { struct al_transaction_on_disk *al = buffer; - struct drbd_md *md = &mdev->ldev->md; + struct drbd_md *md = &device->ldev->md; sector_t al_base = md->md_offset + md->al_offset; int al_size_4k = md->al_stripes * md->al_stripe_size_4k; int i; @@ -673,7 +677,7 @@ int drbd_initialize_al(struct drbd_conf *mdev, void *buffer) al->crc32c = cpu_to_be32(crc32c(0, al, 4096)); for (i = 0; i < al_size_4k; i++) { - int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE); + int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE); if (err) return err; } @@ -683,32 +687,32 @@ int drbd_initialize_al(struct drbd_conf *mdev, void *buffer) static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = udw->device; struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; - if (!get_ldev(mdev)) { + if (!get_ldev(device)) { if (__ratelimit(&drbd_ratelimit_state)) - 
dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); + drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n"); kfree(udw); return 0; } - drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr)); - put_ldev(mdev); + drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr)); + put_ldev(device); kfree(udw); - if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) { - switch (mdev->state.conn) { + if (drbd_bm_total_weight(device) <= device->rs_failed) { + switch (device->state.conn) { case C_SYNC_SOURCE: case C_SYNC_TARGET: case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: - drbd_resync_finished(mdev); + drbd_resync_finished(device); default: /* nothing to do */ break; } } - drbd_bcast_event(mdev, &sib); + drbd_bcast_event(device, &sib); return 0; } @@ -720,7 +724,7 @@ static int w_update_odbm(struct drbd_work *w, int unused) * * TODO will be obsoleted once we have a caching lru of the on disk bitmap */ -static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, +static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector, int count, int success) { struct lc_element *e; @@ -728,13 +732,13 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, unsigned int enr; - D_ASSERT(atomic_read(&mdev->local_cnt)); + D_ASSERT(device, atomic_read(&device->local_cnt)); /* I simply assume that a sector/size pair never crosses * a 16 MB extent border. (Currently this is true...) */ enr = BM_SECT_TO_EXT(sector); - e = lc_get(mdev->resync, enr); + e = lc_get(device->resync, enr); if (e) { struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); if (ext->lce.lc_number == enr) { @@ -743,12 +747,12 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, else ext->rs_failed += count; if (ext->rs_left < ext->rs_failed) { - dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d " + drbd_warn(device, "BAD! 
sector=%llus enr=%u rs_left=%d " "rs_failed=%d count=%d cstate=%s\n", (unsigned long long)sector, ext->lce.lc_number, ext->rs_left, ext->rs_failed, count, - drbd_conn_str(mdev->state.conn)); + drbd_conn_str(device->state.conn)); /* We don't expect to be able to clear more bits * than have been set when we originally counted @@ -756,7 +760,7 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, * Whatever the reason (disconnect during resync, * delayed local completion of an application write), * try to fix it up by recounting here. */ - ext->rs_left = drbd_bm_e_weight(mdev, enr); + ext->rs_left = drbd_bm_e_weight(device, enr); } } else { /* Normally this element should be in the cache, @@ -765,16 +769,16 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, * But maybe an application write finished, and we set * something outside the resync lru_cache in sync. */ - int rs_left = drbd_bm_e_weight(mdev, enr); + int rs_left = drbd_bm_e_weight(device, enr); if (ext->flags != 0) { - dev_warn(DEV, "changing resync lce: %d[%u;%02lx]" + drbd_warn(device, "changing resync lce: %d[%u;%02lx]" " -> %d[%u;00]\n", ext->lce.lc_number, ext->rs_left, ext->flags, enr, rs_left); ext->flags = 0; } if (ext->rs_failed) { - dev_warn(DEV, "Kicking resync_lru element enr=%u " + drbd_warn(device, "Kicking resync_lru element enr=%u " "out with rs_failed=%d\n", ext->lce.lc_number, ext->rs_failed); } @@ -782,9 +786,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, ext->rs_failed = success ? 0 : count; /* we don't keep a persistent log of the resync lru, * we can commit any change right away. */ - lc_committed(mdev->resync); + lc_committed(device->resync); } - lc_put(mdev->resync, &ext->lce); + lc_put(device->resync, &ext->lce); /* no race, we are within the al_lock! 
*/ if (ext->rs_left == ext->rs_failed) { @@ -794,32 +798,33 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, if (udw) { udw->enr = ext->lce.lc_number; udw->w.cb = w_update_odbm; - udw->w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->sender_work, &udw->w); + udw->device = device; + drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, + &udw->w); } else { - dev_warn(DEV, "Could not kmalloc an udw\n"); + drbd_warn(device, "Could not kmalloc an udw\n"); } } } else { - dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n", - mdev->resync_locked, - mdev->resync->nr_elements, - mdev->resync->flags); + drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", + device->resync_locked, + device->resync->nr_elements, + device->resync->flags); } } -void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go) +void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) { unsigned long now = jiffies; - unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark]; - int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS; + unsigned long last = device->rs_mark_time[device->rs_last_mark]; + int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS; if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) { - if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go && - mdev->state.conn != C_PAUSED_SYNC_T && - mdev->state.conn != C_PAUSED_SYNC_S) { - mdev->rs_mark_time[next] = now; - mdev->rs_mark_left[next] = still_to_go; - mdev->rs_last_mark = next; + if (device->rs_mark_left[device->rs_last_mark] != still_to_go && + device->state.conn != C_PAUSED_SYNC_T && + device->state.conn != C_PAUSED_SYNC_S) { + device->rs_mark_time[next] = now; + device->rs_mark_left[next] = still_to_go; + device->rs_last_mark = next; } } } @@ -831,7 +836,7 @@ void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go) * called by worker on C_SYNC_TARGET and receiver on SyncSource. 
* */ -void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, +void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size, const char *file, const unsigned int line) { /* Is called from worker and receiver context _only_ */ @@ -842,15 +847,15 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, unsigned long flags; if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { - dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", + drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; } - if (!get_ldev(mdev)) + if (!get_ldev(device)) return; /* no disk, no metadata, no bitmap to clear bits in */ - nr_sectors = drbd_get_capacity(mdev->this_bdev); + nr_sectors = drbd_get_capacity(device->this_bdev); esector = sector + (size >> 9) - 1; if (!expect(sector < nr_sectors)) @@ -878,21 +883,21 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, * ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. */ - count = drbd_bm_clear_bits(mdev, sbnr, ebnr); + count = drbd_bm_clear_bits(device, sbnr, ebnr); if (count) { - drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev)); - spin_lock_irqsave(&mdev->al_lock, flags); - drbd_try_clear_on_disk_bm(mdev, sector, count, true); - spin_unlock_irqrestore(&mdev->al_lock, flags); + drbd_advance_rs_marks(device, drbd_bm_total_weight(device)); + spin_lock_irqsave(&device->al_lock, flags); + drbd_try_clear_on_disk_bm(device, sector, count, true); + spin_unlock_irqrestore(&device->al_lock, flags); /* just wake_up unconditional now, various lc_chaged(), * lc_put() in drbd_try_clear_on_disk_bm(). */ wake_up = 1; } out: - put_ldev(mdev); + put_ldev(device); if (wake_up) - wake_up(&mdev->al_wait); + wake_up(&device->al_wait); } /* @@ -903,7 +908,7 @@ out: * called by tl_clear and drbd_send_dblock (==drbd_make_request). 
* so this can be _any_ process. */ -int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, +int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, const char *file, const unsigned int line) { unsigned long sbnr, ebnr, flags; @@ -916,15 +921,15 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, return 0; if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { - dev_err(DEV, "sector: %llus, size: %d\n", + drbd_err(device, "sector: %llus, size: %d\n", (unsigned long long)sector, size); return 0; } - if (!get_ldev(mdev)) + if (!get_ldev(device)) return 0; /* no disk, no metadata, no bitmap to set bits in */ - nr_sectors = drbd_get_capacity(mdev->this_bdev); + nr_sectors = drbd_get_capacity(device->this_bdev); esector = sector + (size >> 9) - 1; if (!expect(sector < nr_sectors)) @@ -939,55 +944,55 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, /* ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. 
*/ - spin_lock_irqsave(&mdev->al_lock, flags); - count = drbd_bm_set_bits(mdev, sbnr, ebnr); + spin_lock_irqsave(&device->al_lock, flags); + count = drbd_bm_set_bits(device, sbnr, ebnr); enr = BM_SECT_TO_EXT(sector); - e = lc_find(mdev->resync, enr); + e = lc_find(device->resync, enr); if (e) lc_entry(e, struct bm_extent, lce)->rs_left += count; - spin_unlock_irqrestore(&mdev->al_lock, flags); + spin_unlock_irqrestore(&device->al_lock, flags); out: - put_ldev(mdev); + put_ldev(device); return count; } static -struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) +struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr) { struct lc_element *e; struct bm_extent *bm_ext; int wakeup = 0; unsigned long rs_flags; - spin_lock_irq(&mdev->al_lock); - if (mdev->resync_locked > mdev->resync->nr_elements/2) { - spin_unlock_irq(&mdev->al_lock); + spin_lock_irq(&device->al_lock); + if (device->resync_locked > device->resync->nr_elements/2) { + spin_unlock_irq(&device->al_lock); return NULL; } - e = lc_get(mdev->resync, enr); + e = lc_get(device->resync, enr); bm_ext = e ? 
lc_entry(e, struct bm_extent, lce) : NULL; if (bm_ext) { if (bm_ext->lce.lc_number != enr) { - bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); + bm_ext->rs_left = drbd_bm_e_weight(device, enr); bm_ext->rs_failed = 0; - lc_committed(mdev->resync); + lc_committed(device->resync); wakeup = 1; } if (bm_ext->lce.refcnt == 1) - mdev->resync_locked++; + device->resync_locked++; set_bit(BME_NO_WRITES, &bm_ext->flags); } - rs_flags = mdev->resync->flags; - spin_unlock_irq(&mdev->al_lock); + rs_flags = device->resync->flags; + spin_unlock_irq(&device->al_lock); if (wakeup) - wake_up(&mdev->al_wait); + wake_up(&device->al_wait); if (!bm_ext) { if (rs_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for element" + drbd_warn(device, "Have to wait for element" " (resync LRU too small?)\n"); BUG_ON(rs_flags & LC_LOCKED); } @@ -995,25 +1000,25 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) return bm_ext; } -static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) +static int _is_in_al(struct drbd_device *device, unsigned int enr) { int rv; - spin_lock_irq(&mdev->al_lock); - rv = lc_is_used(mdev->act_log, enr); - spin_unlock_irq(&mdev->al_lock); + spin_lock_irq(&device->al_lock); + rv = lc_is_used(device->act_log, enr); + spin_unlock_irq(&device->al_lock); return rv; } /** * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED - * @mdev: DRBD device. + * @device: DRBD device. * @sector: The sector number. * * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted. */ -int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) +int drbd_rs_begin_io(struct drbd_device *device, sector_t sector) { unsigned int enr = BM_SECT_TO_EXT(sector); struct bm_extent *bm_ext; @@ -1022,8 +1027,8 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) 200 times -> 20 seconds. 
*/ retry: - sig = wait_event_interruptible(mdev->al_wait, - (bm_ext = _bme_get(mdev, enr))); + sig = wait_event_interruptible(device->al_wait, + (bm_ext = _bme_get(device, enr))); if (sig) return -EINTR; @@ -1031,24 +1036,24 @@ retry: return 0; for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - sig = wait_event_interruptible(mdev->al_wait, - !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) || + sig = wait_event_interruptible(device->al_wait, + !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) || test_bit(BME_PRIORITY, &bm_ext->flags)); if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) { - spin_lock_irq(&mdev->al_lock); - if (lc_put(mdev->resync, &bm_ext->lce) == 0) { + spin_lock_irq(&device->al_lock); + if (lc_put(device->resync, &bm_ext->lce) == 0) { bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ - mdev->resync_locked--; - wake_up(&mdev->al_wait); + device->resync_locked--; + wake_up(&device->al_wait); } - spin_unlock_irq(&mdev->al_lock); + spin_unlock_irq(&device->al_lock); if (sig) return -EINTR; if (schedule_timeout_interruptible(HZ/10)) return -EINTR; if (sa && --sa == 0) - dev_warn(DEV,"drbd_rs_begin_io() stepped aside for 20sec." + drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec." "Resync stalled?\n"); goto retry; } @@ -1059,14 +1064,14 @@ retry: /** * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep - * @mdev: DRBD device. + * @device: DRBD device. * @sector: The sector number. * * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN * if there is still application IO going on in this area. 
*/ -int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) +int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) { unsigned int enr = BM_SECT_TO_EXT(sector); const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; @@ -1074,8 +1079,8 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i; - spin_lock_irq(&mdev->al_lock); - if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) { + spin_lock_irq(&device->al_lock); + if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) { /* in case you have very heavy scattered io, it may * stall the syncer undefined if we give up the ref count * when we try again and requeue. @@ -1089,193 +1094,193 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * the lc_put here... * we also have to wake_up */ - e = lc_find(mdev->resync, mdev->resync_wenr); + e = lc_find(device->resync, device->resync_wenr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; if (bm_ext) { - D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); - D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags)); + D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); clear_bit(BME_NO_WRITES, &bm_ext->flags); - mdev->resync_wenr = LC_FREE; - if (lc_put(mdev->resync, &bm_ext->lce) == 0) - mdev->resync_locked--; - wake_up(&mdev->al_wait); + device->resync_wenr = LC_FREE; + if (lc_put(device->resync, &bm_ext->lce) == 0) + device->resync_locked--; + wake_up(&device->al_wait); } else { - dev_alert(DEV, "LOGIC BUG\n"); + drbd_alert(device, "LOGIC BUG\n"); } } /* TRY. */ - e = lc_try_get(mdev->resync, enr); + e = lc_try_get(device->resync, enr); bm_ext = e ? 
lc_entry(e, struct bm_extent, lce) : NULL; if (bm_ext) { if (test_bit(BME_LOCKED, &bm_ext->flags)) goto proceed; if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) { - mdev->resync_locked++; + device->resync_locked++; } else { /* we did set the BME_NO_WRITES, * but then could not set BME_LOCKED, * so we tried again. * drop the extra reference. */ bm_ext->lce.refcnt--; - D_ASSERT(bm_ext->lce.refcnt > 0); + D_ASSERT(device, bm_ext->lce.refcnt > 0); } goto check_al; } else { /* do we rather want to try later? */ - if (mdev->resync_locked > mdev->resync->nr_elements-3) + if (device->resync_locked > device->resync->nr_elements-3) goto try_again; /* Do or do not. There is no try. -- Yoda */ - e = lc_get(mdev->resync, enr); + e = lc_get(device->resync, enr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; if (!bm_ext) { - const unsigned long rs_flags = mdev->resync->flags; + const unsigned long rs_flags = device->resync->flags; if (rs_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for element" + drbd_warn(device, "Have to wait for element" " (resync LRU too small?)\n"); BUG_ON(rs_flags & LC_LOCKED); goto try_again; } if (bm_ext->lce.lc_number != enr) { - bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); + bm_ext->rs_left = drbd_bm_e_weight(device, enr); bm_ext->rs_failed = 0; - lc_committed(mdev->resync); - wake_up(&mdev->al_wait); - D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); + lc_committed(device->resync); + wake_up(&device->al_wait); + D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0); } set_bit(BME_NO_WRITES, &bm_ext->flags); - D_ASSERT(bm_ext->lce.refcnt == 1); - mdev->resync_locked++; + D_ASSERT(device, bm_ext->lce.refcnt == 1); + device->resync_locked++; goto check_al; } check_al: for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - if (lc_is_used(mdev->act_log, al_enr+i)) + if (lc_is_used(device->act_log, al_enr+i)) goto try_again; } set_bit(BME_LOCKED, &bm_ext->flags); proceed: - mdev->resync_wenr = LC_FREE; - 
spin_unlock_irq(&mdev->al_lock); + device->resync_wenr = LC_FREE; + spin_unlock_irq(&device->al_lock); return 0; try_again: if (bm_ext) - mdev->resync_wenr = enr; - spin_unlock_irq(&mdev->al_lock); + device->resync_wenr = enr; + spin_unlock_irq(&device->al_lock); return -EAGAIN; } -void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) +void drbd_rs_complete_io(struct drbd_device *device, sector_t sector) { unsigned int enr = BM_SECT_TO_EXT(sector); struct lc_element *e; struct bm_extent *bm_ext; unsigned long flags; - spin_lock_irqsave(&mdev->al_lock, flags); - e = lc_find(mdev->resync, enr); + spin_lock_irqsave(&device->al_lock, flags); + e = lc_find(device->resync, enr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; if (!bm_ext) { - spin_unlock_irqrestore(&mdev->al_lock, flags); + spin_unlock_irqrestore(&device->al_lock, flags); if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n"); + drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n"); return; } if (bm_ext->lce.refcnt == 0) { - spin_unlock_irqrestore(&mdev->al_lock, flags); - dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, " + spin_unlock_irqrestore(&device->al_lock, flags); + drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, " "but refcnt is 0!?\n", (unsigned long long)sector, enr); return; } - if (lc_put(mdev->resync, &bm_ext->lce) == 0) { + if (lc_put(device->resync, &bm_ext->lce) == 0) { bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */ - mdev->resync_locked--; - wake_up(&mdev->al_wait); + device->resync_locked--; + wake_up(&device->al_wait); } - spin_unlock_irqrestore(&mdev->al_lock, flags); + spin_unlock_irqrestore(&device->al_lock, flags); } /** * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED) - * @mdev: DRBD device. + * @device: DRBD device. 
*/ -void drbd_rs_cancel_all(struct drbd_conf *mdev) +void drbd_rs_cancel_all(struct drbd_device *device) { - spin_lock_irq(&mdev->al_lock); + spin_lock_irq(&device->al_lock); - if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ - lc_reset(mdev->resync); - put_ldev(mdev); + if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */ + lc_reset(device->resync); + put_ldev(device); } - mdev->resync_locked = 0; - mdev->resync_wenr = LC_FREE; - spin_unlock_irq(&mdev->al_lock); - wake_up(&mdev->al_wait); + device->resync_locked = 0; + device->resync_wenr = LC_FREE; + spin_unlock_irq(&device->al_lock); + wake_up(&device->al_wait); } /** * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU - * @mdev: DRBD device. + * @device: DRBD device. * * Returns 0 upon success, -EAGAIN if at least one reference count was * not zero. */ -int drbd_rs_del_all(struct drbd_conf *mdev) +int drbd_rs_del_all(struct drbd_device *device) { struct lc_element *e; struct bm_extent *bm_ext; int i; - spin_lock_irq(&mdev->al_lock); + spin_lock_irq(&device->al_lock); - if (get_ldev_if_state(mdev, D_FAILED)) { + if (get_ldev_if_state(device, D_FAILED)) { /* ok, ->resync is there. 
*/ - for (i = 0; i < mdev->resync->nr_elements; i++) { - e = lc_element_by_index(mdev->resync, i); + for (i = 0; i < device->resync->nr_elements; i++) { + e = lc_element_by_index(device->resync, i); bm_ext = lc_entry(e, struct bm_extent, lce); if (bm_ext->lce.lc_number == LC_FREE) continue; - if (bm_ext->lce.lc_number == mdev->resync_wenr) { - dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently" + if (bm_ext->lce.lc_number == device->resync_wenr) { + drbd_info(device, "dropping %u in drbd_rs_del_all, apparently" " got 'synced' by application io\n", - mdev->resync_wenr); - D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); - D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags)); + device->resync_wenr); + D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); clear_bit(BME_NO_WRITES, &bm_ext->flags); - mdev->resync_wenr = LC_FREE; - lc_put(mdev->resync, &bm_ext->lce); + device->resync_wenr = LC_FREE; + lc_put(device->resync, &bm_ext->lce); } if (bm_ext->lce.refcnt != 0) { - dev_info(DEV, "Retrying drbd_rs_del_all() later. " + drbd_info(device, "Retrying drbd_rs_del_all() later. 
" "refcnt=%d\n", bm_ext->lce.refcnt); - put_ldev(mdev); - spin_unlock_irq(&mdev->al_lock); + put_ldev(device); + spin_unlock_irq(&device->al_lock); return -EAGAIN; } - D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); - D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags)); - lc_del(mdev->resync, &bm_ext->lce); + D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags)); + lc_del(device->resync, &bm_ext->lce); } - D_ASSERT(mdev->resync->used == 0); - put_ldev(mdev); + D_ASSERT(device, device->resync->used == 0); + put_ldev(device); } - spin_unlock_irq(&mdev->al_lock); - wake_up(&mdev->al_wait); + spin_unlock_irq(&device->al_lock); + wake_up(&device->al_wait); return 0; } /** * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks - * @mdev: DRBD device. + * @device: DRBD device. * @sector: The sector number. * @size: Size of failed IO operation, in byte. */ -void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) +void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size) { /* Is called from worker and receiver context _only_ */ unsigned long sbnr, ebnr, lbnr; @@ -1284,11 +1289,11 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) int wake_up = 0; if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { - dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", + drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; } - nr_sectors = drbd_get_capacity(mdev->this_bdev); + nr_sectors = drbd_get_capacity(device->this_bdev); esector = sector + (size >> 9) - 1; if (!expect(sector < nr_sectors)) @@ -1316,21 +1321,21 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) * ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. 
*/ - spin_lock_irq(&mdev->al_lock); - count = drbd_bm_count_bits(mdev, sbnr, ebnr); + spin_lock_irq(&device->al_lock); + count = drbd_bm_count_bits(device, sbnr, ebnr); if (count) { - mdev->rs_failed += count; + device->rs_failed += count; - if (get_ldev(mdev)) { - drbd_try_clear_on_disk_bm(mdev, sector, count, false); - put_ldev(mdev); + if (get_ldev(device)) { + drbd_try_clear_on_disk_bm(device, sector, count, false); + put_ldev(device); } /* just wake_up unconditional now, various lc_chaged(), * lc_put() in drbd_try_clear_on_disk_bm(). */ wake_up = 1; } - spin_unlock_irq(&mdev->al_lock); + spin_unlock_irq(&device->al_lock); if (wake_up) - wake_up(&mdev->al_wait); + wake_up(&device->al_wait); } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 597f111df67b..1aa29f8fdfe1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -113,54 +113,54 @@ struct drbd_bitmap { }; #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) -static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) +static void __bm_print_lock_info(struct drbd_device *device, const char *func) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; if (!__ratelimit(&drbd_ratelimit_state)) return; - dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev->tconn, current), - func, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev->tconn, b->bm_task)); + drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n", + current->comm, task_pid_nr(current), + func, b->bm_why ?: "?", + b->bm_task->comm, task_pid_nr(b->bm_task)); } -void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) +void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; int trylock_failed; if (!b) { - dev_err(DEV, "FIXME no bitmap in 
drbd_bm_lock!?\n"); + drbd_err(device, "FIXME no bitmap in drbd_bm_lock!?\n"); return; } trylock_failed = !mutex_trylock(&b->bm_change); if (trylock_failed) { - dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev->tconn, current), - why, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev->tconn, b->bm_task)); + drbd_warn(device, "%s[%d] going to '%s' but bitmap already locked for '%s' by %s[%d]\n", + current->comm, task_pid_nr(current), + why, b->bm_why ?: "?", + b->bm_task->comm, task_pid_nr(b->bm_task)); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) - dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); + drbd_err(device, "FIXME bitmap already locked in bm_lock\n"); b->bm_flags |= flags & BM_LOCKED_MASK; b->bm_why = why; b->bm_task = current; } -void drbd_bm_unlock(struct drbd_conf *mdev) +void drbd_bm_unlock(struct drbd_device *device) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; if (!b) { - dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n"); + drbd_err(device, "FIXME no bitmap in drbd_bm_unlock!?\n"); return; } - if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags)) - dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); + if (!(BM_LOCKED_MASK & device->bitmap->bm_flags)) + drbd_err(device, "FIXME bitmap not locked in bm_unlock\n"); b->bm_flags &= ~BM_LOCKED_MASK; b->bm_why = NULL; @@ -211,19 +211,19 @@ static unsigned long bm_page_to_idx(struct page *page) /* As is very unlikely that the same page is under IO from more than one * context, we can get away with a bit per page and one wait queue per bitmap. 
*/ -static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr) +static void bm_page_lock_io(struct drbd_device *device, int page_nr) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr)); } -static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) +static void bm_page_unlock_io(struct drbd_device *device, int page_nr) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); clear_bit_unlock(BM_PAGE_IO_LOCK, addr); - wake_up(&mdev->bitmap->bm_io_wait); + wake_up(&device->bitmap->bm_io_wait); } /* set _before_ submit_io, so it may be reset due to being changed @@ -242,22 +242,22 @@ static void bm_set_page_need_writeout(struct page *page) /** * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout - * @mdev: DRBD device. + * @device: DRBD device. * @page_nr: the bitmap page to mark with the "hint" flag * * From within an activity log transaction, we mark a few pages with these * hints, then call drbd_bm_write_hinted(), which will only write out changed * pages which are flagged with this mark. 
*/ -void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr) +void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr) { struct page *page; - if (page_nr >= mdev->bitmap->bm_number_of_pages) { - dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n", - page_nr, (int)mdev->bitmap->bm_number_of_pages); + if (page_nr >= device->bitmap->bm_number_of_pages) { + drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n", + page_nr, (int)device->bitmap->bm_number_of_pages); return; } - page = mdev->bitmap->bm_pages[page_nr]; + page = device->bitmap->bm_pages[page_nr]; set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)); } @@ -340,7 +340,7 @@ static void bm_unmap(unsigned long *p_addr) /* * actually most functions herein should take a struct drbd_bitmap*, not a - * struct drbd_conf*, but for the debug macros I like to have the mdev around + * struct drbd_device*, but for the debug macros I like to have the device around * to be able to report device specific. */ @@ -436,11 +436,11 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) /* * called on driver init only. TODO call when a device is created. - * allocates the drbd_bitmap, and stores it in mdev->bitmap. + * allocates the drbd_bitmap, and stores it in device->bitmap. 
*/ -int drbd_bm_init(struct drbd_conf *mdev) +int drbd_bm_init(struct drbd_device *device) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; WARN_ON(b != NULL); b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL); if (!b) @@ -449,28 +449,28 @@ int drbd_bm_init(struct drbd_conf *mdev) mutex_init(&b->bm_change); init_waitqueue_head(&b->bm_io_wait); - mdev->bitmap = b; + device->bitmap = b; return 0; } -sector_t drbd_bm_capacity(struct drbd_conf *mdev) +sector_t drbd_bm_capacity(struct drbd_device *device) { - if (!expect(mdev->bitmap)) + if (!expect(device->bitmap)) return 0; - return mdev->bitmap->bm_dev_capacity; + return device->bitmap->bm_dev_capacity; } /* called on driver unload. TODO: call when a device is destroyed. */ -void drbd_bm_cleanup(struct drbd_conf *mdev) +void drbd_bm_cleanup(struct drbd_device *device) { - if (!expect(mdev->bitmap)) + if (!expect(device->bitmap)) return; - bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); - bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); - kfree(mdev->bitmap); - mdev->bitmap = NULL; + bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages); + bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags)); + kfree(device->bitmap); + device->bitmap = NULL; } /* @@ -631,9 +631,9 @@ static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev) * In case this is actually a resize, we copy the old bitmap into the new one. * Otherwise, the bitmap is initialized to all bits set. 
*/ -int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) +int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long bits, words, owords, obits; unsigned long want, have, onpages; /* number of pages */ struct page **npages, **opages = NULL; @@ -643,9 +643,9 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) if (!expect(b)) return -ENOMEM; - drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); + drbd_bm_lock(device, "resize", BM_LOCKED_MASK); - dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", + drbd_info(device, "drbd_bm_resize called with capacity == %llu\n", (unsigned long long)capacity); if (capacity == b->bm_dev_capacity) @@ -678,12 +678,12 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) */ words = ALIGN(bits, 64) >> LN2_BPL; - if (get_ldev(mdev)) { - u64 bits_on_disk = drbd_md_on_disk_bits(mdev->ldev); - put_ldev(mdev); + if (get_ldev(device)) { + u64 bits_on_disk = drbd_md_on_disk_bits(device->ldev); + put_ldev(device); if (bits > bits_on_disk) { - dev_info(DEV, "bits = %lu\n", bits); - dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk); + drbd_info(device, "bits = %lu\n", bits); + drbd_info(device, "bits_on_disk = %llu\n", bits_on_disk); err = -ENOSPC; goto out; } @@ -692,10 +692,10 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; have = b->bm_number_of_pages; if (want == have) { - D_ASSERT(b->bm_pages != NULL); + D_ASSERT(device, b->bm_pages != NULL); npages = b->bm_pages; } else { - if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC)) + if (drbd_insert_fault(device, DRBD_FAULT_BM_ALLOC)) npages = NULL; else npages = bm_realloc_pages(b, want); @@ -742,10 +742,10 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) 
bm_vk_free(opages, opages_vmalloced); if (!growing) b->bm_set = bm_count_bits(b); - dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); + drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); out: - drbd_bm_unlock(mdev); + drbd_bm_unlock(device); return err; } @@ -757,9 +757,9 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) * * maybe bm_set should be atomic_t ? */ -unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) +unsigned long _drbd_bm_total_weight(struct drbd_device *device) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long s; unsigned long flags; @@ -775,20 +775,20 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) return s; } -unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) +unsigned long drbd_bm_total_weight(struct drbd_device *device) { unsigned long s; /* if I don't have a disk, I don't know about out-of-sync status */ - if (!get_ldev_if_state(mdev, D_NEGOTIATING)) + if (!get_ldev_if_state(device, D_NEGOTIATING)) return 0; - s = _drbd_bm_total_weight(mdev); - put_ldev(mdev); + s = _drbd_bm_total_weight(device); + put_ldev(device); return s; } -size_t drbd_bm_words(struct drbd_conf *mdev) +size_t drbd_bm_words(struct drbd_device *device) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; if (!expect(b)) return 0; if (!expect(b->bm_pages)) @@ -797,9 +797,9 @@ size_t drbd_bm_words(struct drbd_conf *mdev) return b->bm_words; } -unsigned long drbd_bm_bits(struct drbd_conf *mdev) +unsigned long drbd_bm_bits(struct drbd_device *device) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; if (!expect(b)) return 0; @@ -811,10 +811,10 @@ unsigned long drbd_bm_bits(struct drbd_conf *mdev) * bitmap must be locked by drbd_bm_lock. * currently only used from receive_bitmap. 
*/ -void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, +void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number, unsigned long *buffer) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long *p_addr, *bm; unsigned long word, bits; unsigned int idx; @@ -860,10 +860,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, /* copy number words from the bitmap starting at offset into the buffer. * buffer[i] will be little endian unsigned long. */ -void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, +void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number, unsigned long *buffer) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long *p_addr, *bm; size_t end, do_now; @@ -878,7 +878,7 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, if ((offset >= b->bm_words) || (end > b->bm_words) || (number <= 0)) - dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n", + drbd_err(device, "offset=%lu number=%lu bm_words=%lu\n", (unsigned long) offset, (unsigned long) number, (unsigned long) b->bm_words); @@ -897,9 +897,9 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, } /* set all bits in the bitmap */ -void drbd_bm_set_all(struct drbd_conf *mdev) +void drbd_bm_set_all(struct drbd_device *device) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; if (!expect(b)) return; if (!expect(b->bm_pages)) @@ -913,9 +913,9 @@ void drbd_bm_set_all(struct drbd_conf *mdev) } /* clear all bits in the bitmap */ -void drbd_bm_clear_all(struct drbd_conf *mdev) +void drbd_bm_clear_all(struct drbd_device *device) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; if (!expect(b)) return; if (!expect(b->bm_pages)) @@ -928,7 +928,7 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) } 
struct bm_aio_ctx { - struct drbd_conf *mdev; + struct drbd_device *device; atomic_t in_flight; unsigned int done; unsigned flags; @@ -943,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref) { struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); - put_ldev(ctx->mdev); + put_ldev(ctx->device); kfree(ctx); } @@ -951,8 +951,8 @@ static void bm_aio_ctx_destroy(struct kref *kref) static void bm_async_io_complete(struct bio *bio, int error) { struct bm_aio_ctx *ctx = bio->bi_private; - struct drbd_conf *mdev = ctx->mdev; - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_device *device = ctx->device; + struct drbd_bitmap *b = device->bitmap; unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); int uptodate = bio_flagged(bio, BIO_UPTODATE); @@ -966,7 +966,7 @@ static void bm_async_io_complete(struct bio *bio, int error) if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 && !bm_test_page_unchanged(b->bm_pages[idx])) - dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx); + drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx); if (error) { /* ctx error will hold the completed-last non-zero error code, @@ -976,14 +976,14 @@ static void bm_async_io_complete(struct bio *bio, int error) /* Not identical to on disk version of it. * Is BM_PAGE_IO_ERROR enough? 
*/ if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n", + drbd_err(device, "IO ERROR %d on bitmap page idx %u\n", error, idx); } else { bm_clear_page_io_err(b->bm_pages[idx]); - dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx); + dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx); } - bm_page_unlock_io(mdev, idx); + bm_page_unlock_io(device, idx); if (ctx->flags & BM_AIO_COPY_PAGES) mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); @@ -992,7 +992,7 @@ static void bm_async_io_complete(struct bio *bio, int error) if (atomic_dec_and_test(&ctx->in_flight)) { ctx->done = 1; - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); kref_put(&ctx->kref, &bm_aio_ctx_destroy); } } @@ -1000,23 +1000,23 @@ static void bm_async_io_complete(struct bio *bio, int error) static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { struct bio *bio = bio_alloc_drbd(GFP_NOIO); - struct drbd_conf *mdev = ctx->mdev; - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_device *device = ctx->device; + struct drbd_bitmap *b = device->bitmap; struct page *page; unsigned int len; sector_t on_disk_sector = - mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; + device->ldev->md.md_offset + device->ldev->md.bm_offset; on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); /* this might happen with very small * flexible external meta data device, * or with PAGE_SIZE > 4k */ len = min_t(unsigned int, PAGE_SIZE, - (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); + (drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9); /* serialize IO on this page */ - bm_page_lock_io(mdev, page_nr); + bm_page_lock_io(device, page_nr); /* before memcpy and submit, * so it can be redirtied any time */ bm_set_page_unchanged(b->bm_pages[page_nr]); @@ -1027,7 +1027,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bm_store_page_idx(page, page_nr); 
} else page = b->bm_pages[page_nr]; - bio->bi_bdev = mdev->ldev->md_bdev; + bio->bi_bdev = device->ldev->md_bdev; bio->bi_iter.bi_sector = on_disk_sector; /* bio_add_page of a single page to an empty bio will always succeed, * according to api. Do we want to assert that? */ @@ -1035,24 +1035,24 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bio->bi_private = ctx; bio->bi_end_io = bm_async_io_complete; - if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { + if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { bio->bi_rw |= rw; bio_endio(bio, -EIO); } else { submit_bio(rw, bio); /* this should not count as user activity and cause the * resync to throttle -- see drbd_rs_should_slow_down(). */ - atomic_add(len >> 9, &mdev->rs_sect_ev); + atomic_add(len >> 9, &device->rs_sect_ev); } } /* * bm_rw: read/write the whole bitmap from/to its on disk location. */ -static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) +static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { struct bm_aio_ctx *ctx; - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; int num_pages, i, count = 0; unsigned long now; char ppb[10]; @@ -1072,7 +1072,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w return -ENOMEM; *ctx = (struct bm_aio_ctx) { - .mdev = mdev, + .device = device, .in_flight = ATOMIC_INIT(1), .done = 0, .flags = flags, @@ -1080,8 +1080,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w .kref = { ATOMIC_INIT(2) }, }; - if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ - dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + 
drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); kfree(ctx); return -ENODEV; } @@ -1106,14 +1106,14 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w if (!(flags & BM_WRITE_ALL_PAGES) && bm_test_page_unchanged(b->bm_pages[i])) { - dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); + dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i); continue; } /* during lazy writeout, * ignore those pages not marked for lazy writeout. */ if (lazy_writeout_upper_idx && !bm_test_page_lazy_writeout(b->bm_pages[i])) { - dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i); + dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i); continue; } } @@ -1132,19 +1132,19 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * "in_flight reached zero, all done" event. */ if (!atomic_dec_and_test(&ctx->in_flight)) - wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(device, device->ldev, &ctx->done); else kref_put(&ctx->kref, &bm_aio_ctx_destroy); /* summary for global bitmap IO */ if (flags == 0) - dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", + drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n", rw == WRITE ? "WRITE" : "READ", count, jiffies - now); if (ctx->error) { - dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); - drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); + drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n"); + drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); err = -EIO; /* ctx->error ? 
*/ } @@ -1153,16 +1153,16 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w now = jiffies; if (rw == WRITE) { - drbd_md_flush(mdev); + drbd_md_flush(device); } else /* rw == READ */ { b->bm_set = bm_count_bits(b); - dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", + drbd_info(device, "recounting of set bits took additional %lu jiffies\n", jiffies - now); } now = b->bm_set; if (flags == 0) - dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", + drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); kref_put(&ctx->kref, &bm_aio_ctx_destroy); @@ -1171,48 +1171,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w /** * drbd_bm_read() - Read the whole bitmap from its on disk location. - * @mdev: DRBD device. + * @device: DRBD device. */ -int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) +int drbd_bm_read(struct drbd_device *device) __must_hold(local) { - return bm_rw(mdev, READ, 0, 0); + return bm_rw(device, READ, 0, 0); } /** * drbd_bm_write() - Write the whole bitmap to its on disk location. - * @mdev: DRBD device. + * @device: DRBD device. * * Will only write pages that have changed since last IO. */ -int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) +int drbd_bm_write(struct drbd_device *device) __must_hold(local) { - return bm_rw(mdev, WRITE, 0, 0); + return bm_rw(device, WRITE, 0, 0); } /** * drbd_bm_write_all() - Write the whole bitmap to its on disk location. - * @mdev: DRBD device. + * @device: DRBD device. * * Will write all pages. */ -int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local) +int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) { - return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0); -} - -/** - * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. - * @mdev: DRBD device. 
- * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages - */ -int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) -{ - return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); + return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0); } /** * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. - * @mdev: DRBD device. + * @device: DRBD device. * * Will only write pages that have changed since last IO. * In contrast to drbd_bm_write(), this will copy the bitmap pages @@ -1221,23 +1211,23 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l * verify is aborted due to a failed peer disk, while local IO continues, or * pending resync acks are still being processed. */ -int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) +int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) { - return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0); + return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0); } /** * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. - * @mdev: DRBD device. + * @device: DRBD device. */ -int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) +int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local) { - return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); + return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); } /** * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap - * @mdev: DRBD device. + * @device: DRBD device. * @idx: bitmap page index * * We don't want to special case on logical_block_size of the backend device, @@ -1247,13 +1237,13 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) * In case this becomes an issue on systems with larger PAGE_SIZE, * we may want to change this again to write 4k aligned 4k pieces. 
*/ -int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) +int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local) { struct bm_aio_ctx *ctx; int err; - if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { - dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); + if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) { + dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx); return 0; } @@ -1262,7 +1252,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc return -ENOMEM; *ctx = (struct bm_aio_ctx) { - .mdev = mdev, + .device = device, .in_flight = ATOMIC_INIT(1), .done = 0, .flags = BM_AIO_COPY_PAGES, @@ -1270,21 +1260,21 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc .kref = { ATOMIC_INIT(2) }, }; - if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ - dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); kfree(ctx); return -ENODEV; } bm_page_io_async(ctx, idx, WRITE_SYNC); - wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(device, device->ldev, &ctx->done); if (ctx->error) - drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); + drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); /* that causes us to detach, so the in memory bitmap will be * gone in a moment as well. */ - mdev->bm_writ_cnt++; + device->bm_writ_cnt++; err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; @@ -1298,17 +1288,17 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc * * this returns a bit number, NOT a sector! 
*/ -static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, +static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo, const int find_zero_bit) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long *p_addr; unsigned long bit_offset; unsigned i; if (bm_fo > b->bm_bits) { - dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); + drbd_err(device, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); bm_fo = DRBD_END_OF_BITMAP; } else { while (bm_fo < b->bm_bits) { @@ -1338,10 +1328,10 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, return bm_fo; } -static unsigned long bm_find_next(struct drbd_conf *mdev, +static unsigned long bm_find_next(struct drbd_device *device, unsigned long bm_fo, const int find_zero_bit) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long i = DRBD_END_OF_BITMAP; if (!expect(b)) @@ -1351,39 +1341,39 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, spin_lock_irq(&b->bm_lock); if (BM_DONT_TEST & b->bm_flags) - bm_print_lock_info(mdev); + bm_print_lock_info(device); - i = __bm_find_next(mdev, bm_fo, find_zero_bit); + i = __bm_find_next(device, bm_fo, find_zero_bit); spin_unlock_irq(&b->bm_lock); return i; } -unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) +unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo) { - return bm_find_next(mdev, bm_fo, 0); + return bm_find_next(device, bm_fo, 0); } #if 0 /* not yet needed for anything. */ -unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) +unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo) { - return bm_find_next(mdev, bm_fo, 1); + return bm_find_next(device, bm_fo, 1); } #endif /* does not spin_lock_irqsave. 
* you must take drbd_bm_lock() first */ -unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) +unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo) { - /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ - return __bm_find_next(mdev, bm_fo, 0); + /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */ + return __bm_find_next(device, bm_fo, 0); } -unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) +unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo) { - /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ - return __bm_find_next(mdev, bm_fo, 1); + /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */ + return __bm_find_next(device, bm_fo, 1); } /* returns number of bits actually changed. @@ -1392,10 +1382,10 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f * wants bitnr, not sector. * expected to be called for only a few bits (e - s about BITS_PER_LONG). * Must hold bitmap lock already. */ -static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, +static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s, unsigned long e, int val) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long *p_addr = NULL; unsigned long bitnr; unsigned int last_page_nr = -1U; @@ -1403,7 +1393,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, int changed_total = 0; if (e >= b->bm_bits) { - dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", + drbd_err(device, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", s, e, b->bm_bits); e = b->bm_bits ? 
b->bm_bits -1 : 0; } @@ -1441,11 +1431,11 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, * for val != 0, we change 0 -> 1, return code positive * for val == 0, we change 1 -> 0, return code negative * wants bitnr, not sector */ -static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, +static int bm_change_bits_to(struct drbd_device *device, const unsigned long s, const unsigned long e, int val) { unsigned long flags; - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; int c = 0; if (!expect(b)) @@ -1455,24 +1445,24 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, spin_lock_irqsave(&b->bm_lock, flags); if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) - bm_print_lock_info(mdev); + bm_print_lock_info(device); - c = __bm_change_bits_to(mdev, s, e, val); + c = __bm_change_bits_to(device, s, e, val); spin_unlock_irqrestore(&b->bm_lock, flags); return c; } /* returns number of bits changed 0 -> 1 */ -int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) { - return bm_change_bits_to(mdev, s, e, 1); + return bm_change_bits_to(device, s, e, 1); } /* returns number of bits changed 1 -> 0 */ -int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) { - return -bm_change_bits_to(mdev, s, e, 0); + return -bm_change_bits_to(device, s, e, 0); } /* sets all bits in full words, @@ -1504,7 +1494,7 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, * You must first drbd_bm_lock(). * Can be called to set the whole bitmap in one go. * Sets bits from s to e _inclusive_. 
*/ -void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) { /* First set_bit from the first bit (s) * up to the next long boundary (sl), @@ -1514,7 +1504,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * Do not use memset, because we must account for changes, * so we need to loop over the words with hweight() anyways. */ - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long sl = ALIGN(s,BITS_PER_LONG); unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); int first_page; @@ -1526,7 +1516,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi if (e - s <= 3*BITS_PER_LONG) { /* don't bother; el and sl may even be wrong. */ spin_lock_irq(&b->bm_lock); - __bm_change_bits_to(mdev, s, e, 1); + __bm_change_bits_to(device, s, e, 1); spin_unlock_irq(&b->bm_lock); return; } @@ -1537,7 +1527,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi /* bits filling the current long */ if (sl) - __bm_change_bits_to(mdev, s, sl-1, 1); + __bm_change_bits_to(device, s, sl-1, 1); first_page = sl >> (3 + PAGE_SHIFT); last_page = el >> (3 + PAGE_SHIFT); @@ -1549,7 +1539,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi /* first and full pages, unless first page == last page */ for (page_nr = first_page; page_nr < last_page; page_nr++) { - bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); + bm_set_full_words_within_one_page(device->bitmap, page_nr, first_word, last_word); spin_unlock_irq(&b->bm_lock); cond_resched(); first_word = 0; @@ -1565,7 +1555,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * as we did not allocate it, it is not present in bitmap->bm_pages. 
*/ if (last_word) - bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); + bm_set_full_words_within_one_page(device->bitmap, last_page, first_word, last_word); /* possibly trailing bits. * example: (e & 63) == 63, el will be e+1. @@ -1573,7 +1563,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * it would trigger an assert in __bm_change_bits_to() */ if (el <= e) - __bm_change_bits_to(mdev, el, e, 1); + __bm_change_bits_to(device, el, e, 1); spin_unlock_irq(&b->bm_lock); } @@ -1584,10 +1574,10 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * 0 ... bit not set * -1 ... first out of bounds access, stop testing for bits! */ -int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) +int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr) { unsigned long flags; - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long *p_addr; int i; @@ -1598,7 +1588,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) - bm_print_lock_info(mdev); + bm_print_lock_info(device); if (bitnr < b->bm_bits) { p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 
1 : 0; @@ -1606,7 +1596,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) } else if (bitnr == b->bm_bits) { i = -1; } else { /* (bitnr > b->bm_bits) */ - dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); + drbd_err(device, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); i = 0; } @@ -1615,10 +1605,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) } /* returns number of bits set in the range [s, e] */ -int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) { unsigned long flags; - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; unsigned long *p_addr = NULL; unsigned long bitnr; unsigned int page_nr = -1U; @@ -1635,7 +1625,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) - bm_print_lock_info(mdev); + bm_print_lock_info(device); for (bitnr = s; bitnr <= e; bitnr++) { unsigned int idx = bm_bit_to_page_idx(b, bitnr); if (page_nr != idx) { @@ -1647,7 +1637,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi if (expect(bitnr < b->bm_bits)) c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); else - dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); + drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); } if (p_addr) bm_unmap(p_addr); @@ -1670,9 +1660,9 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * reference count of some bitmap extent element from some lru instead... 
* */ -int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) +int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr) { - struct drbd_bitmap *b = mdev->bitmap; + struct drbd_bitmap *b = device->bitmap; int count, s, e; unsigned long flags; unsigned long *p_addr, *bm; @@ -1684,7 +1674,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) - bm_print_lock_info(mdev); + bm_print_lock_info(device); s = S2W(enr); e = min((size_t)S2W(enr+1), b->bm_words); @@ -1697,7 +1687,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) count += hweight_long(*bm++); bm_unmap(p_addr); } else { - dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s); + drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s); } spin_unlock_irqrestore(&b->bm_lock, flags); return count; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0e06f0c5dd1e..e7093d4291f1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -45,7 +45,9 @@ #include <linux/prefetch.h> #include <linux/drbd_genl_api.h> #include <linux/drbd.h> +#include "drbd_strings.h" #include "drbd_state.h" +#include "drbd_protocol.h" #ifdef __CHECKER__ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) @@ -65,6 +67,7 @@ extern unsigned int minor_count; extern bool disable_sendpage; extern bool allow_oos; +void tl_abort_disk_io(struct drbd_device *device); #ifdef CONFIG_DRBD_FAULT_INJECTION extern int enable_faults; @@ -95,25 +98,60 @@ extern char usermode_helper[]; #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) -struct drbd_conf; -struct drbd_tconn; - - -/* to shorten dev_warn(DEV, "msg"); and relatives statements */ -#define DEV (disk_to_dev(mdev->vdisk)) - -#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \ - printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS) -#define conn_alert(TCONN, FMT, ARGS...) 
conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS) -#define conn_crit(TCONN, FMT, ARGS...) conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS) -#define conn_err(TCONN, FMT, ARGS...) conn_printk(KERN_ERR, TCONN, FMT, ## ARGS) -#define conn_warn(TCONN, FMT, ARGS...) conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS) -#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS) -#define conn_info(TCONN, FMT, ARGS...) conn_printk(KERN_INFO, TCONN, FMT, ## ARGS) -#define conn_dbg(TCONN, FMT, ARGS...) conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS) - -#define D_ASSERT(exp) if (!(exp)) \ - dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) +struct drbd_device; +struct drbd_connection; + +#define __drbd_printk_device(level, device, fmt, args...) \ + dev_printk(level, disk_to_dev((device)->vdisk), fmt, ## args) +#define __drbd_printk_peer_device(level, peer_device, fmt, args...) \ + dev_printk(level, disk_to_dev((peer_device)->device->vdisk), fmt, ## args) +#define __drbd_printk_resource(level, resource, fmt, args...) \ + printk(level "drbd %s: " fmt, (resource)->name, ## args) +#define __drbd_printk_connection(level, connection, fmt, args...) \ + printk(level "drbd %s: " fmt, (connection)->resource->name, ## args) + +void drbd_printk_with_wrong_object_type(void); + +#define __drbd_printk_if_same_type(obj, type, func, level, fmt, args...) \ + (__builtin_types_compatible_p(typeof(obj), type) || \ + __builtin_types_compatible_p(typeof(obj), const type)), \ + func(level, (const type)(obj), fmt, ## args) + +#define drbd_printk(level, obj, fmt, args...) 
\ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, struct drbd_device *, \ + __drbd_printk_device, level, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, struct drbd_resource *, \ + __drbd_printk_resource, level, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, struct drbd_connection *, \ + __drbd_printk_connection, level, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, struct drbd_peer_device *, \ + __drbd_printk_peer_device, level, fmt, ## args), \ + drbd_printk_with_wrong_object_type())))) + +#define drbd_dbg(obj, fmt, args...) \ + drbd_printk(KERN_DEBUG, obj, fmt, ## args) +#define drbd_alert(obj, fmt, args...) \ + drbd_printk(KERN_ALERT, obj, fmt, ## args) +#define drbd_err(obj, fmt, args...) \ + drbd_printk(KERN_ERR, obj, fmt, ## args) +#define drbd_warn(obj, fmt, args...) \ + drbd_printk(KERN_WARNING, obj, fmt, ## args) +#define drbd_info(obj, fmt, args...) \ + drbd_printk(KERN_INFO, obj, fmt, ## args) +#define drbd_emerg(obj, fmt, args...) \ + drbd_printk(KERN_EMERG, obj, fmt, ## args) + +#define dynamic_drbd_dbg(device, fmt, args...) 
\ + dynamic_dev_dbg(disk_to_dev(device->vdisk), fmt, ## args) + +#define D_ASSERT(device, exp) do { \ + if (!(exp)) \ + drbd_err(device, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__); \ + } while (0) /** * expect - Make an assertion @@ -123,7 +161,7 @@ struct drbd_tconn; #define expect(exp) ({ \ bool _bool = (exp); \ if (!_bool) \ - dev_err(DEV, "ASSERTION %s FAILED in %s\n", \ + drbd_err(device, "ASSERTION %s FAILED in %s\n", \ #exp, __func__); \ _bool; \ }) @@ -145,14 +183,14 @@ enum { }; extern unsigned int -_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); +_drbd_insert_fault(struct drbd_device *device, unsigned int type); static inline int -drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { +drbd_insert_fault(struct drbd_device *device, unsigned int type) { #ifdef CONFIG_DRBD_FAULT_INJECTION return fault_rate && (enable_faults & (1<<type)) && - _drbd_insert_fault(mdev, type); + _drbd_insert_fault(device, type); #else return 0; #endif @@ -164,74 +202,8 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { #define div_floor(A, B) ((A)/(B)) extern struct ratelimit_state drbd_ratelimit_state; -extern struct idr minors; /* RCU, updates: genl_lock() */ -extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */ - -/* on the wire */ -enum drbd_packet { - /* receiver (data socket) */ - P_DATA = 0x00, - P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ - P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */ - P_BARRIER = 0x03, - P_BITMAP = 0x04, - P_BECOME_SYNC_TARGET = 0x05, - P_BECOME_SYNC_SOURCE = 0x06, - P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */ - P_DATA_REQUEST = 0x08, /* Used to ask for a data block */ - P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */ - P_SYNC_PARAM = 0x0a, - P_PROTOCOL = 0x0b, - P_UUIDS = 0x0c, - P_SIZES = 0x0d, - P_STATE = 0x0e, - P_SYNC_UUID = 0x0f, - P_AUTH_CHALLENGE = 0x10, - P_AUTH_RESPONSE = 0x11, - P_STATE_CHG_REQ = 0x12, - - 
/* asender (meta socket */ - P_PING = 0x13, - P_PING_ACK = 0x14, - P_RECV_ACK = 0x15, /* Used in protocol B */ - P_WRITE_ACK = 0x16, /* Used in protocol C */ - P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ - P_SUPERSEDED = 0x18, /* Used in proto C, two-primaries conflict detection */ - P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ - P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ - P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */ - P_BARRIER_ACK = 0x1c, - P_STATE_CHG_REPLY = 0x1d, - - /* "new" commands, no longer fitting into the ordering scheme above */ - - P_OV_REQUEST = 0x1e, /* data socket */ - P_OV_REPLY = 0x1f, - P_OV_RESULT = 0x20, /* meta socket */ - P_CSUM_RS_REQUEST = 0x21, /* data socket */ - P_RS_IS_IN_SYNC = 0x22, /* meta socket */ - P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ - P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ - /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ - /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ - P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ - P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ - P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ - P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ - P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ - P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ - P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ - - P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ - P_MAX_OPT_CMD = 0x101, - - /* special command ids for handshake */ - - P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */ - P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */ - - P_CONNECTION_FEATURES = 0xfffe /* FIXED for the next century! 
*/ -}; +extern struct idr drbd_devices; /* RCU, updates: genl_lock() */ +extern struct list_head drbd_resources; /* RCU, updates: genl_lock() */ extern const char *cmdname(enum drbd_packet cmd); @@ -253,7 +225,7 @@ struct bm_xfer_ctx { unsigned bytes[2]; }; -extern void INFO_bm_xfer_stats(struct drbd_conf *mdev, +extern void INFO_bm_xfer_stats(struct drbd_device *device, const char *direction, struct bm_xfer_ctx *c); static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) @@ -275,233 +247,7 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) #endif } -#ifndef __packed -#define __packed __attribute__((packed)) -#endif - -/* This is the layout for a packet on the wire. - * The byteorder is the network byte order. - * (except block_id and barrier fields. - * these are pointers to local structs - * and have no relevance for the partner, - * which just echoes them as received.) - * - * NOTE that the payload starts at a long aligned offset, - * regardless of 32 or 64 bit arch! 
- */ -struct p_header80 { - u32 magic; - u16 command; - u16 length; /* bytes of data after this header */ -} __packed; - -/* Header for big packets, Used for data packets exceeding 64kB */ -struct p_header95 { - u16 magic; /* use DRBD_MAGIC_BIG here */ - u16 command; - u32 length; -} __packed; - -struct p_header100 { - u32 magic; - u16 volume; - u16 command; - u32 length; - u32 pad; -} __packed; - -extern unsigned int drbd_header_size(struct drbd_tconn *tconn); - -/* these defines must not be changed without changing the protocol version */ -#define DP_HARDBARRIER 1 /* depricated */ -#define DP_RW_SYNC 2 /* equals REQ_SYNC */ -#define DP_MAY_SET_IN_SYNC 4 -#define DP_UNPLUG 8 /* not used anymore */ -#define DP_FUA 16 /* equals REQ_FUA */ -#define DP_FLUSH 32 /* equals REQ_FLUSH */ -#define DP_DISCARD 64 /* equals REQ_DISCARD */ -#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ -#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ - -struct p_data { - u64 sector; /* 64 bits sector number */ - u64 block_id; /* to identify the request in protocol B&C */ - u32 seq_num; - u32 dp_flags; -} __packed; - -/* - * commands which share a struct: - * p_block_ack: - * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), - * P_SUPERSEDED (proto C, two-primaries conflict detection) - * p_block_req: - * P_DATA_REQUEST, P_RS_DATA_REQUEST - */ -struct p_block_ack { - u64 sector; - u64 block_id; - u32 blksize; - u32 seq_num; -} __packed; - -struct p_block_req { - u64 sector; - u64 block_id; - u32 blksize; - u32 pad; /* to multiple of 8 Byte */ -} __packed; - -/* - * commands with their own struct for additional fields: - * P_CONNECTION_FEATURES - * P_BARRIER - * P_BARRIER_ACK - * P_SYNC_PARAM - * ReportParams - */ - -struct p_connection_features { - u32 protocol_min; - u32 feature_flags; - u32 protocol_max; - - /* should be more than enough for future enhancements - * for now, feature_flags and the reserved array shall be zero. 
- */ - - u32 _pad; - u64 reserved[7]; -} __packed; - -struct p_barrier { - u32 barrier; /* barrier number _handle_ only */ - u32 pad; /* to multiple of 8 Byte */ -} __packed; - -struct p_barrier_ack { - u32 barrier; - u32 set_size; -} __packed; - -struct p_rs_param { - u32 resync_rate; - - /* Since protocol version 88 and higher. */ - char verify_alg[0]; -} __packed; - -struct p_rs_param_89 { - u32 resync_rate; - /* protocol version 89: */ - char verify_alg[SHARED_SECRET_MAX]; - char csums_alg[SHARED_SECRET_MAX]; -} __packed; - -struct p_rs_param_95 { - u32 resync_rate; - char verify_alg[SHARED_SECRET_MAX]; - char csums_alg[SHARED_SECRET_MAX]; - u32 c_plan_ahead; - u32 c_delay_target; - u32 c_fill_target; - u32 c_max_rate; -} __packed; - -enum drbd_conn_flags { - CF_DISCARD_MY_DATA = 1, - CF_DRY_RUN = 2, -}; - -struct p_protocol { - u32 protocol; - u32 after_sb_0p; - u32 after_sb_1p; - u32 after_sb_2p; - u32 conn_flags; - u32 two_primaries; - - /* Since protocol version 87 and higher. */ - char integrity_alg[0]; - -} __packed; - -struct p_uuids { - u64 uuid[UI_EXTENDED_SIZE]; -} __packed; - -struct p_rs_uuid { - u64 uuid; -} __packed; - -struct p_sizes { - u64 d_size; /* size of disk */ - u64 u_size; /* user requested size */ - u64 c_size; /* current exported size */ - u32 max_bio_size; /* Maximal size of a BIO */ - u16 queue_order_type; /* not yet implemented in DRBD*/ - u16 dds_flags; /* use enum dds_flags here. */ -} __packed; - -struct p_state { - u32 state; -} __packed; - -struct p_req_state { - u32 mask; - u32 val; -} __packed; - -struct p_req_state_reply { - u32 retcode; -} __packed; - -struct p_drbd06_param { - u64 size; - u32 state; - u32 blksize; - u32 protocol; - u32 version; - u32 gen_cnt[5]; - u32 bit_map_gen[5]; -} __packed; - -struct p_block_desc { - u64 sector; - u32 blksize; - u32 pad; /* to multiple of 8 Byte */ -} __packed; - -/* Valid values for the encoding field. - * Bump proto version when changing this. 
*/ -enum drbd_bitmap_code { - /* RLE_VLI_Bytes = 0, - * and other bit variants had been defined during - * algorithm evaluation. */ - RLE_VLI_Bits = 2, -}; - -struct p_compressed_bm { - /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code - * (encoding & 0x80): polarity (set/unset) of first runlength - * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits - * used to pad up to head.length bytes - */ - u8 encoding; - - u8 code[0]; -} __packed; - -struct p_delay_probe93 { - u32 seq_num; /* sequence number to match the two probe packets */ - u32 offset; /* usecs the probe got sent after the reference time point */ -} __packed; - -/* - * Bitmap packets need to fit within a single page on the sender and receiver, - * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger). - */ -#define DRBD_SOCKET_BUFFER_SIZE 4096 +extern unsigned int drbd_header_size(struct drbd_connection *connection); /**********************************************************************/ enum drbd_thread_state { @@ -517,9 +263,10 @@ struct drbd_thread { struct completion stop; enum drbd_thread_state t_state; int (*function) (struct drbd_thread *); - struct drbd_tconn *tconn; + struct drbd_resource *resource; + struct drbd_connection *connection; int reset_cpu_mask; - char name[9]; + const char *name; }; static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) @@ -535,18 +282,20 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) struct drbd_work { struct list_head list; int (*cb)(struct drbd_work *, int cancel); - union { - struct drbd_conf *mdev; - struct drbd_tconn *tconn; - }; +}; + +struct drbd_device_work { + struct drbd_work w; + struct drbd_device *device; }; #include "drbd_interval.h" -extern int drbd_wait_misc(struct drbd_conf *, struct drbd_interval *); +extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *); struct drbd_request { struct drbd_work w; + struct drbd_device *device; /* if 
local IO is not allowed, will be NULL. * if local IO _is_ allowed, holds the locally submitted bio clone, @@ -579,7 +328,7 @@ struct drbd_request { }; struct drbd_epoch { - struct drbd_tconn *tconn; + struct drbd_connection *connection; struct list_head list; unsigned int barrier_nr; atomic_t epoch_size; /* increased on every request added. */ @@ -587,6 +336,10 @@ struct drbd_epoch { unsigned long flags; }; +/* Prototype declaration of function defined in drbd_receiver.c */ +int drbdd_init(struct drbd_thread *); +int drbd_asender(struct drbd_thread *); + /* drbd_epoch flag bits */ enum { DE_HAVE_BARRIER_NUMBER, @@ -599,11 +352,6 @@ enum epoch_event { EV_CLEANUP = 32, /* used as flag */ }; -struct drbd_wq_barrier { - struct drbd_work w; - struct completion done; -}; - struct digest_info { int digest_size; void *digest; @@ -611,6 +359,7 @@ struct digest_info { struct drbd_peer_request { struct drbd_work w; + struct drbd_peer_device *peer_device; struct drbd_epoch *epoch; /* for writes */ struct page *pages; atomic_t pending_bios; @@ -663,7 +412,7 @@ enum { #define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) -/* flag bits per mdev */ +/* flag bits per device */ enum { UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ @@ -695,7 +444,7 @@ enum { READ_BALANCE_RR, }; -struct drbd_bitmap; /* opaque for drbd_conf */ +struct drbd_bitmap; /* opaque for drbd_device */ /* definition of bits in bm_flags to be used in drbd_bm_lock * and drbd_bitmap_io and friends. 
*/ @@ -769,7 +518,7 @@ struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; struct drbd_md md; - struct disk_conf *disk_conf; /* RCU, for updates: mdev->tconn->conf_update */ + struct disk_conf *disk_conf; /* RCU, for updates: resource->conf_update */ sector_t known_size; /* last known size of that backing device */ }; @@ -782,8 +531,8 @@ struct bm_io_work { struct drbd_work w; char *why; enum bm_flag flags; - int (*io_fn)(struct drbd_conf *mdev); - void (*done)(struct drbd_conf *mdev, int rv); + int (*io_fn)(struct drbd_device *device); + void (*done)(struct drbd_device *device, int rv); }; enum write_ordering_e { @@ -800,7 +549,7 @@ struct fifo_buffer { }; extern struct fifo_buffer *fifo_alloc(int fifo_size); -/* flag bits per tconn */ +/* flag bits per connection */ enum { NET_CONGESTED, /* The data socket is congested */ RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */ @@ -822,23 +571,35 @@ enum { DISCONNECT_SENT, }; -struct drbd_tconn { /* is a resource from the config file */ - char *name; /* Resource name */ - struct list_head all_tconn; /* linked on global drbd_tconns */ +struct drbd_resource { + char *name; struct kref kref; - struct idr volumes; /* <tconn, vnr> to mdev mapping */ - enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ + struct idr devices; /* volume number to device mapping */ + struct list_head connections; + struct list_head resources; + struct res_opts res_opts; + struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */ + spinlock_t req_lock; + unsigned susp:1; /* IO suspended by user */ unsigned susp_nod:1; /* IO suspended because no data */ unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ + + cpumask_var_t cpu_mask; +}; + +struct drbd_connection { + struct list_head connections; + struct drbd_resource *resource; + struct kref kref; + struct idr peer_devices; /* volume number to peer device mapping */ + enum 
drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ struct mutex cstate_mutex; /* Protects graceful disconnects */ unsigned int connect_cnt; /* Inc each time a connection is established */ unsigned long flags; struct net_conf *net_conf; /* content protected by rcu */ - struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */ wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ - struct res_opts res_opts; struct sockaddr_storage my_addr; int my_addr_len; @@ -851,12 +612,10 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned long last_received; /* in jiffies, either socket */ unsigned int ko_count; - spinlock_t req_lock; - struct list_head transfer_log; /* all requests not yet fully processed */ struct crypto_hash *cram_hmac_tfm; - struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ + struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by connection->data->mutex */ struct crypto_hash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */ struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; @@ -875,7 +634,6 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; - cpumask_var_t cpu_mask; /* sender side */ struct drbd_work_queue sender_work; @@ -903,8 +661,15 @@ struct submit_worker { struct list_head writes; }; -struct drbd_conf { - struct drbd_tconn *tconn; +struct drbd_peer_device { + struct list_head peer_devices; + struct drbd_device *device; + struct drbd_connection *connection; +}; + +struct drbd_device { + struct drbd_resource *resource; + struct list_head peer_devices; int vnr; /* volume number within the connection */ struct kref kref; @@ -920,11 +685,11 @@ struct drbd_conf { struct gendisk *vdisk; unsigned long last_reattach_jif; - struct drbd_work resync_work, 
- unplug_work, - go_diskless, - md_sync_work, - start_resync_work; + struct drbd_work resync_work; + struct drbd_work unplug_work; + struct drbd_work go_diskless; + struct drbd_work md_sync_work; + struct drbd_work start_resync_work; struct timer_list resync_timer; struct timer_list md_sync_timer; struct timer_list start_resync_timer; @@ -1030,7 +795,7 @@ struct drbd_conf { struct bm_io_work bm_io_work; u64 ed_uuid; /* UUID of the exposed data */ struct mutex own_state_mutex; - struct mutex *state_mutex; /* either own_state_mutex or mdev->tconn->cstate_mutex */ + struct mutex *state_mutex; /* either own_state_mutex or first_peer_device(device)->connection->cstate_mutex */ char congestion_reason; /* Why we where congested... */ atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */ atomic_t rs_sect_ev; /* for submitted resync data rate, both */ @@ -1038,7 +803,7 @@ struct drbd_conf { int rs_last_events; /* counter of read or write "events" (unit sectors) * on the lower level device when we last looked. 
*/ int c_sync_rate; /* current resync rate after syncer throttle magic */ - struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, tconn->conn_update) */ + struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, connection->conn_update) */ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ unsigned int peer_max_bio_size; @@ -1049,19 +814,46 @@ struct drbd_conf { struct submit_worker submit; }; -static inline struct drbd_conf *minor_to_mdev(unsigned int minor) +static inline struct drbd_device *minor_to_device(unsigned int minor) { - return (struct drbd_conf *)idr_find(&minors, minor); + return (struct drbd_device *)idr_find(&drbd_devices, minor); } -static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) +static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device) { - return mdev->minor; + return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices); } -static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr) +#define for_each_resource(resource, _resources) \ + list_for_each_entry(resource, _resources, resources) + +#define for_each_resource_rcu(resource, _resources) \ + list_for_each_entry_rcu(resource, _resources, resources) + +#define for_each_resource_safe(resource, tmp, _resources) \ + list_for_each_entry_safe(resource, tmp, _resources, resources) + +#define for_each_connection(connection, resource) \ + list_for_each_entry(connection, &resource->connections, connections) + +#define for_each_connection_rcu(connection, resource) \ + list_for_each_entry_rcu(connection, &resource->connections, connections) + +#define for_each_connection_safe(connection, tmp, resource) \ + list_for_each_entry_safe(connection, tmp, &resource->connections, connections) + +#define for_each_peer_device(peer_device, device) \ + list_for_each_entry(peer_device, 
&device->peer_devices, peer_devices) + +#define for_each_peer_device_rcu(peer_device, device) \ + list_for_each_entry_rcu(peer_device, &device->peer_devices, peer_devices) + +#define for_each_peer_device_safe(peer_device, tmp, device) \ + list_for_each_entry_safe(peer_device, tmp, &device->peer_devices, peer_devices) + +static inline unsigned int device_to_minor(struct drbd_device *device) { - return (struct drbd_conf *)idr_find(&tconn->volumes, vnr); + return device->minor; } /* @@ -1075,96 +867,93 @@ enum dds_flags { DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ }; -extern void drbd_init_set_defaults(struct drbd_conf *mdev); +extern void drbd_init_set_defaults(struct drbd_device *device); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); -extern char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_thread *thi); -extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); #else #define drbd_thread_current_set_cpu(A) ({}) -#define drbd_calc_cpu_mask(A) ({}) #endif -extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr, +extern void tl_release(struct drbd_connection *, unsigned int barrier_nr, unsigned int set_size); -extern void tl_clear(struct drbd_tconn *); -extern void drbd_free_sock(struct drbd_tconn *tconn); -extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, +extern void tl_clear(struct drbd_connection *); +extern void drbd_free_sock(struct drbd_connection *connection); +extern int drbd_send(struct drbd_connection *connection, struct socket *sock, void *buf, size_t size, unsigned msg_flags); -extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t, +extern int drbd_send_all(struct drbd_connection *, struct socket *, void *, size_t, unsigned); -extern int __drbd_send_protocol(struct drbd_tconn *tconn, enum 
drbd_packet cmd); -extern int drbd_send_protocol(struct drbd_tconn *tconn); -extern int drbd_send_uuids(struct drbd_conf *mdev); -extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); -extern void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); -extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); -extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); -extern int drbd_send_current_state(struct drbd_conf *mdev); -extern int drbd_send_sync_param(struct drbd_conf *mdev); -extern void drbd_send_b_ack(struct drbd_tconn *tconn, u32 barrier_nr, +extern int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cmd); +extern int drbd_send_protocol(struct drbd_connection *connection); +extern int drbd_send_uuids(struct drbd_peer_device *); +extern int drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *); +extern void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *); +extern int drbd_send_sizes(struct drbd_peer_device *, int trigger_reply, enum dds_flags flags); +extern int drbd_send_state(struct drbd_peer_device *, union drbd_state s); +extern int drbd_send_current_state(struct drbd_peer_device *); +extern int drbd_send_sync_param(struct drbd_peer_device *); +extern void drbd_send_b_ack(struct drbd_connection *connection, u32 barrier_nr, u32 set_size); -extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, +extern int drbd_send_ack(struct drbd_peer_device *, enum drbd_packet, struct drbd_peer_request *); -extern void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, +extern void drbd_send_ack_rp(struct drbd_peer_device *, enum drbd_packet, struct p_block_req *rp); -extern void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, +extern void drbd_send_ack_dp(struct drbd_peer_device *, enum drbd_packet, struct p_data *dp, int data_size); -extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, +extern int 
drbd_send_ack_ex(struct drbd_peer_device *, enum drbd_packet, sector_t sector, int blksize, u64 block_id); -extern int drbd_send_out_of_sync(struct drbd_conf *, struct drbd_request *); -extern int drbd_send_block(struct drbd_conf *, enum drbd_packet, +extern int drbd_send_out_of_sync(struct drbd_peer_device *, struct drbd_request *); +extern int drbd_send_block(struct drbd_peer_device *, enum drbd_packet, struct drbd_peer_request *); -extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); -extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, +extern int drbd_send_dblock(struct drbd_peer_device *, struct drbd_request *req); +extern int drbd_send_drequest(struct drbd_peer_device *, int cmd, sector_t sector, int size, u64 block_id); -extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, +extern int drbd_send_drequest_csum(struct drbd_peer_device *, sector_t sector, int size, void *digest, int digest_size, enum drbd_packet cmd); -extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); +extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int size); -extern int drbd_send_bitmap(struct drbd_conf *mdev); -extern void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); -extern void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); +extern int drbd_send_bitmap(struct drbd_device *device); +extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); +extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); -extern void drbd_mdev_cleanup(struct drbd_conf *mdev); -void drbd_print_uuids(struct drbd_conf *mdev, const char *text); - -extern void conn_md_sync(struct drbd_tconn *tconn); -extern void drbd_md_write(struct drbd_conf *mdev, void *buffer); -extern void drbd_md_sync(struct drbd_conf *mdev); -extern int 
drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); -extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); -extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); -extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); -extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local); -extern void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local); -extern void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); -extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); -extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); +extern void drbd_device_cleanup(struct drbd_device *device); +void drbd_print_uuids(struct drbd_device *device, const char *text); + +extern void conn_md_sync(struct drbd_connection *connection); +extern void drbd_md_write(struct drbd_device *device, void *buffer); +extern void drbd_md_sync(struct drbd_device *device); +extern int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev); +extern void drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local); +extern void _drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local); +extern void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local); +extern void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local); +extern void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local); +extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local); +extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local); +extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local); extern int drbd_md_test_flag(struct drbd_backing_dev *, int); #ifndef DRBD_DEBUG_MD_SYNC -extern void 
drbd_md_mark_dirty(struct drbd_conf *mdev); +extern void drbd_md_mark_dirty(struct drbd_device *device); #else #define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ ) -extern void drbd_md_mark_dirty_(struct drbd_conf *mdev, +extern void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func); #endif -extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, - int (*io_fn)(struct drbd_conf *), - void (*done)(struct drbd_conf *, int), +extern void drbd_queue_bitmap_io(struct drbd_device *device, + int (*io_fn)(struct drbd_device *), + void (*done)(struct drbd_device *, int), char *why, enum bm_flag flags); -extern int drbd_bitmap_io(struct drbd_conf *mdev, - int (*io_fn)(struct drbd_conf *), +extern int drbd_bitmap_io(struct drbd_device *device, + int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags); -extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, - int (*io_fn)(struct drbd_conf *), +extern int drbd_bitmap_io_from_worker(struct drbd_device *device, + int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags); -extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); -extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); -extern void drbd_ldev_destroy(struct drbd_conf *mdev); +extern int drbd_bmio_set_n_write(struct drbd_device *device); +extern int drbd_bmio_clear_n_write(struct drbd_device *device); +extern void drbd_ldev_destroy(struct drbd_device *device); /* Meta data layout * @@ -1350,52 +1139,52 @@ struct bm_extent { #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */ #define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */ -extern int drbd_bm_init(struct drbd_conf *mdev); -extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); -extern void drbd_bm_cleanup(struct drbd_conf *mdev); -extern void drbd_bm_set_all(struct drbd_conf *mdev); -extern void 
drbd_bm_clear_all(struct drbd_conf *mdev); +extern int drbd_bm_init(struct drbd_device *device); +extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits); +extern void drbd_bm_cleanup(struct drbd_device *device); +extern void drbd_bm_set_all(struct drbd_device *device); +extern void drbd_bm_clear_all(struct drbd_device *device); /* set/clear/test only a few bits at a time */ extern int drbd_bm_set_bits( - struct drbd_conf *mdev, unsigned long s, unsigned long e); + struct drbd_device *device, unsigned long s, unsigned long e); extern int drbd_bm_clear_bits( - struct drbd_conf *mdev, unsigned long s, unsigned long e); + struct drbd_device *device, unsigned long s, unsigned long e); extern int drbd_bm_count_bits( - struct drbd_conf *mdev, const unsigned long s, const unsigned long e); + struct drbd_device *device, const unsigned long s, const unsigned long e); /* bm_set_bits variant for use while holding drbd_bm_lock, * may process the whole bitmap in one go */ -extern void _drbd_bm_set_bits(struct drbd_conf *mdev, +extern void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e); -extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); -extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); -extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); -extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); -extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); -extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); -extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); -extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local); -extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); -extern size_t drbd_bm_words(struct drbd_conf *mdev); -extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); -extern sector_t 
drbd_bm_capacity(struct drbd_conf *mdev); +extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); +extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); +extern int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local); +extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); +extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); +extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); +extern size_t drbd_bm_words(struct drbd_device *device); +extern unsigned long drbd_bm_bits(struct drbd_device *device); +extern sector_t drbd_bm_capacity(struct drbd_device *device); #define DRBD_END_OF_BITMAP (~(unsigned long)0) -extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); +extern unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo); /* bm_find_next variants for use while you hold drbd_bm_lock() */ -extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); -extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo); -extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev); -extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev); -extern int drbd_bm_rs_done(struct drbd_conf *mdev); +extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo); +extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); +extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); +extern unsigned long drbd_bm_total_weight(struct drbd_device *device); +extern int drbd_bm_rs_done(struct drbd_device *device); 
/* for receive_bitmap */ -extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, +extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number, unsigned long *buffer); /* for _drbd_send_bitmap */ -extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, +extern void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number, unsigned long *buffer); -extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags); -extern void drbd_bm_unlock(struct drbd_conf *mdev); +extern void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags); +extern void drbd_bm_unlock(struct drbd_device *device); /* drbd_main.c */ extern struct kmem_cache *drbd_request_cache; @@ -1439,35 +1228,40 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); extern rwlock_t global_state_lock; -extern int conn_lowest_minor(struct drbd_tconn *tconn); -enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); -extern void drbd_minor_destroy(struct kref *kref); +extern int conn_lowest_minor(struct drbd_connection *connection); +enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr); +extern void drbd_destroy_device(struct kref *kref); +extern void drbd_delete_device(struct drbd_device *mdev); + +extern struct drbd_resource *drbd_create_resource(const char *name); +extern void drbd_free_resource(struct drbd_resource *resource); -extern int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts); -extern struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts); -extern void conn_destroy(struct kref *kref); -struct drbd_tconn *conn_get_by_name(const char *name); -extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, +extern int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts); +extern struct drbd_connection *conn_create(const char *name, struct 
res_opts *res_opts); +extern void drbd_destroy_connection(struct kref *kref); +extern struct drbd_connection *conn_get_by_addrs(void *my_addr, int my_addr_len, void *peer_addr, int peer_addr_len); -extern void conn_free_crypto(struct drbd_tconn *tconn); +extern struct drbd_resource *drbd_find_resource(const char *name); +extern void drbd_destroy_resource(struct kref *kref); +extern void conn_free_crypto(struct drbd_connection *connection); extern int proc_details; /* drbd_req */ extern void do_submit(struct work_struct *ws); -extern void __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long); +extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long); extern void drbd_make_request(struct request_queue *q, struct bio *bio); -extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); +extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ extern int drbd_msg_put_info(const char *info); -extern void drbd_suspend_io(struct drbd_conf *mdev); -extern void drbd_resume_io(struct drbd_conf *mdev); +extern void drbd_suspend_io(struct drbd_device *device); +extern void drbd_resume_io(struct drbd_device *device); extern char *ppsize(char *buf, unsigned long long size); -extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int); +extern sector_t drbd_new_dev_size(struct drbd_device *, struct drbd_backing_dev *, sector_t, int); enum determine_dev_size { DS_ERROR_SHRINK = -3, DS_ERROR_SPACE_MD = -2, @@ -1478,48 +1272,47 @@ enum determine_dev_size { DS_GREW_FROM_ZERO = 3, }; extern enum determine_dev_size -drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local); -extern void resync_after_online_grow(struct drbd_conf *); -extern void 
drbd_reconsider_max_bio_size(struct drbd_conf *mdev); -extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, +drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); +extern void resync_after_online_grow(struct drbd_device *); +extern void drbd_reconsider_max_bio_size(struct drbd_device *device); +extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force); -extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); -extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); -extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); +extern bool conn_try_outdate_peer(struct drbd_connection *connection); +extern void conn_try_outdate_peer_async(struct drbd_connection *connection); +extern int drbd_khelper(struct drbd_device *device, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); -void drbd_resync_after_changed(struct drbd_conf *mdev); -extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); -extern void resume_next_sg(struct drbd_conf *mdev); -extern void suspend_other_sg(struct drbd_conf *mdev); -extern int drbd_resync_finished(struct drbd_conf *mdev); +enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor); +void drbd_resync_after_changed(struct drbd_device *device); +extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side); +extern void resume_next_sg(struct drbd_device *device); +extern void suspend_other_sg(struct drbd_device *device); +extern int drbd_resync_finished(struct drbd_device *device); /* maybe rather drbd_main.c ? 
*/ -extern void *drbd_md_get_buffer(struct drbd_conf *mdev); -extern void drbd_md_put_buffer(struct drbd_conf *mdev); -extern int drbd_md_sync_page_io(struct drbd_conf *mdev, +extern void *drbd_md_get_buffer(struct drbd_device *device); +extern void drbd_md_put_buffer(struct drbd_device *device); +extern int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, sector_t sector, int rw); -extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); -extern void wait_until_done_or_force_detached(struct drbd_conf *mdev, +extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int); +extern void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int *done); -extern void drbd_rs_controller_reset(struct drbd_conf *mdev); +extern void drbd_rs_controller_reset(struct drbd_device *device); -static inline void ov_out_of_sync_print(struct drbd_conf *mdev) +static inline void ov_out_of_sync_print(struct drbd_device *device) { - if (mdev->ov_last_oos_size) { - dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", - (unsigned long long)mdev->ov_last_oos_start, - (unsigned long)mdev->ov_last_oos_size); + if (device->ov_last_oos_size) { + drbd_err(device, "Out of sync: start=%llu, size=%lu (sectors)\n", + (unsigned long long)device->ov_last_oos_start, + (unsigned long)device->ov_last_oos_size); } - mdev->ov_last_oos_size=0; + device->ov_last_oos_size = 0; } -extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); -extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, - struct drbd_peer_request *, void *); +extern void drbd_csum_bio(struct crypto_hash *, struct bio *, void *); +extern void drbd_csum_ee(struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ extern int w_e_end_data_req(struct drbd_work *, int); extern int w_e_end_rsdata_req(struct drbd_work *, int); @@ -1529,10 +1322,8 @@ extern int 
w_e_end_ov_req(struct drbd_work *, int); extern int w_ov_finished(struct drbd_work *, int); extern int w_resync_timer(struct drbd_work *, int); extern int w_send_write_hint(struct drbd_work *, int); -extern int w_make_resync_request(struct drbd_work *, int); extern int w_send_dblock(struct drbd_work *, int); extern int w_send_read_req(struct drbd_work *, int); -extern int w_prev_work_done(struct drbd_work *, int); extern int w_e_reissue(struct drbd_work *, int); extern int w_restart_disk_io(struct drbd_work *, int); extern int w_send_out_of_sync(struct drbd_work *, int); @@ -1542,27 +1333,24 @@ extern void resync_timer_fn(unsigned long data); extern void start_resync_timer_fn(unsigned long data); /* drbd_receiver.c */ -extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); -extern int drbd_submit_peer_request(struct drbd_conf *, +extern int drbd_receiver(struct drbd_thread *thi); +extern int drbd_asender(struct drbd_thread *thi); +extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); +extern int drbd_submit_peer_request(struct drbd_device *, struct drbd_peer_request *, const unsigned, const int); -extern int drbd_free_peer_reqs(struct drbd_conf *, struct list_head *); -extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_conf *, u64, +extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); +extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, sector_t, unsigned int, gfp_t) __must_hold(local); -extern void __drbd_free_peer_req(struct drbd_conf *, struct drbd_peer_request *, +extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, int); #define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) #define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) -extern struct page *drbd_alloc_pages(struct drbd_conf *, unsigned int, bool); -extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); -extern void 
_drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void conn_flush_workqueue(struct drbd_tconn *tconn); -extern int drbd_connected(struct drbd_conf *mdev); -static inline void drbd_flush_workqueue(struct drbd_conf *mdev) -{ - conn_flush_workqueue(mdev->tconn); -} +extern struct page *drbd_alloc_pages(struct drbd_peer_device *, unsigned int, bool); +extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled); +extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); +extern int drbd_connected(struct drbd_peer_device *); /* Yes, there is kernel_setsockopt, but only since 2.6.18. * So we have our own copy of it here. */ @@ -1613,7 +1401,7 @@ static inline void drbd_tcp_quickack(struct socket *sock) (char*)&val, sizeof(val)); } -void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo); +void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; @@ -1622,29 +1410,29 @@ extern const char *drbd_conn_str(enum drbd_conns s); extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ -extern int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i); -extern void drbd_al_begin_io_commit(struct drbd_conf *mdev, bool delegate); -extern bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i); -extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate); -extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i); -extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); -extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); -extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); -extern void drbd_rs_cancel_all(struct drbd_conf *mdev); -extern int drbd_rs_del_all(struct drbd_conf *mdev); -extern void 
drbd_rs_failed_io(struct drbd_conf *mdev, +extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); +extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate); +extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); +extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate); +extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); +extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); +extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); +extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector); +extern void drbd_rs_cancel_all(struct drbd_device *device); +extern int drbd_rs_del_all(struct drbd_device *device); +extern void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size); -extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); -extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, +extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); +extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size, const char *file, const unsigned int line); -#define drbd_set_in_sync(mdev, sector, size) \ - __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) -extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, +#define drbd_set_in_sync(device, sector, size) \ + __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__) +extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, const char *file, const unsigned int line); -#define drbd_set_out_of_sync(mdev, sector, size) \ - __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) -extern void drbd_al_shrink(struct drbd_conf *mdev); -extern int drbd_initialize_al(struct drbd_conf *, void *); +#define 
drbd_set_out_of_sync(device, sector, size) \ + __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__) +extern void drbd_al_shrink(struct drbd_device *device); +extern int drbd_initialize_al(struct drbd_device *, void *); /* drbd_nl.c */ /* state info broadcast */ @@ -1661,7 +1449,7 @@ struct sib_info { }; }; }; -void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib); +void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); /* * inline helper functions @@ -1690,26 +1478,27 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r } static inline enum drbd_state_rv -_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, +_drbd_set_state(struct drbd_device *device, union drbd_state ns, enum chg_state_flags flags, struct completion *done) { enum drbd_state_rv rv; read_lock(&global_state_lock); - rv = __drbd_set_state(mdev, ns, flags, done); + rv = __drbd_set_state(device, ns, flags, done); read_unlock(&global_state_lock); return rv; } -static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) +static inline union drbd_state drbd_read_state(struct drbd_device *device) { + struct drbd_resource *resource = device->resource; union drbd_state rv; - rv.i = mdev->state.i; - rv.susp = mdev->tconn->susp; - rv.susp_nod = mdev->tconn->susp_nod; - rv.susp_fen = mdev->tconn->susp_fen; + rv.i = device->state.i; + rv.susp = resource->susp; + rv.susp_nod = resource->susp_nod; + rv.susp_fen = resource->susp_fen; return rv; } @@ -1722,22 +1511,22 @@ enum drbd_force_detach_flags { }; #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) -static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, +static inline void __drbd_chk_io_error_(struct drbd_device *device, enum drbd_force_detach_flags df, const char *where) { enum drbd_io_error_p ep; rcu_read_lock(); - ep = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + ep = 
rcu_dereference(device->ldev->disk_conf)->on_io_error; rcu_read_unlock(); switch (ep) { case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) { if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Local IO failed in %s.\n", where); - if (mdev->state.disk > D_INCONSISTENT) - _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); + drbd_err(device, "Local IO failed in %s.\n", where); + if (device->state.disk > D_INCONSISTENT) + _drbd_set_state(_NS(device, disk, D_INCONSISTENT), CS_HARD, NULL); break; } /* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */ @@ -1763,14 +1552,14 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, * we read meta data only once during attach, * which will fail in case of errors. */ - set_bit(WAS_IO_ERROR, &mdev->flags); + set_bit(WAS_IO_ERROR, &device->flags); if (df == DRBD_READ_ERROR) - set_bit(WAS_READ_ERROR, &mdev->flags); + set_bit(WAS_READ_ERROR, &device->flags); if (df == DRBD_FORCE_DETACH) - set_bit(FORCE_DETACH, &mdev->flags); - if (mdev->state.disk > D_FAILED) { - _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); - dev_err(DEV, + set_bit(FORCE_DETACH, &device->flags); + if (device->state.disk > D_FAILED) { + _drbd_set_state(_NS(device, disk, D_FAILED), CS_HARD, NULL); + drbd_err(device, "Local IO failed in %s. Detaching...\n", where); } break; @@ -1779,21 +1568,21 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, /** * drbd_chk_io_error: Handle the on_io_error setting, should be called from all io completion handlers - * @mdev: DRBD device. + * @device: DRBD device. * @error: Error code passed to the IO completion callback * @forcedetach: Force detach. I.e. 
the error happened while accessing the meta data * * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED) */ #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) -static inline void drbd_chk_io_error_(struct drbd_conf *mdev, +static inline void drbd_chk_io_error_(struct drbd_device *device, int error, enum drbd_force_detach_flags forcedetach, const char *where) { if (error) { unsigned long flags; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - __drbd_chk_io_error_(mdev, forcedetach, where); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + spin_lock_irqsave(&device->resource->req_lock, flags); + __drbd_chk_io_error_(device, forcedetach, where); + spin_unlock_irqrestore(&device->resource->req_lock, flags); } } @@ -1916,31 +1705,33 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) wake_up(&q->q_wait); } -static inline void wake_asender(struct drbd_tconn *tconn) +extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); + +static inline void wake_asender(struct drbd_connection *connection) { - if (test_bit(SIGNAL_ASENDER, &tconn->flags)) - force_sig(DRBD_SIG, tconn->asender.task); + if (test_bit(SIGNAL_ASENDER, &connection->flags)) + force_sig(DRBD_SIG, connection->asender.task); } -static inline void request_ping(struct drbd_tconn *tconn) +static inline void request_ping(struct drbd_connection *connection) { - set_bit(SEND_PING, &tconn->flags); - wake_asender(tconn); + set_bit(SEND_PING, &connection->flags); + wake_asender(connection); } -extern void *conn_prepare_command(struct drbd_tconn *, struct drbd_socket *); -extern void *drbd_prepare_command(struct drbd_conf *, struct drbd_socket *); -extern int conn_send_command(struct drbd_tconn *, struct drbd_socket *, +extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *); +extern void *drbd_prepare_command(struct drbd_peer_device *, struct drbd_socket *); +extern int conn_send_command(struct 
drbd_connection *, struct drbd_socket *, enum drbd_packet, unsigned int, void *, unsigned int); -extern int drbd_send_command(struct drbd_conf *, struct drbd_socket *, +extern int drbd_send_command(struct drbd_peer_device *, struct drbd_socket *, enum drbd_packet, unsigned int, void *, unsigned int); -extern int drbd_send_ping(struct drbd_tconn *tconn); -extern int drbd_send_ping_ack(struct drbd_tconn *tconn); -extern int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); -extern int conn_send_state_req(struct drbd_tconn *, union drbd_state, union drbd_state); +extern int drbd_send_ping(struct drbd_connection *connection); +extern int drbd_send_ping_ack(struct drbd_connection *connection); +extern int drbd_send_state_req(struct drbd_peer_device *, union drbd_state, union drbd_state); +extern int conn_send_state_req(struct drbd_connection *, union drbd_state, union drbd_state); static inline void drbd_thread_stop(struct drbd_thread *thi) { @@ -1979,22 +1770,22 @@ static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) * _req_mod(req, CONNECTION_LOST_WHILE_PENDING) * [from tl_clear_barrier] */ -static inline void inc_ap_pending(struct drbd_conf *mdev) +static inline void inc_ap_pending(struct drbd_device *device) { - atomic_inc(&mdev->ap_pending_cnt); + atomic_inc(&device->ap_pending_cnt); } #define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \ - if (atomic_read(&mdev->which) < 0) \ - dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ + if (atomic_read(&device->which) < 0) \ + drbd_err(device, "in %s:%d: " #which " = %d < 0 !\n", \ func, line, \ - atomic_read(&mdev->which)) + atomic_read(&device->which)) -#define dec_ap_pending(mdev) _dec_ap_pending(mdev, __FUNCTION__, __LINE__) -static inline void _dec_ap_pending(struct drbd_conf *mdev, const char *func, int line) +#define dec_ap_pending(device) _dec_ap_pending(device, __FUNCTION__, __LINE__) +static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int 
line) { - if (atomic_dec_and_test(&mdev->ap_pending_cnt)) - wake_up(&mdev->misc_wait); + if (atomic_dec_and_test(&device->ap_pending_cnt)) + wake_up(&device->misc_wait); ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line); } @@ -2004,15 +1795,15 @@ static inline void _dec_ap_pending(struct drbd_conf *mdev, const char *func, int * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER) * (or P_NEG_ACK with ID_SYNCER) */ -static inline void inc_rs_pending(struct drbd_conf *mdev) +static inline void inc_rs_pending(struct drbd_device *device) { - atomic_inc(&mdev->rs_pending_cnt); + atomic_inc(&device->rs_pending_cnt); } -#define dec_rs_pending(mdev) _dec_rs_pending(mdev, __FUNCTION__, __LINE__) -static inline void _dec_rs_pending(struct drbd_conf *mdev, const char *func, int line) +#define dec_rs_pending(device) _dec_rs_pending(device, __FUNCTION__, __LINE__) +static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) { - atomic_dec(&mdev->rs_pending_cnt); + atomic_dec(&device->rs_pending_cnt); ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line); } @@ -2025,103 +1816,104 @@ static inline void _dec_rs_pending(struct drbd_conf *mdev, const char *func, int * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA * receive_Barrier_* we need to send a P_BARRIER_ACK */ -static inline void inc_unacked(struct drbd_conf *mdev) +static inline void inc_unacked(struct drbd_device *device) { - atomic_inc(&mdev->unacked_cnt); + atomic_inc(&device->unacked_cnt); } -#define dec_unacked(mdev) _dec_unacked(mdev, __FUNCTION__, __LINE__) -static inline void _dec_unacked(struct drbd_conf *mdev, const char *func, int line) +#define dec_unacked(device) _dec_unacked(device, __FUNCTION__, __LINE__) +static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) { - atomic_dec(&mdev->unacked_cnt); + atomic_dec(&device->unacked_cnt); ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } -#define 
sub_unacked(mdev, n) _sub_unacked(mdev, n, __FUNCTION__, __LINE__) -static inline void _sub_unacked(struct drbd_conf *mdev, int n, const char *func, int line) +#define sub_unacked(device, n) _sub_unacked(device, n, __FUNCTION__, __LINE__) +static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) { - atomic_sub(n, &mdev->unacked_cnt); + atomic_sub(n, &device->unacked_cnt); ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } /** - * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev + * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev * @M: DRBD device. * - * You have to call put_ldev() when finished working with mdev->ldev. + * You have to call put_ldev() when finished working with device->ldev. */ #define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT)) #define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS)) -static inline void put_ldev(struct drbd_conf *mdev) +static inline void put_ldev(struct drbd_device *device) { - int i = atomic_dec_return(&mdev->local_cnt); + int i = atomic_dec_return(&device->local_cnt); /* This may be called from some endio handler, * so we must not sleep here. */ __release(local); - D_ASSERT(i >= 0); + D_ASSERT(device, i >= 0); if (i == 0) { - if (mdev->state.disk == D_DISKLESS) + if (device->state.disk == D_DISKLESS) /* even internal references gone, safe to destroy */ - drbd_ldev_destroy(mdev); - if (mdev->state.disk == D_FAILED) { + drbd_ldev_destroy(device); + if (device->state.disk == D_FAILED) { /* all application IO references gone. 
*/ - if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) - drbd_queue_work(&mdev->tconn->sender_work, &mdev->go_diskless); + if (!test_and_set_bit(GO_DISKLESS, &device->flags)) + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &device->go_diskless); } - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); } } #ifndef __CHECKER__ -static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins) { int io_allowed; /* never get a reference while D_DISKLESS */ - if (mdev->state.disk == D_DISKLESS) + if (device->state.disk == D_DISKLESS) return 0; - atomic_inc(&mdev->local_cnt); - io_allowed = (mdev->state.disk >= mins); + atomic_inc(&device->local_cnt); + io_allowed = (device->state.disk >= mins); if (!io_allowed) - put_ldev(mdev); + put_ldev(device); return io_allowed; } #else -extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); +extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins); #endif /* you must have an "get_ldev" reference */ -static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, +static inline void drbd_get_syncer_progress(struct drbd_device *device, unsigned long *bits_left, unsigned int *per_mil_done) { /* this is to break it at compile time when we change that, in case we * want to support more than (1<<32) bits on a 32bit arch. */ - typecheck(unsigned long, mdev->rs_total); + typecheck(unsigned long, device->rs_total); /* note: both rs_total and rs_left are in bits, i.e. in * units of BM_BLOCK_SIZE. * for the percentage, we don't care. 
*/ - if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) - *bits_left = mdev->ov_left; + if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) + *bits_left = device->ov_left; else - *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + *bits_left = drbd_bm_total_weight(device) - device->rs_failed; /* >> 10 to prevent overflow, * +1 to prevent division by zero */ - if (*bits_left > mdev->rs_total) { + if (*bits_left > device->rs_total) { /* doh. maybe a logic bug somewhere. * may also be just a race condition * between this and a disconnect during sync. * for now, just prevent in-kernel buffer overflow. */ smp_rmb(); - dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", - drbd_conn_str(mdev->state.conn), - *bits_left, mdev->rs_total, mdev->rs_failed); + drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", + drbd_conn_str(device->state.conn), + *bits_left, device->rs_total, device->rs_failed); *per_mil_done = 0; } else { /* Make sure the division happens in long context. @@ -2133,9 +1925,9 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, * Note: currently we don't support such large bitmaps on 32bit * arch anyways, but no harm done to be prepared for it here. */ - unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10; + unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10; unsigned long left = *bits_left >> shift; - unsigned long total = 1UL + (mdev->rs_total >> shift); + unsigned long total = 1UL + (device->rs_total >> shift); unsigned long tmp = 1000UL - left * 1000UL/total; *per_mil_done = tmp; } @@ -2145,22 +1937,22 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, /* this throttles on-the-fly application requests * according to max_buffers settings; * maybe re-implement using semaphores? 
*/ -static inline int drbd_get_max_buffers(struct drbd_conf *mdev) +static inline int drbd_get_max_buffers(struct drbd_device *device) { struct net_conf *nc; int mxb; rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); mxb = nc ? nc->max_buffers : 1000000; /* arbitrary limit on open requests */ rcu_read_unlock(); return mxb; } -static inline int drbd_state_is_stable(struct drbd_conf *mdev) +static inline int drbd_state_is_stable(struct drbd_device *device) { - union drbd_dev_state s = mdev->state; + union drbd_dev_state s = device->state; /* DO NOT add a default clause, we want the compiler to warn us * for any newly introduced state we may have forgotten to add here */ @@ -2194,7 +1986,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) /* Allow IO in BM exchange states with new protocols */ case C_WF_BITMAP_S: - if (mdev->tconn->agreed_pro_version < 96) + if (first_peer_device(device)->connection->agreed_pro_version < 96) return 0; break; @@ -2228,20 +2020,20 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) return 1; } -static inline int drbd_suspended(struct drbd_conf *mdev) +static inline int drbd_suspended(struct drbd_device *device) { - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_resource *resource = device->resource; - return tconn->susp || tconn->susp_fen || tconn->susp_nod; + return resource->susp || resource->susp_fen || resource->susp_nod; } -static inline bool may_inc_ap_bio(struct drbd_conf *mdev) +static inline bool may_inc_ap_bio(struct drbd_device *device) { - int mxb = drbd_get_max_buffers(mdev); + int mxb = drbd_get_max_buffers(device); - if (drbd_suspended(mdev)) + if (drbd_suspended(device)) return false; - if (test_bit(SUSPEND_IO, &mdev->flags)) + if (test_bit(SUSPEND_IO, &device->flags)) return false; /* to avoid potential deadlock or bitmap corruption, @@ -2249,32 +2041,32 @@ static inline bool may_inc_ap_bio(struct 
drbd_conf *mdev) * to start during "stable" states. */ /* no new io accepted when attaching or detaching the disk */ - if (!drbd_state_is_stable(mdev)) + if (!drbd_state_is_stable(device)) return false; /* since some older kernels don't have atomic_add_unless, * and we are within the spinlock anyways, we have this workaround. */ - if (atomic_read(&mdev->ap_bio_cnt) > mxb) + if (atomic_read(&device->ap_bio_cnt) > mxb) return false; - if (test_bit(BITMAP_IO, &mdev->flags)) + if (test_bit(BITMAP_IO, &device->flags)) return false; return true; } -static inline bool inc_ap_bio_cond(struct drbd_conf *mdev) +static inline bool inc_ap_bio_cond(struct drbd_device *device) { bool rv = false; - spin_lock_irq(&mdev->tconn->req_lock); - rv = may_inc_ap_bio(mdev); + spin_lock_irq(&device->resource->req_lock); + rv = may_inc_ap_bio(device); if (rv) - atomic_inc(&mdev->ap_bio_cnt); - spin_unlock_irq(&mdev->tconn->req_lock); + atomic_inc(&device->ap_bio_cnt); + spin_unlock_irq(&device->resource->req_lock); return rv; } -static inline void inc_ap_bio(struct drbd_conf *mdev) +static inline void inc_ap_bio(struct drbd_device *device) { /* we wait here * as long as the device is suspended @@ -2284,42 +2076,44 @@ static inline void inc_ap_bio(struct drbd_conf *mdev) * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. 
*/ - wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev)); + wait_event(device->misc_wait, inc_ap_bio_cond(device)); } -static inline void dec_ap_bio(struct drbd_conf *mdev) +static inline void dec_ap_bio(struct drbd_device *device) { - int mxb = drbd_get_max_buffers(mdev); - int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); + int mxb = drbd_get_max_buffers(device); + int ap_bio = atomic_dec_return(&device->ap_bio_cnt); - D_ASSERT(ap_bio >= 0); + D_ASSERT(device, ap_bio >= 0); - if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { - if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); + if (ap_bio == 0 && test_bit(BITMAP_IO, &device->flags)) { + if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) + drbd_queue_work(&first_peer_device(device)-> + connection->sender_work, + &device->bm_io_work.w); } /* this currently does wake_up for every dec_ap_bio! * maybe rather introduce some type of hysteresis? * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? 
*/ if (ap_bio < mxb) - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); } -static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev) +static inline bool verify_can_do_stop_sector(struct drbd_device *device) { - return mdev->tconn->agreed_pro_version >= 97 && - mdev->tconn->agreed_pro_version != 100; + return first_peer_device(device)->connection->agreed_pro_version >= 97 && + first_peer_device(device)->connection->agreed_pro_version != 100; } -static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) +static inline int drbd_set_ed_uuid(struct drbd_device *device, u64 val) { - int changed = mdev->ed_uuid != val; - mdev->ed_uuid = val; + int changed = device->ed_uuid != val; + device->ed_uuid = val; return changed; } -static inline int drbd_queue_order_type(struct drbd_conf *mdev) +static inline int drbd_queue_order_type(struct drbd_device *device) { /* sorry, we currently have no working implementation * of distributed TCQ stuff */ @@ -2329,23 +2123,29 @@ static inline int drbd_queue_order_type(struct drbd_conf *mdev) return QUEUE_ORDERED_NONE; } -static inline void drbd_md_flush(struct drbd_conf *mdev) +static inline void drbd_md_flush(struct drbd_device *device) { int r; - if (mdev->ldev == NULL) { - dev_warn(DEV, "mdev->ldev == NULL in drbd_md_flush\n"); + if (device->ldev == NULL) { + drbd_warn(device, "device->ldev == NULL in drbd_md_flush\n"); return; } - if (test_bit(MD_NO_FUA, &mdev->flags)) + if (test_bit(MD_NO_FUA, &device->flags)) return; - r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_NOIO, NULL); + r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL); if (r) { - set_bit(MD_NO_FUA, &mdev->flags); - dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); + set_bit(MD_NO_FUA, &device->flags); + drbd_err(device, "meta data flush failed with status %d, disabling md-flushes\n", r); } } +static inline struct drbd_connection *first_connection(struct drbd_resource *resource) +{ + return 
list_first_entry(&resource->connections, + struct drbd_connection, connections); +} + #endif diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 929468e1512a..331e5cc1227d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -52,16 +52,12 @@ #include <linux/drbd_limits.h> #include "drbd_int.h" +#include "drbd_protocol.h" #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ #include "drbd_vli.h" static DEFINE_MUTEX(drbd_main_mutex); -int drbdd_init(struct drbd_thread *); -int drbd_worker(struct drbd_thread *); -int drbd_asender(struct drbd_thread *); - -int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static void drbd_release(struct gendisk *gd, fmode_t mode); static int w_md_sync(struct drbd_work *w, int unused); @@ -118,8 +114,8 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 /* in 2.6.x, our device mapping and config info contains our virtual gendisks * as member "struct gendisk *vdisk;" */ -struct idr minors; -struct list_head drbd_tconns; /* list of struct drbd_tconn */ +struct idr drbd_devices; +struct list_head drbd_resources; struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* peer requests */ @@ -166,15 +162,15 @@ struct bio *bio_alloc_drbd(gfp_t gfp_mask) /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. 
*/ -int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins) { int io_allowed; - atomic_inc(&mdev->local_cnt); - io_allowed = (mdev->state.disk >= mins); + atomic_inc(&device->local_cnt); + io_allowed = (device->state.disk >= mins); if (!io_allowed) { - if (atomic_dec_and_test(&mdev->local_cnt)) - wake_up(&mdev->misc_wait); + if (atomic_dec_and_test(&device->local_cnt)) + wake_up(&device->misc_wait); } return io_allowed; } @@ -183,7 +179,7 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) /** * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch - * @tconn: DRBD connection. + * @connection: DRBD connection. * @barrier_nr: Expected identifier of the DRBD write barrier packet. * @set_size: Expected number of requests before that barrier. * @@ -191,7 +187,7 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) * epoch of not yet barrier-acked requests, this function will cause a * termination of the connection. */ -void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, +void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, unsigned int set_size) { struct drbd_request *r; @@ -199,11 +195,11 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, int expect_epoch = 0; int expect_size = 0; - spin_lock_irq(&tconn->req_lock); + spin_lock_irq(&connection->resource->req_lock); /* find oldest not yet barrier-acked write request, * count writes in its epoch. */ - list_for_each_entry(r, &tconn->transfer_log, tl_requests) { + list_for_each_entry(r, &connection->transfer_log, tl_requests) { const unsigned s = r->rq_state; if (!req) { if (!(s & RQ_WRITE)) @@ -228,18 +224,18 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, /* first some paranoia code */ if (req == NULL) { - conn_err(tconn, "BAD! 
BarrierAck #%u received, but no epoch in tl!?\n", + drbd_err(connection, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", barrier_nr); goto bail; } if (expect_epoch != barrier_nr) { - conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n", + drbd_err(connection, "BAD! BarrierAck #%u received, expected #%u!\n", barrier_nr, expect_epoch); goto bail; } if (expect_size != set_size) { - conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", + drbd_err(connection, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", barrier_nr, set_size, expect_size); goto bail; } @@ -248,90 +244,91 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, /* this extra list walk restart is paranoia, * to catch requests being barrier-acked "unexpectedly". * It usually should find the same req again, or some READ preceding it. */ - list_for_each_entry(req, &tconn->transfer_log, tl_requests) + list_for_each_entry(req, &connection->transfer_log, tl_requests) if (req->epoch == expect_epoch) break; - list_for_each_entry_safe_from(req, r, &tconn->transfer_log, tl_requests) { + list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { if (req->epoch != expect_epoch) break; _req_mod(req, BARRIER_ACKED); } - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&connection->resource->req_lock); return; bail: - spin_unlock_irq(&tconn->req_lock); - conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); + spin_unlock_irq(&connection->resource->req_lock); + conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } /** * _tl_restart() - Walks the transfer log, and applies an action to all requests - * @mdev: DRBD device. + * @device: DRBD device. * @what: The action/event to perform with all request objects * * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, * RESTART_FROZEN_DISK_IO. 
*/ /* must hold resource->req_lock */ -void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) +void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what) { struct drbd_request *req, *r; - list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) + list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) _req_mod(req, what); } -void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) +void tl_restart(struct drbd_connection *connection, enum drbd_req_event what) { - spin_lock_irq(&tconn->req_lock); - _tl_restart(tconn, what); - spin_unlock_irq(&tconn->req_lock); + spin_lock_irq(&connection->resource->req_lock); + _tl_restart(connection, what); + spin_unlock_irq(&connection->resource->req_lock); } /** * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL - * @mdev: DRBD device. + * @device: DRBD device. * * This is called after the connection to the peer was lost. The storage covered * by the requests on the transfer gets marked as our of sync. Called from the * receiver thread and the worker thread. */ -void tl_clear(struct drbd_tconn *tconn) +void tl_clear(struct drbd_connection *connection) { - tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); + tl_restart(connection, CONNECTION_LOST_WHILE_PENDING); } /** - * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL - * @mdev: DRBD device. + * tl_abort_disk_io() - Abort disk I/O for all requests for a certain device in the TL + * @device: DRBD device. 
*/ -void tl_abort_disk_io(struct drbd_conf *mdev) +void tl_abort_disk_io(struct drbd_device *device) { - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_connection *connection = first_peer_device(device)->connection; struct drbd_request *req, *r; - spin_lock_irq(&tconn->req_lock); - list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) { + spin_lock_irq(&connection->resource->req_lock); + list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) { if (!(req->rq_state & RQ_LOCAL_PENDING)) continue; - if (req->w.mdev != mdev) + if (req->device != device) continue; _req_mod(req, ABORT_DISK_IO); } - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&connection->resource->req_lock); } static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; - struct drbd_tconn *tconn = thi->tconn; + struct drbd_resource *resource = thi->resource; unsigned long flags; int retval; snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", - thi->name[0], thi->tconn->name); + thi->name[0], + resource->name); restart: retval = thi->function(thi); @@ -349,7 +346,7 @@ restart: */ if (thi->t_state == RESTARTING) { - conn_info(tconn, "Restarting %s thread\n", thi->name); + drbd_info(resource, "Restarting %s thread\n", thi->name); thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; @@ -361,29 +358,32 @@ restart: complete_all(&thi->stop); spin_unlock_irqrestore(&thi->t_lock, flags); - conn_info(tconn, "Terminating %s\n", current->comm); + drbd_info(resource, "Terminating %s\n", current->comm); /* Release mod reference taken when thread was started */ - kref_put(&tconn->kref, &conn_destroy); + if (thi->connection) + kref_put(&thi->connection->kref, drbd_destroy_connection); + kref_put(&resource->kref, drbd_destroy_resource); module_put(THIS_MODULE); return retval; } -static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi, - int (*func) (struct drbd_thread *), char 
*name) +static void drbd_thread_init(struct drbd_resource *resource, struct drbd_thread *thi, + int (*func) (struct drbd_thread *), const char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; thi->t_state = NONE; thi->function = func; - thi->tconn = tconn; - strncpy(thi->name, name, ARRAY_SIZE(thi->name)); + thi->resource = resource; + thi->connection = NULL; + thi->name = name; } int drbd_thread_start(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->tconn; + struct drbd_resource *resource = thi->resource; struct task_struct *nt; unsigned long flags; @@ -393,17 +393,19 @@ int drbd_thread_start(struct drbd_thread *thi) switch (thi->t_state) { case NONE: - conn_info(tconn, "Starting %s thread (from %s [%d])\n", + drbd_info(resource, "Starting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { - conn_err(tconn, "Failed to get module reference in drbd_thread_start\n"); + drbd_err(resource, "Failed to get module reference in drbd_thread_start\n"); spin_unlock_irqrestore(&thi->t_lock, flags); return false; } - kref_get(&thi->tconn->kref); + kref_get(&resource->kref); + if (thi->connection) + kref_get(&thi->connection->kref); init_completion(&thi->stop); thi->reset_cpu_mask = 1; @@ -412,12 +414,14 @@ int drbd_thread_start(struct drbd_thread *thi) flush_signals(current); /* otherw. 
may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, - "drbd_%c_%s", thi->name[0], thi->tconn->name); + "drbd_%c_%s", thi->name[0], thi->resource->name); if (IS_ERR(nt)) { - conn_err(tconn, "Couldn't start thread\n"); + drbd_err(resource, "Couldn't start thread\n"); - kref_put(&tconn->kref, &conn_destroy); + if (thi->connection) + kref_put(&thi->connection->kref, drbd_destroy_connection); + kref_put(&resource->kref, drbd_destroy_resource); module_put(THIS_MODULE); return false; } @@ -429,7 +433,7 @@ int drbd_thread_start(struct drbd_thread *thi) break; case EXITING: thi->t_state = RESTARTING; - conn_info(tconn, "Restarting %s thread (from %s [%d])\n", + drbd_info(resource, "Restarting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* fall through */ case RUNNING: @@ -478,65 +482,60 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) wait_for_completion(&thi->stop); } -static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task) -{ - struct drbd_thread *thi = - task == tconn->receiver.task ? &tconn->receiver : - task == tconn->asender.task ? &tconn->asender : - task == tconn->worker.task ? &tconn->worker : NULL; - - return thi; -} - -char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task) +int conn_lowest_minor(struct drbd_connection *connection) { - struct drbd_thread *thi = drbd_task_to_thread(tconn, task); - return thi ? thi->name : task->comm; -} - -int conn_lowest_minor(struct drbd_tconn *tconn) -{ - struct drbd_conf *mdev; - int vnr = 0, m; + struct drbd_peer_device *peer_device; + int vnr = 0, minor = -1; rcu_read_lock(); - mdev = idr_get_next(&tconn->volumes, &vnr); - m = mdev ? 
mdev_to_minor(mdev) : -1; + peer_device = idr_get_next(&connection->peer_devices, &vnr); + if (peer_device) + minor = device_to_minor(peer_device->device); rcu_read_unlock(); - return m; + return minor; } #ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs - * @mdev: DRBD device. * - * Forces all threads of a device onto the same CPU. This is beneficial for + * Forces all threads of a resource onto the same CPU. This is beneficial for * DRBD's performance. May be overwritten by user's configuration. */ -void drbd_calc_cpu_mask(struct drbd_tconn *tconn) +static void drbd_calc_cpu_mask(cpumask_var_t *cpu_mask) { - int ord, cpu; + unsigned int *resources_per_cpu, min_index = ~0; - /* user override. */ - if (cpumask_weight(tconn->cpu_mask)) - return; + resources_per_cpu = kzalloc(nr_cpu_ids * sizeof(*resources_per_cpu), GFP_KERNEL); + if (resources_per_cpu) { + struct drbd_resource *resource; + unsigned int cpu, min = ~0; - ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask); - for_each_online_cpu(cpu) { - if (ord-- == 0) { - cpumask_set_cpu(cpu, tconn->cpu_mask); - return; + rcu_read_lock(); + for_each_resource_rcu(resource, &drbd_resources) { + for_each_cpu(cpu, resource->cpu_mask) + resources_per_cpu[cpu]++; } + rcu_read_unlock(); + for_each_online_cpu(cpu) { + if (resources_per_cpu[cpu] < min) { + min = resources_per_cpu[cpu]; + min_index = cpu; + } + } + kfree(resources_per_cpu); } - /* should not be reached */ - cpumask_setall(tconn->cpu_mask); + if (min_index == ~0) { + cpumask_setall(*cpu_mask); + return; + } + cpumask_set_cpu(min_index, *cpu_mask); } /** * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread - * @mdev: DRBD device. + * @device: DRBD device. 
* @thi: drbd_thread object * * call in the "main loop" of _all_ threads, no need for any mutex, current won't die @@ -544,13 +543,16 @@ void drbd_calc_cpu_mask(struct drbd_tconn *tconn) */ void drbd_thread_current_set_cpu(struct drbd_thread *thi) { + struct drbd_resource *resource = thi->resource; struct task_struct *p = current; if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, thi->tconn->cpu_mask); + set_cpus_allowed_ptr(p, resource->cpu_mask); } +#else +#define drbd_calc_cpu_mask(A) ({}) #endif /** @@ -560,9 +562,9 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) * word aligned on 64-bit architectures. (The bitmap send and receive code * relies on this.) */ -unsigned int drbd_header_size(struct drbd_tconn *tconn) +unsigned int drbd_header_size(struct drbd_connection *connection) { - if (tconn->agreed_pro_version >= 100) { + if (connection->agreed_pro_version >= 100) { BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8)); return sizeof(struct p_header100); } else { @@ -600,44 +602,44 @@ static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cm return sizeof(struct p_header100); } -static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, +static unsigned int prepare_header(struct drbd_connection *connection, int vnr, void *buffer, enum drbd_packet cmd, int size) { - if (tconn->agreed_pro_version >= 100) + if (connection->agreed_pro_version >= 100) return prepare_header100(buffer, cmd, size, vnr); - else if (tconn->agreed_pro_version >= 95 && + else if (connection->agreed_pro_version >= 95 && size > DRBD_MAX_SIZE_H80_PACKET) return prepare_header95(buffer, cmd, size); else return prepare_header80(buffer, cmd, size); } -static void *__conn_prepare_command(struct drbd_tconn *tconn, +static void *__conn_prepare_command(struct drbd_connection *connection, struct drbd_socket *sock) { if (!sock->socket) return NULL; - return sock->sbuf + drbd_header_size(tconn); + return sock->sbuf + 
drbd_header_size(connection); } -void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock) +void *conn_prepare_command(struct drbd_connection *connection, struct drbd_socket *sock) { void *p; mutex_lock(&sock->mutex); - p = __conn_prepare_command(tconn, sock); + p = __conn_prepare_command(connection, sock); if (!p) mutex_unlock(&sock->mutex); return p; } -void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock) +void *drbd_prepare_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock) { - return conn_prepare_command(mdev->tconn, sock); + return conn_prepare_command(peer_device->connection, sock); } -static int __send_command(struct drbd_tconn *tconn, int vnr, +static int __send_command(struct drbd_connection *connection, int vnr, struct drbd_socket *sock, enum drbd_packet cmd, unsigned int header_size, void *data, unsigned int size) @@ -654,82 +656,82 @@ static int __send_command(struct drbd_tconn *tconn, int vnr, */ msg_flags = data ? 
MSG_MORE : 0; - header_size += prepare_header(tconn, vnr, sock->sbuf, cmd, + header_size += prepare_header(connection, vnr, sock->sbuf, cmd, header_size + size); - err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size, + err = drbd_send_all(connection, sock->socket, sock->sbuf, header_size, msg_flags); if (data && !err) - err = drbd_send_all(tconn, sock->socket, data, size, 0); + err = drbd_send_all(connection, sock->socket, data, size, 0); return err; } -static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock, +static int __conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock, enum drbd_packet cmd, unsigned int header_size, void *data, unsigned int size) { - return __send_command(tconn, 0, sock, cmd, header_size, data, size); + return __send_command(connection, 0, sock, cmd, header_size, data, size); } -int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock, +int conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock, enum drbd_packet cmd, unsigned int header_size, void *data, unsigned int size) { int err; - err = __conn_send_command(tconn, sock, cmd, header_size, data, size); + err = __conn_send_command(connection, sock, cmd, header_size, data, size); mutex_unlock(&sock->mutex); return err; } -int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock, +int drbd_send_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock, enum drbd_packet cmd, unsigned int header_size, void *data, unsigned int size) { int err; - err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size, - data, size); + err = __send_command(peer_device->connection, peer_device->device->vnr, + sock, cmd, header_size, data, size); mutex_unlock(&sock->mutex); return err; } -int drbd_send_ping(struct drbd_tconn *tconn) +int drbd_send_ping(struct drbd_connection *connection) { struct drbd_socket *sock; - sock = &tconn->meta; - if (!conn_prepare_command(tconn, 
sock)) + sock = &connection->meta; + if (!conn_prepare_command(connection, sock)) return -EIO; - return conn_send_command(tconn, sock, P_PING, 0, NULL, 0); + return conn_send_command(connection, sock, P_PING, 0, NULL, 0); } -int drbd_send_ping_ack(struct drbd_tconn *tconn) +int drbd_send_ping_ack(struct drbd_connection *connection) { struct drbd_socket *sock; - sock = &tconn->meta; - if (!conn_prepare_command(tconn, sock)) + sock = &connection->meta; + if (!conn_prepare_command(connection, sock)) return -EIO; - return conn_send_command(tconn, sock, P_PING_ACK, 0, NULL, 0); + return conn_send_command(connection, sock, P_PING_ACK, 0, NULL, 0); } -int drbd_send_sync_param(struct drbd_conf *mdev) +int drbd_send_sync_param(struct drbd_peer_device *peer_device) { struct drbd_socket *sock; struct p_rs_param_95 *p; int size; - const int apv = mdev->tconn->agreed_pro_version; + const int apv = peer_device->connection->agreed_pro_version; enum drbd_packet cmd; struct net_conf *nc; struct disk_conf *dc; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(peer_device->connection->net_conf); size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? 
sizeof(struct p_rs_param) @@ -742,14 +744,14 @@ int drbd_send_sync_param(struct drbd_conf *mdev) /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - if (get_ldev(mdev)) { - dc = rcu_dereference(mdev->ldev->disk_conf); + if (get_ldev(peer_device->device)) { + dc = rcu_dereference(peer_device->device->ldev->disk_conf); p->resync_rate = cpu_to_be32(dc->resync_rate); p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); p->c_delay_target = cpu_to_be32(dc->c_delay_target); p->c_fill_target = cpu_to_be32(dc->c_fill_target); p->c_max_rate = cpu_to_be32(dc->c_max_rate); - put_ldev(mdev); + put_ldev(peer_device->device); } else { p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF); p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); @@ -764,33 +766,33 @@ int drbd_send_sync_param(struct drbd_conf *mdev) strcpy(p->csums_alg, nc->csums_alg); rcu_read_unlock(); - return drbd_send_command(mdev, sock, cmd, size, NULL, 0); + return drbd_send_command(peer_device, sock, cmd, size, NULL, 0); } -int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) +int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cmd) { struct drbd_socket *sock; struct p_protocol *p; struct net_conf *nc; int size, cf; - sock = &tconn->data; - p = __conn_prepare_command(tconn, sock); + sock = &connection->data; + p = __conn_prepare_command(connection, sock); if (!p) return -EIO; rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); - if (nc->tentative && tconn->agreed_pro_version < 92) { + if (nc->tentative && connection->agreed_pro_version < 92) { rcu_read_unlock(); mutex_unlock(&sock->mutex); - conn_err(tconn, "--dry-run is not supported by peer"); + drbd_err(connection, "--dry-run is not supported by peer"); return -EOPNOTSUPP; } size = sizeof(*p); - if (tconn->agreed_pro_version >= 87) + if (connection->agreed_pro_version >= 87) size += strlen(nc->integrity_alg) + 1; p->protocol = 
cpu_to_be32(nc->wire_protocol); @@ -805,128 +807,131 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); - if (tconn->agreed_pro_version >= 87) + if (connection->agreed_pro_version >= 87) strcpy(p->integrity_alg, nc->integrity_alg); rcu_read_unlock(); - return __conn_send_command(tconn, sock, cmd, size, NULL, 0); + return __conn_send_command(connection, sock, cmd, size, NULL, 0); } -int drbd_send_protocol(struct drbd_tconn *tconn) +int drbd_send_protocol(struct drbd_connection *connection) { int err; - mutex_lock(&tconn->data.mutex); - err = __drbd_send_protocol(tconn, P_PROTOCOL); - mutex_unlock(&tconn->data.mutex); + mutex_lock(&connection->data.mutex); + err = __drbd_send_protocol(connection, P_PROTOCOL); + mutex_unlock(&connection->data.mutex); return err; } -int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) +static int _drbd_send_uuids(struct drbd_peer_device *peer_device, u64 uuid_flags) { + struct drbd_device *device = peer_device->device; struct drbd_socket *sock; struct p_uuids *p; int i; - if (!get_ldev_if_state(mdev, D_NEGOTIATING)) + if (!get_ldev_if_state(device, D_NEGOTIATING)) return 0; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) { - put_ldev(mdev); + put_ldev(device); return -EIO; } - spin_lock_irq(&mdev->ldev->md.uuid_lock); + spin_lock_irq(&device->ldev->md.uuid_lock); for (i = UI_CURRENT; i < UI_SIZE; i++) - p->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); - spin_unlock_irq(&mdev->ldev->md.uuid_lock); + p->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]); + spin_unlock_irq(&device->ldev->md.uuid_lock); - mdev->comm_bm_set = drbd_bm_total_weight(mdev); - p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); + device->comm_bm_set = drbd_bm_total_weight(device); + p->uuid[UI_SIZE] = cpu_to_be64(device->comm_bm_set); rcu_read_lock(); - uuid_flags |= 
rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0; + uuid_flags |= rcu_dereference(peer_device->connection->net_conf)->discard_my_data ? 1 : 0; rcu_read_unlock(); - uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; - uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; + uuid_flags |= test_bit(CRASHED_PRIMARY, &device->flags) ? 2 : 0; + uuid_flags |= device->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); - put_ldev(mdev); - return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0); + put_ldev(device); + return drbd_send_command(peer_device, sock, P_UUIDS, sizeof(*p), NULL, 0); } -int drbd_send_uuids(struct drbd_conf *mdev) +int drbd_send_uuids(struct drbd_peer_device *peer_device) { - return _drbd_send_uuids(mdev, 0); + return _drbd_send_uuids(peer_device, 0); } -int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev) +int drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *peer_device) { - return _drbd_send_uuids(mdev, 8); + return _drbd_send_uuids(peer_device, 8); } -void drbd_print_uuids(struct drbd_conf *mdev, const char *text) +void drbd_print_uuids(struct drbd_device *device, const char *text) { - if (get_ldev_if_state(mdev, D_NEGOTIATING)) { - u64 *uuid = mdev->ldev->md.uuid; - dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n", + if (get_ldev_if_state(device, D_NEGOTIATING)) { + u64 *uuid = device->ldev->md.uuid; + drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX\n", text, (unsigned long long)uuid[UI_CURRENT], (unsigned long long)uuid[UI_BITMAP], (unsigned long long)uuid[UI_HISTORY_START], (unsigned long long)uuid[UI_HISTORY_END]); - put_ldev(mdev); + put_ldev(device); } else { - dev_info(DEV, "%s effective data uuid: %016llX\n", + drbd_info(device, "%s effective data uuid: %016llX\n", text, - (unsigned long long)mdev->ed_uuid); + (unsigned long long)device->ed_uuid); } } -void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) +void 
drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; struct drbd_socket *sock; struct p_rs_uuid *p; u64 uuid; - D_ASSERT(mdev->state.disk == D_UP_TO_DATE); + D_ASSERT(device, device->state.disk == D_UP_TO_DATE); - uuid = mdev->ldev->md.uuid[UI_BITMAP]; + uuid = device->ldev->md.uuid[UI_BITMAP]; if (uuid && uuid != UUID_JUST_CREATED) uuid = uuid + UUID_NEW_BM_OFFSET; else get_random_bytes(&uuid, sizeof(u64)); - drbd_uuid_set(mdev, UI_BITMAP, uuid); - drbd_print_uuids(mdev, "updated sync UUID"); - drbd_md_sync(mdev); + drbd_uuid_set(device, UI_BITMAP, uuid); + drbd_print_uuids(device, "updated sync UUID"); + drbd_md_sync(device); - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (p) { p->uuid = cpu_to_be64(uuid); - drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0); + drbd_send_command(peer_device, sock, P_SYNC_UUID, sizeof(*p), NULL, 0); } } -int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) +int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags) { + struct drbd_device *device = peer_device->device; struct drbd_socket *sock; struct p_sizes *p; sector_t d_size, u_size; int q_order_type; unsigned int max_bio_size; - if (get_ldev_if_state(mdev, D_NEGOTIATING)) { - D_ASSERT(mdev->ldev->backing_bdev); - d_size = drbd_get_max_capacity(mdev->ldev); + if (get_ldev_if_state(device, D_NEGOTIATING)) { + D_ASSERT(device, device->ldev->backing_bdev); + d_size = drbd_get_max_capacity(device->ldev); rcu_read_lock(); - u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); - q_order_type = drbd_queue_order_type(mdev); - max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; + q_order_type = 
drbd_queue_order_type(device); + max_bio_size = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE); - put_ldev(mdev); + put_ldev(device); } else { d_size = 0; u_size = 0; @@ -934,45 +939,45 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ } - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; - if (mdev->tconn->agreed_pro_version <= 94) + if (peer_device->connection->agreed_pro_version <= 94) max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET); - else if (mdev->tconn->agreed_pro_version < 100) + else if (peer_device->connection->agreed_pro_version < 100) max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE_P95); p->d_size = cpu_to_be64(d_size); p->u_size = cpu_to_be64(u_size); - p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); + p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(device->this_bdev)); p->max_bio_size = cpu_to_be32(max_bio_size); p->queue_order_type = cpu_to_be16(q_order_type); p->dds_flags = cpu_to_be16(flags); - return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0); + return drbd_send_command(peer_device, sock, P_SIZES, sizeof(*p), NULL, 0); } /** * drbd_send_current_state() - Sends the drbd state to the peer - * @mdev: DRBD device. + * @peer_device: DRBD peer device. 
*/ -int drbd_send_current_state(struct drbd_conf *mdev) +int drbd_send_current_state(struct drbd_peer_device *peer_device) { struct drbd_socket *sock; struct p_state *p; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; - p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ - return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0); + p->state = cpu_to_be32(peer_device->device->state.i); /* Within the send mutex */ + return drbd_send_command(peer_device, sock, P_STATE, sizeof(*p), NULL, 0); } /** * drbd_send_state() - After a state change, sends the new state to the peer - * @mdev: DRBD device. + * @peer_device: DRBD peer device. * @state: the state to send, not necessarily the current state. * * Each state change queues an "after_state_ch" work, which will eventually @@ -980,73 +985,73 @@ int drbd_send_current_state(struct drbd_conf *mdev) * between queuing and processing of the after_state_ch work, we still * want to send each intermediary state in the order it occurred. 
*/ -int drbd_send_state(struct drbd_conf *mdev, union drbd_state state) +int drbd_send_state(struct drbd_peer_device *peer_device, union drbd_state state) { struct drbd_socket *sock; struct p_state *p; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; p->state = cpu_to_be32(state.i); /* Within the send mutex */ - return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0); + return drbd_send_command(peer_device, sock, P_STATE, sizeof(*p), NULL, 0); } -int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) +int drbd_send_state_req(struct drbd_peer_device *peer_device, union drbd_state mask, union drbd_state val) { struct drbd_socket *sock; struct p_req_state *p; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; p->mask = cpu_to_be32(mask.i); p->val = cpu_to_be32(val.i); - return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0); + return drbd_send_command(peer_device, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0); } -int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) +int conn_send_state_req(struct drbd_connection *connection, union drbd_state mask, union drbd_state val) { enum drbd_packet cmd; struct drbd_socket *sock; struct p_req_state *p; - cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; - sock = &tconn->data; - p = conn_prepare_command(tconn, sock); + cmd = connection->agreed_pro_version < 100 ? 
P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; + sock = &connection->data; + p = conn_prepare_command(connection, sock); if (!p) return -EIO; p->mask = cpu_to_be32(mask.i); p->val = cpu_to_be32(val.i); - return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0); + return conn_send_command(connection, sock, cmd, sizeof(*p), NULL, 0); } -void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) +void drbd_send_sr_reply(struct drbd_peer_device *peer_device, enum drbd_state_rv retcode) { struct drbd_socket *sock; struct p_req_state_reply *p; - sock = &mdev->tconn->meta; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->meta; + p = drbd_prepare_command(peer_device, sock); if (p) { p->retcode = cpu_to_be32(retcode); - drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0); + drbd_send_command(peer_device, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0); } } -void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) +void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode) { struct drbd_socket *sock; struct p_req_state_reply *p; - enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; + enum drbd_packet cmd = connection->agreed_pro_version < 100 ? 
P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; - sock = &tconn->meta; - p = conn_prepare_command(tconn, sock); + sock = &connection->meta; + p = conn_prepare_command(connection, sock); if (p) { p->retcode = cpu_to_be32(retcode); - conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0); + conn_send_command(connection, sock, cmd, sizeof(*p), NULL, 0); } } @@ -1067,7 +1072,7 @@ static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n) p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); } -int fill_bitmap_rle_bits(struct drbd_conf *mdev, +static int fill_bitmap_rle_bits(struct drbd_device *device, struct p_compressed_bm *p, unsigned int size, struct bm_xfer_ctx *c) @@ -1082,9 +1087,9 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* may we use this feature? */ rcu_read_lock(); - use_rle = rcu_dereference(mdev->tconn->net_conf)->use_rle; + use_rle = rcu_dereference(first_peer_device(device)->connection->net_conf)->use_rle; rcu_read_unlock(); - if (!use_rle || mdev->tconn->agreed_pro_version < 90) + if (!use_rle || first_peer_device(device)->connection->agreed_pro_version < 90) return 0; if (c->bit_offset >= c->bm_bits) @@ -1104,8 +1109,8 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* see how much plain bits we can stuff into one packet * using RLE and VLI. */ do { - tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset) - : _drbd_bm_find_next(mdev, c->bit_offset); + tmp = (toggle == 0) ? _drbd_bm_find_next_zero(device, c->bit_offset) + : _drbd_bm_find_next(device, c->bit_offset); if (tmp == -1UL) tmp = c->bm_bits; rl = tmp - c->bit_offset; @@ -1125,7 +1130,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* paranoia: catch zero runlength. * can only happen if bitmap is modified while we scan it. 
*/ if (rl == 0) { - dev_err(DEV, "unexpected zero runlength while encoding bitmap " + drbd_err(device, "unexpected zero runlength while encoding bitmap " "t:%u bo:%lu\n", toggle, c->bit_offset); return -1; } @@ -1134,7 +1139,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, if (bits == -ENOBUFS) /* buffer full */ break; if (bits <= 0) { - dev_err(DEV, "error while encoding bitmap: %d\n", bits); + drbd_err(device, "error while encoding bitmap: %d\n", bits); return 0; } @@ -1171,21 +1176,21 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, * code upon failure. */ static int -send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) +send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) { - struct drbd_socket *sock = &mdev->tconn->data; - unsigned int header_size = drbd_header_size(mdev->tconn); + struct drbd_socket *sock = &first_peer_device(device)->connection->data; + unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); struct p_compressed_bm *p = sock->sbuf + header_size; int len, err; - len = fill_bitmap_rle_bits(mdev, p, + len = fill_bitmap_rle_bits(device, p, DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c); if (len < 0) return -EIO; if (len) { dcbp_set_code(p, RLE_VLI_Bits); - err = __send_command(mdev->tconn, mdev->vnr, sock, + err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_COMPRESSED_BITMAP, sizeof(*p) + len, NULL, 0); c->packets[0]++; @@ -1205,8 +1210,8 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) c->bm_words - c->word_offset); len = num_words * sizeof(*p); if (len) - drbd_bm_get_lel(mdev, c->word_offset, num_words, p); - err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, len, NULL, 0); + drbd_bm_get_lel(device, c->word_offset, num_words, p); + err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_BITMAP, len, NULL, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * 
BITS_PER_LONG; @@ -1218,7 +1223,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) } if (!err) { if (len == 0) { - INFO_bm_xfer_stats(mdev, "send", c); + INFO_bm_xfer_stats(device, "send", c); return 0; } else return 1; @@ -1227,128 +1232,128 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) } /* See the comment at receive_bitmap() */ -static int _drbd_send_bitmap(struct drbd_conf *mdev) +static int _drbd_send_bitmap(struct drbd_device *device) { struct bm_xfer_ctx c; int err; - if (!expect(mdev->bitmap)) + if (!expect(device->bitmap)) return false; - if (get_ldev(mdev)) { - if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { - dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); - drbd_bm_set_all(mdev); - if (drbd_bm_write(mdev)) { + if (get_ldev(device)) { + if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) { + drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n"); + drbd_bm_set_all(device); + if (drbd_bm_write(device)) { /* write_bm did fail! Leave full sync flag set in Meta P_DATA * but otherwise process as per normal - need to tell other * side that a full resync is required! 
*/ - dev_err(DEV, "Failed to write bitmap to disk!\n"); + drbd_err(device, "Failed to write bitmap to disk!\n"); } else { - drbd_md_clear_flag(mdev, MDF_FULL_SYNC); - drbd_md_sync(mdev); + drbd_md_clear_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); } } - put_ldev(mdev); + put_ldev(device); } c = (struct bm_xfer_ctx) { - .bm_bits = drbd_bm_bits(mdev), - .bm_words = drbd_bm_words(mdev), + .bm_bits = drbd_bm_bits(device), + .bm_words = drbd_bm_words(device), }; do { - err = send_bitmap_rle_or_plain(mdev, &c); + err = send_bitmap_rle_or_plain(device, &c); } while (err > 0); return err == 0; } -int drbd_send_bitmap(struct drbd_conf *mdev) +int drbd_send_bitmap(struct drbd_device *device) { - struct drbd_socket *sock = &mdev->tconn->data; + struct drbd_socket *sock = &first_peer_device(device)->connection->data; int err = -1; mutex_lock(&sock->mutex); if (sock->socket) - err = !_drbd_send_bitmap(mdev); + err = !_drbd_send_bitmap(device); mutex_unlock(&sock->mutex); return err; } -void drbd_send_b_ack(struct drbd_tconn *tconn, u32 barrier_nr, u32 set_size) +void drbd_send_b_ack(struct drbd_connection *connection, u32 barrier_nr, u32 set_size) { struct drbd_socket *sock; struct p_barrier_ack *p; - if (tconn->cstate < C_WF_REPORT_PARAMS) + if (connection->cstate < C_WF_REPORT_PARAMS) return; - sock = &tconn->meta; - p = conn_prepare_command(tconn, sock); + sock = &connection->meta; + p = conn_prepare_command(connection, sock); if (!p) return; p->barrier = barrier_nr; p->set_size = cpu_to_be32(set_size); - conn_send_command(tconn, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); + conn_send_command(connection, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); } /** * _drbd_send_ack() - Sends an ack packet - * @mdev: DRBD device. + * @device: DRBD device. * @cmd: Packet command code. 
* @sector: sector, needs to be in big endian byte order * @blksize: size in byte, needs to be in big endian byte order * @block_id: Id, big endian byte order */ -static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, +static int _drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd, u64 sector, u32 blksize, u64 block_id) { struct drbd_socket *sock; struct p_block_ack *p; - if (mdev->state.conn < C_CONNECTED) + if (peer_device->device->state.conn < C_CONNECTED) return -EIO; - sock = &mdev->tconn->meta; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->meta; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; p->sector = sector; p->block_id = block_id; p->blksize = blksize; - p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); - return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); + p->seq_num = cpu_to_be32(atomic_inc_return(&peer_device->device->packet_seq)); + return drbd_send_command(peer_device, sock, cmd, sizeof(*p), NULL, 0); } /* dp->sector and dp->block_id already/still in network byte order, * data_size is payload size according to dp->head, * and may need to be corrected for digest size. 
*/ -void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, +void drbd_send_ack_dp(struct drbd_peer_device *peer_device, enum drbd_packet cmd, struct p_data *dp, int data_size) { - if (mdev->tconn->peer_integrity_tfm) - data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); - _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), + if (peer_device->connection->peer_integrity_tfm) + data_size -= crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); + _drbd_send_ack(peer_device, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } -void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, +void drbd_send_ack_rp(struct drbd_peer_device *peer_device, enum drbd_packet cmd, struct p_block_req *rp) { - _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); + _drbd_send_ack(peer_device, cmd, rp->sector, rp->blksize, rp->block_id); } /** * drbd_send_ack() - Sends an ack packet - * @mdev: DRBD device + * @device: DRBD device * @cmd: packet command code * @peer_req: peer request */ -int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, +int drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd, struct drbd_peer_request *peer_req) { - return _drbd_send_ack(mdev, cmd, + return _drbd_send_ack(peer_device, cmd, cpu_to_be64(peer_req->i.sector), cpu_to_be32(peer_req->i.size), peer_req->block_id); @@ -1356,32 +1361,32 @@ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, /* This function misuses the block_id field to signal if the blocks * are is sync or not. 
*/ -int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, +int drbd_send_ack_ex(struct drbd_peer_device *peer_device, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id) { - return _drbd_send_ack(mdev, cmd, + return _drbd_send_ack(peer_device, cmd, cpu_to_be64(sector), cpu_to_be32(blksize), cpu_to_be64(block_id)); } -int drbd_send_drequest(struct drbd_conf *mdev, int cmd, +int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd, sector_t sector, int size, u64 block_id) { struct drbd_socket *sock; struct p_block_req *p; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; p->sector = cpu_to_be64(sector); p->block_id = block_id; p->blksize = cpu_to_be32(size); - return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); + return drbd_send_command(peer_device, sock, cmd, sizeof(*p), NULL, 0); } -int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, +int drbd_send_drequest_csum(struct drbd_peer_device *peer_device, sector_t sector, int size, void *digest, int digest_size, enum drbd_packet cmd) { struct drbd_socket *sock; @@ -1389,64 +1394,63 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, /* FIXME: Put the digest into the preallocated socket buffer. 
*/ - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; p->sector = cpu_to_be64(sector); p->block_id = ID_SYNCER /* unused */; p->blksize = cpu_to_be32(size); - return drbd_send_command(mdev, sock, cmd, sizeof(*p), - digest, digest_size); + return drbd_send_command(peer_device, sock, cmd, sizeof(*p), digest, digest_size); } -int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) +int drbd_send_ov_request(struct drbd_peer_device *peer_device, sector_t sector, int size) { struct drbd_socket *sock; struct p_block_req *p; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; p->sector = cpu_to_be64(sector); p->block_id = ID_SYNCER /* unused */; p->blksize = cpu_to_be32(size); - return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0); + return drbd_send_command(peer_device, sock, P_OV_REQUEST, sizeof(*p), NULL, 0); } /* called on sndtimeo * returns false if we should retry, * true if we think connection is dead */ -static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock) +static int we_should_drop_the_connection(struct drbd_connection *connection, struct socket *sock) { int drop_it; - /* long elapsed = (long)(jiffies - mdev->last_received); */ + /* long elapsed = (long)(jiffies - device->last_received); */ - drop_it = tconn->meta.socket == sock - || !tconn->asender.task - || get_t_state(&tconn->asender) != RUNNING - || tconn->cstate < C_WF_REPORT_PARAMS; + drop_it = connection->meta.socket == sock + || !connection->asender.task + || get_t_state(&connection->asender) != RUNNING + || connection->cstate < C_WF_REPORT_PARAMS; if (drop_it) return true; - drop_it = !--tconn->ko_count; + drop_it = !--connection->ko_count; if (!drop_it) { - conn_err(tconn, "[%s/%d] 
sock_sendmsg time expired, ko = %u\n", - current->comm, current->pid, tconn->ko_count); - request_ping(tconn); + drbd_err(connection, "[%s/%d] sock_sendmsg time expired, ko = %u\n", + current->comm, current->pid, connection->ko_count); + request_ping(connection); } - return drop_it; /* && (mdev->state == R_PRIMARY) */; + return drop_it; /* && (device->state == R_PRIMARY) */; } -static void drbd_update_congested(struct drbd_tconn *tconn) +static void drbd_update_congested(struct drbd_connection *connection) { - struct sock *sk = tconn->data.socket->sk; + struct sock *sk = connection->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &tconn->flags); + set_bit(NET_CONGESTED, &connection->flags); } /* The idea of sendpage seems to be to put some kind of reference @@ -1470,26 +1474,26 @@ static void drbd_update_congested(struct drbd_tconn *tconn) * As a workaround, we disable sendpage on pages * with page_count == 0 or PageSlab. */ -static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, +static int _drbd_no_send_page(struct drbd_peer_device *peer_device, struct page *page, int offset, size_t size, unsigned msg_flags) { struct socket *socket; void *addr; int err; - socket = mdev->tconn->data.socket; + socket = peer_device->connection->data.socket; addr = kmap(page) + offset; - err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags); + err = drbd_send_all(peer_device->connection, socket, addr, size, msg_flags); kunmap(page); if (!err) - mdev->send_cnt += size >> 9; + peer_device->device->send_cnt += size >> 9; return err; } -static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, +static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *page, int offset, size_t size, unsigned msg_flags) { - struct socket *socket = mdev->tconn->data.socket; + struct socket *socket = peer_device->connection->data.socket; mm_segment_t oldfs = get_fs(); int len = size; int err = -EIO; @@ 
-1501,10 +1505,10 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. */ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) - return _drbd_no_send_page(mdev, page, offset, size, msg_flags); + return _drbd_no_send_page(peer_device, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; - drbd_update_congested(mdev->tconn); + drbd_update_congested(peer_device->connection); set_fs(KERNEL_DS); do { int sent; @@ -1512,11 +1516,11 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, sent = socket->ops->sendpage(socket, page, offset, len, msg_flags); if (sent <= 0) { if (sent == -EAGAIN) { - if (we_should_drop_the_connection(mdev->tconn, socket)) + if (we_should_drop_the_connection(peer_device->connection, socket)) break; continue; } - dev_warn(DEV, "%s: size=%d len=%d sent=%d\n", + drbd_warn(peer_device->device, "%s: size=%d len=%d sent=%d\n", __func__, (int)size, len, sent); if (sent < 0) err = sent; @@ -1524,18 +1528,18 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, } len -= sent; offset += sent; - } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/); + } while (len > 0 /* THINK && device->cstate >= C_CONNECTED*/); set_fs(oldfs); - clear_bit(NET_CONGESTED, &mdev->tconn->flags); + clear_bit(NET_CONGESTED, &peer_device->connection->flags); if (len == 0) { err = 0; - mdev->send_cnt += size >> 9; + peer_device->device->send_cnt += size >> 9; } return err; } -static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) +static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio) { struct bio_vec bvec; struct bvec_iter iter; @@ -1544,7 +1548,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) bio_for_each_segment(bvec, bio, iter) { int err; - err = _drbd_no_send_page(mdev, bvec.bv_page, + err = 
_drbd_no_send_page(peer_device, bvec.bv_page, bvec.bv_offset, bvec.bv_len, bio_iter_last(bvec, iter) ? 0 : MSG_MORE); @@ -1554,7 +1558,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) return 0; } -static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) +static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *bio) { struct bio_vec bvec; struct bvec_iter iter; @@ -1563,7 +1567,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) bio_for_each_segment(bvec, bio, iter) { int err; - err = _drbd_send_page(mdev, bvec.bv_page, + err = _drbd_send_page(peer_device, bvec.bv_page, bvec.bv_offset, bvec.bv_len, bio_iter_last(bvec, iter) ? 0 : MSG_MORE); if (err) @@ -1572,7 +1576,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) return 0; } -static int _drbd_send_zc_ee(struct drbd_conf *mdev, +static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, struct drbd_peer_request *peer_req) { struct page *page = peer_req->pages; @@ -1583,7 +1587,7 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - err = _drbd_send_page(mdev, page, 0, l, + err = _drbd_send_page(peer_device, page, 0, l, page_chain_next(page) ? MSG_MORE : 0); if (err) return err; @@ -1592,9 +1596,9 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, return 0; } -static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) +static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long bi_rw) { - if (mdev->tconn->agreed_pro_version >= 95) + if (connection->agreed_pro_version >= 95) return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) | (bi_rw & REQ_FUA ? DP_FUA : 0) | (bi_rw & REQ_FLUSH ? 
DP_FLUSH : 0) | @@ -1606,28 +1610,30 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) /* Used to send write requests * R_PRIMARY -> Peer (P_DATA) */ -int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) +int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req) { + struct drbd_device *device = peer_device->device; struct drbd_socket *sock; struct p_data *p; unsigned int dp_flags = 0; int dgs; int err; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); - dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); + dgs = peer_device->connection->integrity_tfm ? + crypto_hash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; p->sector = cpu_to_be64(req->i.sector); p->block_id = (unsigned long)req; - p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); - dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); - if (mdev->state.conn >= C_SYNC_SOURCE && - mdev->state.conn <= C_PAUSED_SYNC_T) + p->seq_num = cpu_to_be32(atomic_inc_return(&device->packet_seq)); + dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio->bi_rw); + if (device->state.conn >= C_SYNC_SOURCE && + device->state.conn <= C_PAUSED_SYNC_T) dp_flags |= DP_MAY_SET_IN_SYNC; - if (mdev->tconn->agreed_pro_version >= 100) { + if (peer_device->connection->agreed_pro_version >= 100) { if (req->rq_state & RQ_EXP_RECEIVE_ACK) dp_flags |= DP_SEND_RECEIVE_ACK; if (req->rq_state & RQ_EXP_WRITE_ACK) @@ -1635,8 +1641,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) } p->dp_flags = cpu_to_be32(dp_flags); if (dgs) - drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1); - err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); + 
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1); + err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); if (!err) { /* For protocol A, we have to memcpy the payload into * socket buffers, as we may complete right away @@ -1650,18 +1656,18 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * receiving side, we sure have detected corruption elsewhere. */ if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs) - err = _drbd_send_bio(mdev, req->master_bio); + err = _drbd_send_bio(peer_device, req->master_bio); else - err = _drbd_send_zc_bio(mdev, req->master_bio); + err = _drbd_send_zc_bio(peer_device, req->master_bio); /* double check digest, sometimes buffers have been modified in flight. */ if (dgs > 0 && dgs <= 64) { /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; - drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest); + drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest); if (memcmp(p + 1, digest, dgs)) { - dev_warn(DEV, + drbd_warn(device, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", (unsigned long long)req->i.sector, req->i.size); } @@ -1678,18 +1684,20 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ -int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, +int drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd, struct drbd_peer_request *peer_req) { + struct drbd_device *device = peer_device->device; struct drbd_socket *sock; struct p_data *p; int err; int dgs; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); - dgs = 
mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; + dgs = peer_device->connection->integrity_tfm ? + crypto_hash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; @@ -1698,27 +1706,27 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, p->seq_num = 0; /* unused */ p->dp_flags = 0; if (dgs) - drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1); - err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size); + drbd_csum_ee(peer_device->connection->integrity_tfm, peer_req, p + 1); + err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size); if (!err) - err = _drbd_send_zc_ee(mdev, peer_req); + err = _drbd_send_zc_ee(peer_device, peer_req); mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ return err; } -int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req) +int drbd_send_out_of_sync(struct drbd_peer_device *peer_device, struct drbd_request *req) { struct drbd_socket *sock; struct p_block_desc *p; - sock = &mdev->tconn->data; - p = drbd_prepare_command(mdev, sock); + sock = &peer_device->connection->data; + p = drbd_prepare_command(peer_device, sock); if (!p) return -EIO; p->sector = cpu_to_be64(req->i.sector); p->blksize = cpu_to_be32(req->i.size); - return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0); + return drbd_send_command(peer_device, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0); } /* @@ -1737,7 +1745,7 @@ int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req) /* * you must have down()ed the appropriate [m]sock_mutex elsewhere! 
*/ -int drbd_send(struct drbd_tconn *tconn, struct socket *sock, +int drbd_send(struct drbd_connection *connection, struct socket *sock, void *buf, size_t size, unsigned msg_flags) { struct kvec iov; @@ -1758,11 +1766,11 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; - if (sock == tconn->data.socket) { + if (sock == connection->data.socket) { rcu_read_lock(); - tconn->ko_count = rcu_dereference(tconn->net_conf)->ko_count; + connection->ko_count = rcu_dereference(connection->net_conf)->ko_count; rcu_read_unlock(); - drbd_update_congested(tconn); + drbd_update_congested(connection); } do { /* STRANGE @@ -1776,7 +1784,7 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, */ rv = kernel_sendmsg(sock, &msg, &iov, 1, size); if (rv == -EAGAIN) { - if (we_should_drop_the_connection(tconn, sock)) + if (we_should_drop_the_connection(connection, sock)) break; else continue; @@ -1792,17 +1800,17 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, iov.iov_len -= rv; } while (sent < size); - if (sock == tconn->data.socket) - clear_bit(NET_CONGESTED, &tconn->flags); + if (sock == connection->data.socket) + clear_bit(NET_CONGESTED, &connection->flags); if (rv <= 0) { if (rv != -EAGAIN) { - conn_err(tconn, "%s_sendmsg returned %d\n", - sock == tconn->meta.socket ? "msock" : "sock", + drbd_err(connection, "%s_sendmsg returned %d\n", + sock == connection->meta.socket ? "msock" : "sock", rv); - conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); + conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); } else - conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD); + conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); } return sent; @@ -1813,12 +1821,12 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, * * Returns 0 upon success and a negative error value otherwise. 
*/ -int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer, +int drbd_send_all(struct drbd_connection *connection, struct socket *sock, void *buffer, size_t size, unsigned msg_flags) { int err; - err = drbd_send(tconn, sock, buffer, size, msg_flags); + err = drbd_send(connection, sock, buffer, size, msg_flags); if (err < 0) return err; if (err != size) @@ -1828,16 +1836,16 @@ int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer, static int drbd_open(struct block_device *bdev, fmode_t mode) { - struct drbd_conf *mdev = bdev->bd_disk->private_data; + struct drbd_device *device = bdev->bd_disk->private_data; unsigned long flags; int rv = 0; mutex_lock(&drbd_main_mutex); - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - /* to have a stable mdev->state.role + spin_lock_irqsave(&device->resource->req_lock, flags); + /* to have a stable device->state.role * and no race with updating open_cnt */ - if (mdev->state.role != R_PRIMARY) { + if (device->state.role != R_PRIMARY) { if (mode & FMODE_WRITE) rv = -EROFS; else if (!allow_oos) @@ -1845,8 +1853,8 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) } if (!rv) - mdev->open_cnt++; - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + device->open_cnt++; + spin_unlock_irqrestore(&device->resource->req_lock, flags); mutex_unlock(&drbd_main_mutex); return rv; @@ -1854,17 +1862,17 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) static void drbd_release(struct gendisk *gd, fmode_t mode) { - struct drbd_conf *mdev = gd->private_data; + struct drbd_device *device = gd->private_data; mutex_lock(&drbd_main_mutex); - mdev->open_cnt--; + device->open_cnt--; mutex_unlock(&drbd_main_mutex); } -static void drbd_set_defaults(struct drbd_conf *mdev) +static void drbd_set_defaults(struct drbd_device *device) { /* Beware! 
The actual layout differs * between big endian and little endian */ - mdev->state = (union drbd_dev_state) { + device->state = (union drbd_dev_state) { { .role = R_SECONDARY, .peer = R_UNKNOWN, .conn = C_STANDALONE, @@ -1873,130 +1881,123 @@ static void drbd_set_defaults(struct drbd_conf *mdev) } }; } -void drbd_init_set_defaults(struct drbd_conf *mdev) +void drbd_init_set_defaults(struct drbd_device *device) { /* the memset(,0,) did most of this. * note: only assignments, no allocation in here */ - drbd_set_defaults(mdev); - - atomic_set(&mdev->ap_bio_cnt, 0); - atomic_set(&mdev->ap_pending_cnt, 0); - atomic_set(&mdev->rs_pending_cnt, 0); - atomic_set(&mdev->unacked_cnt, 0); - atomic_set(&mdev->local_cnt, 0); - atomic_set(&mdev->pp_in_use_by_net, 0); - atomic_set(&mdev->rs_sect_in, 0); - atomic_set(&mdev->rs_sect_ev, 0); - atomic_set(&mdev->ap_in_flight, 0); - atomic_set(&mdev->md_io_in_use, 0); - - mutex_init(&mdev->own_state_mutex); - mdev->state_mutex = &mdev->own_state_mutex; - - spin_lock_init(&mdev->al_lock); - spin_lock_init(&mdev->peer_seq_lock); - - INIT_LIST_HEAD(&mdev->active_ee); - INIT_LIST_HEAD(&mdev->sync_ee); - INIT_LIST_HEAD(&mdev->done_ee); - INIT_LIST_HEAD(&mdev->read_ee); - INIT_LIST_HEAD(&mdev->net_ee); - INIT_LIST_HEAD(&mdev->resync_reads); - INIT_LIST_HEAD(&mdev->resync_work.list); - INIT_LIST_HEAD(&mdev->unplug_work.list); - INIT_LIST_HEAD(&mdev->go_diskless.list); - INIT_LIST_HEAD(&mdev->md_sync_work.list); - INIT_LIST_HEAD(&mdev->start_resync_work.list); - INIT_LIST_HEAD(&mdev->bm_io_work.w.list); - - mdev->resync_work.cb = w_resync_timer; - mdev->unplug_work.cb = w_send_write_hint; - mdev->go_diskless.cb = w_go_diskless; - mdev->md_sync_work.cb = w_md_sync; - mdev->bm_io_work.w.cb = w_bitmap_io; - mdev->start_resync_work.cb = w_start_resync; - - mdev->resync_work.mdev = mdev; - mdev->unplug_work.mdev = mdev; - mdev->go_diskless.mdev = mdev; - mdev->md_sync_work.mdev = mdev; - mdev->bm_io_work.w.mdev = mdev; - mdev->start_resync_work.mdev 
= mdev; - - init_timer(&mdev->resync_timer); - init_timer(&mdev->md_sync_timer); - init_timer(&mdev->start_resync_timer); - init_timer(&mdev->request_timer); - mdev->resync_timer.function = resync_timer_fn; - mdev->resync_timer.data = (unsigned long) mdev; - mdev->md_sync_timer.function = md_sync_timer_fn; - mdev->md_sync_timer.data = (unsigned long) mdev; - mdev->start_resync_timer.function = start_resync_timer_fn; - mdev->start_resync_timer.data = (unsigned long) mdev; - mdev->request_timer.function = request_timer_fn; - mdev->request_timer.data = (unsigned long) mdev; - - init_waitqueue_head(&mdev->misc_wait); - init_waitqueue_head(&mdev->state_wait); - init_waitqueue_head(&mdev->ee_wait); - init_waitqueue_head(&mdev->al_wait); - init_waitqueue_head(&mdev->seq_wait); - - mdev->resync_wenr = LC_FREE; - mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; - mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; -} - -void drbd_mdev_cleanup(struct drbd_conf *mdev) + drbd_set_defaults(device); + + atomic_set(&device->ap_bio_cnt, 0); + atomic_set(&device->ap_pending_cnt, 0); + atomic_set(&device->rs_pending_cnt, 0); + atomic_set(&device->unacked_cnt, 0); + atomic_set(&device->local_cnt, 0); + atomic_set(&device->pp_in_use_by_net, 0); + atomic_set(&device->rs_sect_in, 0); + atomic_set(&device->rs_sect_ev, 0); + atomic_set(&device->ap_in_flight, 0); + atomic_set(&device->md_io_in_use, 0); + + mutex_init(&device->own_state_mutex); + device->state_mutex = &device->own_state_mutex; + + spin_lock_init(&device->al_lock); + spin_lock_init(&device->peer_seq_lock); + + INIT_LIST_HEAD(&device->active_ee); + INIT_LIST_HEAD(&device->sync_ee); + INIT_LIST_HEAD(&device->done_ee); + INIT_LIST_HEAD(&device->read_ee); + INIT_LIST_HEAD(&device->net_ee); + INIT_LIST_HEAD(&device->resync_reads); + INIT_LIST_HEAD(&device->resync_work.list); + INIT_LIST_HEAD(&device->unplug_work.list); + INIT_LIST_HEAD(&device->go_diskless.list); + INIT_LIST_HEAD(&device->md_sync_work.list); + 
INIT_LIST_HEAD(&device->start_resync_work.list); + INIT_LIST_HEAD(&device->bm_io_work.w.list); + + device->resync_work.cb = w_resync_timer; + device->unplug_work.cb = w_send_write_hint; + device->go_diskless.cb = w_go_diskless; + device->md_sync_work.cb = w_md_sync; + device->bm_io_work.w.cb = w_bitmap_io; + device->start_resync_work.cb = w_start_resync; + + init_timer(&device->resync_timer); + init_timer(&device->md_sync_timer); + init_timer(&device->start_resync_timer); + init_timer(&device->request_timer); + device->resync_timer.function = resync_timer_fn; + device->resync_timer.data = (unsigned long) device; + device->md_sync_timer.function = md_sync_timer_fn; + device->md_sync_timer.data = (unsigned long) device; + device->start_resync_timer.function = start_resync_timer_fn; + device->start_resync_timer.data = (unsigned long) device; + device->request_timer.function = request_timer_fn; + device->request_timer.data = (unsigned long) device; + + init_waitqueue_head(&device->misc_wait); + init_waitqueue_head(&device->state_wait); + init_waitqueue_head(&device->ee_wait); + init_waitqueue_head(&device->al_wait); + init_waitqueue_head(&device->seq_wait); + + device->resync_wenr = LC_FREE; + device->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; + device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; +} + +void drbd_device_cleanup(struct drbd_device *device) { int i; - if (mdev->tconn->receiver.t_state != NONE) - dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", - mdev->tconn->receiver.t_state); - - mdev->al_writ_cnt = - mdev->bm_writ_cnt = - mdev->read_cnt = - mdev->recv_cnt = - mdev->send_cnt = - mdev->writ_cnt = - mdev->p_size = - mdev->rs_start = - mdev->rs_total = - mdev->rs_failed = 0; - mdev->rs_last_events = 0; - mdev->rs_last_sect_ev = 0; + if (first_peer_device(device)->connection->receiver.t_state != NONE) + drbd_err(device, "ASSERT FAILED: receiver t_state == %d expected 0.\n", + first_peer_device(device)->connection->receiver.t_state); + + 
device->al_writ_cnt = + device->bm_writ_cnt = + device->read_cnt = + device->recv_cnt = + device->send_cnt = + device->writ_cnt = + device->p_size = + device->rs_start = + device->rs_total = + device->rs_failed = 0; + device->rs_last_events = 0; + device->rs_last_sect_ev = 0; for (i = 0; i < DRBD_SYNC_MARKS; i++) { - mdev->rs_mark_left[i] = 0; - mdev->rs_mark_time[i] = 0; + device->rs_mark_left[i] = 0; + device->rs_mark_time[i] = 0; } - D_ASSERT(mdev->tconn->net_conf == NULL); + D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); - drbd_set_my_capacity(mdev, 0); - if (mdev->bitmap) { + drbd_set_my_capacity(device, 0); + if (device->bitmap) { /* maybe never allocated. */ - drbd_bm_resize(mdev, 0, 1); - drbd_bm_cleanup(mdev); + drbd_bm_resize(device, 0, 1); + drbd_bm_cleanup(device); } - drbd_free_bc(mdev->ldev); - mdev->ldev = NULL; + drbd_free_bc(device->ldev); + device->ldev = NULL; - clear_bit(AL_SUSPENDED, &mdev->flags); + clear_bit(AL_SUSPENDED, &device->flags); - D_ASSERT(list_empty(&mdev->active_ee)); - D_ASSERT(list_empty(&mdev->sync_ee)); - D_ASSERT(list_empty(&mdev->done_ee)); - D_ASSERT(list_empty(&mdev->read_ee)); - D_ASSERT(list_empty(&mdev->net_ee)); - D_ASSERT(list_empty(&mdev->resync_reads)); - D_ASSERT(list_empty(&mdev->tconn->sender_work.q)); - D_ASSERT(list_empty(&mdev->resync_work.list)); - D_ASSERT(list_empty(&mdev->unplug_work.list)); - D_ASSERT(list_empty(&mdev->go_diskless.list)); + D_ASSERT(device, list_empty(&device->active_ee)); + D_ASSERT(device, list_empty(&device->sync_ee)); + D_ASSERT(device, list_empty(&device->done_ee)); + D_ASSERT(device, list_empty(&device->read_ee)); + D_ASSERT(device, list_empty(&device->net_ee)); + D_ASSERT(device, list_empty(&device->resync_reads)); + D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); + D_ASSERT(device, list_empty(&device->resync_work.list)); + D_ASSERT(device, list_empty(&device->unplug_work.list)); + D_ASSERT(device, 
list_empty(&device->go_diskless.list)); - drbd_set_defaults(mdev); + drbd_set_defaults(device); } @@ -2011,7 +2012,7 @@ static void drbd_destroy_mempools(void) drbd_pp_vacant--; } - /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ if (drbd_md_io_bio_set) bioset_free(drbd_md_io_bio_set); @@ -2131,69 +2132,73 @@ static struct notifier_block drbd_notifier = { .notifier_call = drbd_notify_sys, }; -static void drbd_release_all_peer_reqs(struct drbd_conf *mdev) +static void drbd_release_all_peer_reqs(struct drbd_device *device) { int rr; - rr = drbd_free_peer_reqs(mdev, &mdev->active_ee); + rr = drbd_free_peer_reqs(device, &device->active_ee); if (rr) - dev_err(DEV, "%d EEs in active list found!\n", rr); + drbd_err(device, "%d EEs in active list found!\n", rr); - rr = drbd_free_peer_reqs(mdev, &mdev->sync_ee); + rr = drbd_free_peer_reqs(device, &device->sync_ee); if (rr) - dev_err(DEV, "%d EEs in sync list found!\n", rr); + drbd_err(device, "%d EEs in sync list found!\n", rr); - rr = drbd_free_peer_reqs(mdev, &mdev->read_ee); + rr = drbd_free_peer_reqs(device, &device->read_ee); if (rr) - dev_err(DEV, "%d EEs in read list found!\n", rr); + drbd_err(device, "%d EEs in read list found!\n", rr); - rr = drbd_free_peer_reqs(mdev, &mdev->done_ee); + rr = drbd_free_peer_reqs(device, &device->done_ee); if (rr) - dev_err(DEV, "%d EEs in done list found!\n", rr); + drbd_err(device, "%d EEs in done list found!\n", rr); - rr = drbd_free_peer_reqs(mdev, &mdev->net_ee); + rr = drbd_free_peer_reqs(device, &device->net_ee); if (rr) - dev_err(DEV, "%d EEs in net list found!\n", rr); + drbd_err(device, "%d EEs in net list found!\n", rr); } /* caution. no locking. 
*/ -void drbd_minor_destroy(struct kref *kref) +void drbd_destroy_device(struct kref *kref) { - struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_device *device = container_of(kref, struct drbd_device, kref); + struct drbd_resource *resource = device->resource; + struct drbd_connection *connection; - del_timer_sync(&mdev->request_timer); + del_timer_sync(&device->request_timer); /* paranoia asserts */ - D_ASSERT(mdev->open_cnt == 0); + D_ASSERT(device, device->open_cnt == 0); /* end paranoia asserts */ /* cleanup stuff that may have been allocated during * device (re-)configuration or state changes */ - if (mdev->this_bdev) - bdput(mdev->this_bdev); + if (device->this_bdev) + bdput(device->this_bdev); - drbd_free_bc(mdev->ldev); - mdev->ldev = NULL; + drbd_free_bc(device->ldev); + device->ldev = NULL; - drbd_release_all_peer_reqs(mdev); + drbd_release_all_peer_reqs(device); - lc_destroy(mdev->act_log); - lc_destroy(mdev->resync); + lc_destroy(device->act_log); + lc_destroy(device->resync); - kfree(mdev->p_uuid); - /* mdev->p_uuid = NULL; */ + kfree(device->p_uuid); + /* device->p_uuid = NULL; */ - if (mdev->bitmap) /* should no longer be there. */ - drbd_bm_cleanup(mdev); - __free_page(mdev->md_io_page); - put_disk(mdev->vdisk); - blk_cleanup_queue(mdev->rq_queue); - kfree(mdev->rs_plan_s); - kfree(mdev); + if (device->bitmap) /* should no longer be there. 
*/ + drbd_bm_cleanup(device); + __free_page(device->md_io_page); + put_disk(device->vdisk); + blk_cleanup_queue(device->rq_queue); + kfree(device->rs_plan_s); + kfree(first_peer_device(device)); + kfree(device); - kref_put(&tconn->kref, &conn_destroy); + for_each_connection(connection, resource) + kref_put(&connection->kref, drbd_destroy_connection); + kref_put(&resource->kref, drbd_destroy_resource); } /* One global retry thread, if we need to push back some bio and have it @@ -2218,19 +2223,19 @@ static void do_retry(struct work_struct *ws) spin_unlock_irq(&retry->lock); list_for_each_entry_safe(req, tmp, &writes, tl_requests) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct bio *bio = req->master_bio; unsigned long start_time = req->start_time; bool expected; - expected = + expected = expect(atomic_read(&req->completion_ref) == 0) && expect(req->rq_state & RQ_POSTPONED) && expect((req->rq_state & RQ_LOCAL_PENDING) == 0 || (req->rq_state & RQ_LOCAL_ABORTED) != 0); if (!expected) - dev_err(DEV, "req=%p completion_ref=%d rq_state=%x\n", + drbd_err(device, "req=%p completion_ref=%d rq_state=%x\n", req, atomic_read(&req->completion_ref), req->rq_state); @@ -2254,8 +2259,8 @@ static void do_retry(struct work_struct *ws) /* We are not just doing generic_make_request(), * as we want to keep the start_time information. */ - inc_ap_bio(mdev); - __drbd_make_request(mdev, bio, start_time); + inc_ap_bio(device); + __drbd_make_request(device, bio, start_time); } } @@ -2269,17 +2274,38 @@ void drbd_restart_request(struct drbd_request *req) /* Drop the extra reference that would otherwise * have been dropped by complete_master_bio. * do_retry() needs to grab a new one. 
*/ - dec_ap_bio(req->w.mdev); + dec_ap_bio(req->device); queue_work(retry.wq, &retry.worker); } +void drbd_destroy_resource(struct kref *kref) +{ + struct drbd_resource *resource = + container_of(kref, struct drbd_resource, kref); + + idr_destroy(&resource->devices); + free_cpumask_var(resource->cpu_mask); + kfree(resource->name); + kfree(resource); +} + +void drbd_free_resource(struct drbd_resource *resource) +{ + struct drbd_connection *connection, *tmp; + + for_each_connection_safe(connection, tmp, resource) { + list_del(&connection->connections); + kref_put(&connection->kref, drbd_destroy_connection); + } + kref_put(&resource->kref, drbd_destroy_resource); +} static void drbd_cleanup(void) { unsigned int i; - struct drbd_conf *mdev; - struct drbd_tconn *tconn, *tmp; + struct drbd_device *device; + struct drbd_resource *resource, *tmp; unregister_reboot_notifier(&drbd_notifier); @@ -2299,26 +2325,19 @@ static void drbd_cleanup(void) drbd_genl_unregister(); - idr_for_each_entry(&minors, mdev, i) { - idr_remove(&minors, mdev_to_minor(mdev)); - idr_remove(&mdev->tconn->volumes, mdev->vnr); - destroy_workqueue(mdev->submit.wq); - del_gendisk(mdev->vdisk); - /* synchronize_rcu(); No other threads running at this point */ - kref_put(&mdev->kref, &drbd_minor_destroy); - } + idr_for_each_entry(&drbd_devices, device, i) + drbd_delete_device(device); /* not _rcu since, no other updater anymore. 
Genl already unregistered */ - list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { - list_del(&tconn->all_tconn); /* not _rcu no proc, not other threads */ - /* synchronize_rcu(); */ - kref_put(&tconn->kref, &conn_destroy); + for_each_resource_safe(resource, tmp, &drbd_resources) { + list_del(&resource->resources); + drbd_free_resource(resource); } drbd_destroy_mempools(); unregister_blkdev(DRBD_MAJOR, "drbd"); - idr_destroy(&minors); + idr_destroy(&drbd_devices); printk(KERN_INFO "drbd: module cleanup done.\n"); } @@ -2332,49 +2351,50 @@ static void drbd_cleanup(void) */ static int drbd_congested(void *congested_data, int bdi_bits) { - struct drbd_conf *mdev = congested_data; + struct drbd_device *device = congested_data; struct request_queue *q; char reason = '-'; int r = 0; - if (!may_inc_ap_bio(mdev)) { + if (!may_inc_ap_bio(device)) { /* DRBD has frozen IO */ r = bdi_bits; reason = 'd'; goto out; } - if (test_bit(CALLBACK_PENDING, &mdev->tconn->flags)) { + if (test_bit(CALLBACK_PENDING, &first_peer_device(device)->connection->flags)) { r |= (1 << BDI_async_congested); /* Without good local data, we would need to read from remote, * and that would need the worker thread as well, which is * currently blocked waiting for that usermode helper to * finish. 
*/ - if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) + if (!get_ldev_if_state(device, D_UP_TO_DATE)) r |= (1 << BDI_sync_congested); else - put_ldev(mdev); + put_ldev(device); r &= bdi_bits; reason = 'c'; goto out; } - if (get_ldev(mdev)) { - q = bdev_get_queue(mdev->ldev->backing_bdev); + if (get_ldev(device)) { + q = bdev_get_queue(device->ldev->backing_bdev); r = bdi_congested(&q->backing_dev_info, bdi_bits); - put_ldev(mdev); + put_ldev(device); if (r) reason = 'b'; } - if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) { + if (bdi_bits & (1 << BDI_async_congested) && + test_bit(NET_CONGESTED, &first_peer_device(device)->connection->flags)) { r |= (1 << BDI_async_congested); reason = reason == 'b' ? 'a' : 'n'; } out: - mdev->congestion_reason = reason; + device->congestion_reason = reason; return r; } @@ -2385,45 +2405,72 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq) init_waitqueue_head(&wq->q_wait); } -struct drbd_tconn *conn_get_by_name(const char *name) +struct completion_work { + struct drbd_work w; + struct completion done; +}; + +static int w_complete(struct drbd_work *w, int cancel) +{ + struct completion_work *completion_work = + container_of(w, struct completion_work, w); + + complete(&completion_work->done); + return 0; +} + +void drbd_flush_workqueue(struct drbd_work_queue *work_queue) +{ + struct completion_work completion_work; + + completion_work.w.cb = w_complete; + init_completion(&completion_work.done); + drbd_queue_work(work_queue, &completion_work.w); + wait_for_completion(&completion_work.done); +} + +struct drbd_resource *drbd_find_resource(const char *name) { - struct drbd_tconn *tconn; + struct drbd_resource *resource; if (!name || !name[0]) return NULL; rcu_read_lock(); - list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { - if (!strcmp(tconn->name, name)) { - kref_get(&tconn->kref); + for_each_resource_rcu(resource, &drbd_resources) { + if (!strcmp(resource->name, name)) { + 
kref_get(&resource->kref); goto found; } } - tconn = NULL; + resource = NULL; found: rcu_read_unlock(); - return tconn; + return resource; } -struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, +struct drbd_connection *conn_get_by_addrs(void *my_addr, int my_addr_len, void *peer_addr, int peer_addr_len) { - struct drbd_tconn *tconn; + struct drbd_resource *resource; + struct drbd_connection *connection; rcu_read_lock(); - list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { - if (tconn->my_addr_len == my_addr_len && - tconn->peer_addr_len == peer_addr_len && - !memcmp(&tconn->my_addr, my_addr, my_addr_len) && - !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) { - kref_get(&tconn->kref); - goto found; + for_each_resource_rcu(resource, &drbd_resources) { + for_each_connection_rcu(connection, resource) { + if (connection->my_addr_len == my_addr_len && + connection->peer_addr_len == peer_addr_len && + !memcmp(&connection->my_addr, my_addr, my_addr_len) && + !memcmp(&connection->peer_addr, peer_addr, peer_addr_len)) { + kref_get(&connection->kref); + goto found; + } } } - tconn = NULL; + connection = NULL; found: rcu_read_unlock(); - return tconn; + return connection; } static int drbd_alloc_socket(struct drbd_socket *socket) @@ -2443,29 +2490,30 @@ static void drbd_free_socket(struct drbd_socket *socket) free_page((unsigned long) socket->rbuf); } -void conn_free_crypto(struct drbd_tconn *tconn) +void conn_free_crypto(struct drbd_connection *connection) { - drbd_free_sock(tconn); + drbd_free_sock(connection); - crypto_free_hash(tconn->csums_tfm); - crypto_free_hash(tconn->verify_tfm); - crypto_free_hash(tconn->cram_hmac_tfm); - crypto_free_hash(tconn->integrity_tfm); - crypto_free_hash(tconn->peer_integrity_tfm); - kfree(tconn->int_dig_in); - kfree(tconn->int_dig_vv); + crypto_free_hash(connection->csums_tfm); + crypto_free_hash(connection->verify_tfm); + crypto_free_hash(connection->cram_hmac_tfm); + 
crypto_free_hash(connection->integrity_tfm); + crypto_free_hash(connection->peer_integrity_tfm); + kfree(connection->int_dig_in); + kfree(connection->int_dig_vv); - tconn->csums_tfm = NULL; - tconn->verify_tfm = NULL; - tconn->cram_hmac_tfm = NULL; - tconn->integrity_tfm = NULL; - tconn->peer_integrity_tfm = NULL; - tconn->int_dig_in = NULL; - tconn->int_dig_vv = NULL; + connection->csums_tfm = NULL; + connection->verify_tfm = NULL; + connection->cram_hmac_tfm = NULL; + connection->integrity_tfm = NULL; + connection->peer_integrity_tfm = NULL; + connection->int_dig_in = NULL; + connection->int_dig_vv = NULL; } -int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts) +int set_resource_options(struct drbd_resource *resource, struct res_opts *res_opts) { + struct drbd_connection *connection; cpumask_var_t new_cpu_mask; int err; @@ -2478,22 +2526,24 @@ int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts) /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { - /* FIXME: Get rid of constant 32 here */ - err = bitmap_parse(res_opts->cpu_mask, 32, + err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - conn_warn(tconn, "bitmap_parse() failed with %d\n", err); + drbd_warn(resource, "bitmap_parse() failed with %d\n", err); /* retcode = ERR_CPU_MASK_PARSE; */ goto fail; } } - tconn->res_opts = *res_opts; - if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { - cpumask_copy(tconn->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(tconn); - tconn->receiver.reset_cpu_mask = 1; - tconn->asender.reset_cpu_mask = 1; - tconn->worker.reset_cpu_mask = 1; + resource->res_opts = *res_opts; + if (cpumask_empty(new_cpu_mask)) + drbd_calc_cpu_mask(&new_cpu_mask); + if (!cpumask_equal(resource->cpu_mask, new_cpu_mask)) { + cpumask_copy(resource->cpu_mask, new_cpu_mask); + for_each_connection_rcu(connection, resource) { + 
connection->receiver.reset_cpu_mask = 1; + connection->asender.reset_cpu_mask = 1; + connection->worker.reset_cpu_mask = 1; + } } err = 0; @@ -2503,146 +2553,177 @@ fail: } +struct drbd_resource *drbd_create_resource(const char *name) +{ + struct drbd_resource *resource; + + resource = kzalloc(sizeof(struct drbd_resource), GFP_KERNEL); + if (!resource) + goto fail; + resource->name = kstrdup(name, GFP_KERNEL); + if (!resource->name) + goto fail_free_resource; + if (!zalloc_cpumask_var(&resource->cpu_mask, GFP_KERNEL)) + goto fail_free_name; + kref_init(&resource->kref); + idr_init(&resource->devices); + INIT_LIST_HEAD(&resource->connections); + list_add_tail_rcu(&resource->resources, &drbd_resources); + mutex_init(&resource->conf_update); + spin_lock_init(&resource->req_lock); + return resource; + +fail_free_name: + kfree(resource->name); +fail_free_resource: + kfree(resource); +fail: + return NULL; +} + /* caller must be under genl_lock() */ -struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) +struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) { - struct drbd_tconn *tconn; + struct drbd_resource *resource; + struct drbd_connection *connection; - tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL); - if (!tconn) + connection = kzalloc(sizeof(struct drbd_connection), GFP_KERNEL); + if (!connection) return NULL; - tconn->name = kstrdup(name, GFP_KERNEL); - if (!tconn->name) + if (drbd_alloc_socket(&connection->data)) goto fail; - - if (drbd_alloc_socket(&tconn->data)) - goto fail; - if (drbd_alloc_socket(&tconn->meta)) + if (drbd_alloc_socket(&connection->meta)) goto fail; - if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) + connection->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); + if (!connection->current_epoch) goto fail; - if (set_resource_options(tconn, res_opts)) - goto fail; + INIT_LIST_HEAD(&connection->transfer_log); - tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), 
GFP_KERNEL); - if (!tconn->current_epoch) - goto fail; + INIT_LIST_HEAD(&connection->current_epoch->list); + connection->epochs = 1; + spin_lock_init(&connection->epoch_lock); + connection->write_ordering = WO_bdev_flush; - INIT_LIST_HEAD(&tconn->transfer_log); + connection->send.seen_any_write_yet = false; + connection->send.current_epoch_nr = 0; + connection->send.current_epoch_writes = 0; - INIT_LIST_HEAD(&tconn->current_epoch->list); - tconn->epochs = 1; - spin_lock_init(&tconn->epoch_lock); - tconn->write_ordering = WO_bdev_flush; + resource = drbd_create_resource(name); + if (!resource) + goto fail; - tconn->send.seen_any_write_yet = false; - tconn->send.current_epoch_nr = 0; - tconn->send.current_epoch_writes = 0; + connection->cstate = C_STANDALONE; + mutex_init(&connection->cstate_mutex); + init_waitqueue_head(&connection->ping_wait); + idr_init(&connection->peer_devices); - tconn->cstate = C_STANDALONE; - mutex_init(&tconn->cstate_mutex); - spin_lock_init(&tconn->req_lock); - mutex_init(&tconn->conf_update); - init_waitqueue_head(&tconn->ping_wait); - idr_init(&tconn->volumes); + drbd_init_workqueue(&connection->sender_work); + mutex_init(&connection->data.mutex); + mutex_init(&connection->meta.mutex); - drbd_init_workqueue(&tconn->sender_work); - mutex_init(&tconn->data.mutex); - mutex_init(&tconn->meta.mutex); + drbd_thread_init(resource, &connection->receiver, drbd_receiver, "receiver"); + connection->receiver.connection = connection; + drbd_thread_init(resource, &connection->worker, drbd_worker, "worker"); + connection->worker.connection = connection; + drbd_thread_init(resource, &connection->asender, drbd_asender, "asender"); + connection->asender.connection = connection; - drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); - drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); - drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + kref_init(&connection->kref); - kref_init(&tconn->kref); - 
list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns); + connection->resource = resource; - return tconn; + if (set_resource_options(resource, res_opts)) + goto fail_resource; -fail: - kfree(tconn->current_epoch); - free_cpumask_var(tconn->cpu_mask); - drbd_free_socket(&tconn->meta); - drbd_free_socket(&tconn->data); - kfree(tconn->name); - kfree(tconn); + kref_get(&resource->kref); + list_add_tail_rcu(&connection->connections, &resource->connections); + return connection; +fail_resource: + list_del(&resource->resources); + drbd_free_resource(resource); +fail: + kfree(connection->current_epoch); + drbd_free_socket(&connection->meta); + drbd_free_socket(&connection->data); + kfree(connection); return NULL; } -void conn_destroy(struct kref *kref) +void drbd_destroy_connection(struct kref *kref) { - struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref); + struct drbd_connection *connection = container_of(kref, struct drbd_connection, kref); + struct drbd_resource *resource = connection->resource; - if (atomic_read(&tconn->current_epoch->epoch_size) != 0) - conn_err(tconn, "epoch_size:%d\n", atomic_read(&tconn->current_epoch->epoch_size)); - kfree(tconn->current_epoch); + if (atomic_read(&connection->current_epoch->epoch_size) != 0) + drbd_err(connection, "epoch_size:%d\n", atomic_read(&connection->current_epoch->epoch_size)); + kfree(connection->current_epoch); - idr_destroy(&tconn->volumes); + idr_destroy(&connection->peer_devices); - free_cpumask_var(tconn->cpu_mask); - drbd_free_socket(&tconn->meta); - drbd_free_socket(&tconn->data); - kfree(tconn->name); - kfree(tconn->int_dig_in); - kfree(tconn->int_dig_vv); - kfree(tconn); + drbd_free_socket(&connection->meta); + drbd_free_socket(&connection->data); + kfree(connection->int_dig_in); + kfree(connection->int_dig_vv); + kfree(connection); + kref_put(&resource->kref, drbd_destroy_resource); } -int init_submitter(struct drbd_conf *mdev) +static int init_submitter(struct drbd_device *device) { /* 
opencoded create_singlethread_workqueue(), * to be able to say "drbd%d", ..., minor */ - mdev->submit.wq = alloc_workqueue("drbd%u_submit", - WQ_UNBOUND | WQ_MEM_RECLAIM, 1, mdev->minor); - if (!mdev->submit.wq) + device->submit.wq = alloc_workqueue("drbd%u_submit", + WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor); + if (!device->submit.wq) return -ENOMEM; - INIT_WORK(&mdev->submit.worker, do_submit); - spin_lock_init(&mdev->submit.lock); - INIT_LIST_HEAD(&mdev->submit.writes); + INIT_WORK(&device->submit.worker, do_submit); + spin_lock_init(&device->submit.lock); + INIT_LIST_HEAD(&device->submit.writes); return 0; } -enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr) +enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr) { - struct drbd_conf *mdev; + struct drbd_connection *connection; + struct drbd_device *device; + struct drbd_peer_device *peer_device, *tmp_peer_device; struct gendisk *disk; struct request_queue *q; - int vnr_got = vnr; - int minor_got = minor; + int id; enum drbd_ret_code err = ERR_NOMEM; - mdev = minor_to_mdev(minor); - if (mdev) + device = minor_to_device(minor); + if (device) return ERR_MINOR_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ - mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); - if (!mdev) + device = kzalloc(sizeof(struct drbd_device), GFP_KERNEL); + if (!device) return ERR_NOMEM; + kref_init(&device->kref); - kref_get(&tconn->kref); - mdev->tconn = tconn; + kref_get(&resource->kref); + device->resource = resource; + device->minor = minor; + device->vnr = vnr; - mdev->minor = minor; - mdev->vnr = vnr; - - drbd_init_set_defaults(mdev); + drbd_init_set_defaults(device); q = blk_alloc_queue(GFP_KERNEL); if (!q) goto out_no_q; - mdev->rq_queue = q; - q->queuedata = mdev; + device->rq_queue = q; + q->queuedata = device; disk = alloc_disk(1); if (!disk) goto out_no_disk; - mdev->vdisk = disk; + device->vdisk = disk; 
set_disk_ro(disk, true); @@ -2651,14 +2732,14 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, disk->first_minor = minor; disk->fops = &drbd_ops; sprintf(disk->disk_name, "drbd%d", minor); - disk->private_data = mdev; + disk->private_data = device; - mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor)); + device->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor)); /* we have no partitions. we contain only ourselves. */ - mdev->this_bdev->bd_contains = mdev->this_bdev; + device->this_bdev->bd_contains = device->this_bdev; q->backing_dev_info.congested_fn = drbd_congested; - q->backing_dev_info.congested_data = mdev; + q->backing_dev_info.congested_data = device; blk_queue_make_request(q, drbd_make_request); blk_queue_flush(q, REQ_FLUSH | REQ_FUA); @@ -2667,70 +2748,125 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); - q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */ + q->queue_lock = &resource->req_lock; - mdev->md_io_page = alloc_page(GFP_KERNEL); - if (!mdev->md_io_page) + device->md_io_page = alloc_page(GFP_KERNEL); + if (!device->md_io_page) goto out_no_io_page; - if (drbd_bm_init(mdev)) + if (drbd_bm_init(device)) goto out_no_bitmap; - mdev->read_requests = RB_ROOT; - mdev->write_requests = RB_ROOT; + device->read_requests = RB_ROOT; + device->write_requests = RB_ROOT; - minor_got = idr_alloc(&minors, mdev, minor, minor + 1, GFP_KERNEL); - if (minor_got < 0) { - if (minor_got == -ENOSPC) { + id = idr_alloc(&drbd_devices, device, minor, minor + 1, GFP_KERNEL); + if (id < 0) { + if (id == -ENOSPC) { err = ERR_MINOR_EXISTS; drbd_msg_put_info("requested minor exists already"); } goto out_no_minor_idr; } + kref_get(&device->kref); - vnr_got = idr_alloc(&tconn->volumes, mdev, vnr, vnr + 1, GFP_KERNEL); - if (vnr_got < 0) { - if (vnr_got == -ENOSPC) { - 
err = ERR_INVALID_REQUEST; - drbd_msg_put_info("requested volume exists already"); + id = idr_alloc(&resource->devices, device, vnr, vnr + 1, GFP_KERNEL); + if (id < 0) { + if (id == -ENOSPC) { + err = ERR_MINOR_EXISTS; + drbd_msg_put_info("requested minor exists already"); } goto out_idr_remove_minor; } + kref_get(&device->kref); + + INIT_LIST_HEAD(&device->peer_devices); + for_each_connection(connection, resource) { + peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL); + if (!peer_device) + goto out_idr_remove_from_resource; + peer_device->connection = connection; + peer_device->device = device; + + list_add(&peer_device->peer_devices, &device->peer_devices); + kref_get(&device->kref); - if (init_submitter(mdev)) { + id = idr_alloc(&connection->peer_devices, peer_device, vnr, vnr + 1, GFP_KERNEL); + if (id < 0) { + if (id == -ENOSPC) { + err = ERR_INVALID_REQUEST; + drbd_msg_put_info("requested volume exists already"); + } + goto out_idr_remove_from_resource; + } + kref_get(&connection->kref); + } + + if (init_submitter(device)) { err = ERR_NOMEM; drbd_msg_put_info("unable to create submit workqueue"); goto out_idr_remove_vol; } add_disk(disk); - kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */ /* inherit the connection state */ - mdev->state.conn = tconn->cstate; - if (mdev->state.conn == C_WF_REPORT_PARAMS) - drbd_connected(mdev); + device->state.conn = first_connection(resource)->cstate; + if (device->state.conn == C_WF_REPORT_PARAMS) { + for_each_peer_device(peer_device, device) + drbd_connected(peer_device); + } return NO_ERROR; out_idr_remove_vol: - idr_remove(&tconn->volumes, vnr_got); + idr_remove(&connection->peer_devices, vnr); +out_idr_remove_from_resource: + for_each_connection(connection, resource) { + peer_device = idr_find(&connection->peer_devices, vnr); + if (peer_device) { + idr_remove(&connection->peer_devices, vnr); + kref_put(&connection->kref, drbd_destroy_connection); + } + } + 
for_each_peer_device_safe(peer_device, tmp_peer_device, device) { + list_del(&peer_device->peer_devices); + kfree(peer_device); + } + idr_remove(&resource->devices, vnr); out_idr_remove_minor: - idr_remove(&minors, minor_got); + idr_remove(&drbd_devices, minor); synchronize_rcu(); out_no_minor_idr: - drbd_bm_cleanup(mdev); + drbd_bm_cleanup(device); out_no_bitmap: - __free_page(mdev->md_io_page); + __free_page(device->md_io_page); out_no_io_page: put_disk(disk); out_no_disk: blk_cleanup_queue(q); out_no_q: - kfree(mdev); - kref_put(&tconn->kref, &conn_destroy); + kref_put(&resource->kref, drbd_destroy_resource); + kfree(device); return err; } +void drbd_delete_device(struct drbd_device *device) +{ + struct drbd_resource *resource = device->resource; + struct drbd_connection *connection; + int refs = 3; + + for_each_connection(connection, resource) { + idr_remove(&connection->peer_devices, device->vnr); + refs++; + } + idr_remove(&resource->devices, device->vnr); + idr_remove(&drbd_devices, device_to_minor(device)); + del_gendisk(device->vdisk); + synchronize_rcu(); + kref_sub(&device->kref, refs, drbd_destroy_device); +} + int __init drbd_init(void) { int err; @@ -2761,10 +2897,10 @@ int __init drbd_init(void) init_waitqueue_head(&drbd_pp_wait); drbd_proc = NULL; /* play safe for drbd_cleanup */ - idr_init(&minors); + idr_init(&drbd_devices); rwlock_init(&global_state_lock); - INIT_LIST_HEAD(&drbd_tconns); + INIT_LIST_HEAD(&drbd_resources); err = drbd_genl_register(); if (err) { @@ -2822,37 +2958,39 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) kfree(ldev); } -void drbd_free_sock(struct drbd_tconn *tconn) +void drbd_free_sock(struct drbd_connection *connection) { - if (tconn->data.socket) { - mutex_lock(&tconn->data.mutex); - kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR); - sock_release(tconn->data.socket); - tconn->data.socket = NULL; - mutex_unlock(&tconn->data.mutex); + if (connection->data.socket) { + mutex_lock(&connection->data.mutex); + 
kernel_sock_shutdown(connection->data.socket, SHUT_RDWR); + sock_release(connection->data.socket); + connection->data.socket = NULL; + mutex_unlock(&connection->data.mutex); } - if (tconn->meta.socket) { - mutex_lock(&tconn->meta.mutex); - kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR); - sock_release(tconn->meta.socket); - tconn->meta.socket = NULL; - mutex_unlock(&tconn->meta.mutex); + if (connection->meta.socket) { + mutex_lock(&connection->meta.mutex); + kernel_sock_shutdown(connection->meta.socket, SHUT_RDWR); + sock_release(connection->meta.socket); + connection->meta.socket = NULL; + mutex_unlock(&connection->meta.mutex); } } /* meta data management */ -void conn_md_sync(struct drbd_tconn *tconn) +void conn_md_sync(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - kref_get(&mdev->kref); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + + kref_get(&device->kref); rcu_read_unlock(); - drbd_md_sync(mdev); - kref_put(&mdev->kref, &drbd_minor_destroy); + drbd_md_sync(device); + kref_put(&device->kref, drbd_destroy_device); rcu_read_lock(); } rcu_read_unlock(); @@ -2883,7 +3021,7 @@ struct meta_data_on_disk { -void drbd_md_write(struct drbd_conf *mdev, void *b) +void drbd_md_write(struct drbd_device *device, void *b) { struct meta_data_on_disk *buffer = b; sector_t sector; @@ -2891,39 +3029,39 @@ void drbd_md_write(struct drbd_conf *mdev, void *b) memset(buffer, 0, sizeof(*buffer)); - buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); + buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(device->this_bdev)); for (i = UI_CURRENT; i < UI_SIZE; i++) - buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); - buffer->flags = cpu_to_be32(mdev->ldev->md.flags); + buffer->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]); + buffer->flags = 
cpu_to_be32(device->ldev->md.flags); buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN); - buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); - buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); - buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements); + buffer->md_size_sect = cpu_to_be32(device->ldev->md.md_size_sect); + buffer->al_offset = cpu_to_be32(device->ldev->md.al_offset); + buffer->al_nr_extents = cpu_to_be32(device->act_log->nr_elements); buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE); - buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); + buffer->device_uuid = cpu_to_be64(device->ldev->md.device_uuid); - buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); - buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); + buffer->bm_offset = cpu_to_be32(device->ldev->md.bm_offset); + buffer->la_peer_max_bio_size = cpu_to_be32(device->peer_max_bio_size); - buffer->al_stripes = cpu_to_be32(mdev->ldev->md.al_stripes); - buffer->al_stripe_size_4k = cpu_to_be32(mdev->ldev->md.al_stripe_size_4k); + buffer->al_stripes = cpu_to_be32(device->ldev->md.al_stripes); + buffer->al_stripe_size_4k = cpu_to_be32(device->ldev->md.al_stripe_size_4k); - D_ASSERT(drbd_md_ss(mdev->ldev) == mdev->ldev->md.md_offset); - sector = mdev->ldev->md.md_offset; + D_ASSERT(device, drbd_md_ss(device->ldev) == device->ldev->md.md_offset); + sector = device->ldev->md.md_offset; - if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { /* this was a try anyways ... */ - dev_err(DEV, "meta data update failed!\n"); - drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); + drbd_err(device, "meta data update failed!\n"); + drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); } } /** * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set - * @mdev: DRBD device. + * @device: DRBD device. 
*/ -void drbd_md_sync(struct drbd_conf *mdev) +void drbd_md_sync(struct drbd_device *device) { struct meta_data_on_disk *buffer; @@ -2931,32 +3069,32 @@ void drbd_md_sync(struct drbd_conf *mdev) BUILD_BUG_ON(UI_SIZE != 4); BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096); - del_timer(&mdev->md_sync_timer); + del_timer(&device->md_sync_timer); /* timer may be rearmed by drbd_md_mark_dirty() now. */ - if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) + if (!test_and_clear_bit(MD_DIRTY, &device->flags)) return; /* We use here D_FAILED and not D_ATTACHING because we try to write * metadata even if we detach due to a disk failure! */ - if (!get_ldev_if_state(mdev, D_FAILED)) + if (!get_ldev_if_state(device, D_FAILED)) return; - buffer = drbd_md_get_buffer(mdev); + buffer = drbd_md_get_buffer(device); if (!buffer) goto out; - drbd_md_write(mdev, buffer); + drbd_md_write(device, buffer); - /* Update mdev->ldev->md.la_size_sect, + /* Update device->ldev->md.la_size_sect, * since we updated it on metadata. 
*/ - mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); + device->ldev->md.la_size_sect = drbd_get_capacity(device->this_bdev); - drbd_md_put_buffer(mdev); + drbd_md_put_buffer(device); out: - put_ldev(mdev); + put_ldev(device); } -static int check_activity_log_stripe_size(struct drbd_conf *mdev, +static int check_activity_log_stripe_size(struct drbd_device *device, struct meta_data_on_disk *on_disk, struct drbd_md *in_core) { @@ -2996,12 +3134,12 @@ static int check_activity_log_stripe_size(struct drbd_conf *mdev, return 0; err: - dev_err(DEV, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n", + drbd_err(device, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n", al_stripes, al_stripe_size_4k); return -EINVAL; } -static int check_offsets_and_sizes(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +static int check_offsets_and_sizes(struct drbd_device *device, struct drbd_backing_dev *bdev) { sector_t capacity = drbd_get_capacity(bdev->md_bdev); struct drbd_md *in_core = &bdev->md; @@ -3068,7 +3206,7 @@ static int check_offsets_and_sizes(struct drbd_conf *mdev, struct drbd_backing_d return 0; err: - dev_err(DEV, "meta data offsets don't make sense: idx=%d " + drbd_err(device, "meta data offsets don't make sense: idx=%d " "al_s=%u, al_sz4k=%u, al_offset=%d, bm_offset=%d, " "md_size_sect=%u, la_size=%llu, md_capacity=%llu\n", in_core->meta_dev_idx, @@ -3083,25 +3221,25 @@ err: /** * drbd_md_read() - Reads in the meta data super block - * @mdev: DRBD device. + * @device: DRBD device. * @bdev: Device from which the meta data should be read in. * * Return NO_ERROR on success, and an enum drbd_ret_code in case * something goes wrong. * * Called exactly once during drbd_adm_attach(), while still being D_DISKLESS, - * even before @bdev is assigned to @mdev->ldev. + * even before @bdev is assigned to @device->ldev. 
*/ -int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) { struct meta_data_on_disk *buffer; u32 magic, flags; int i, rv = NO_ERROR; - if (mdev->state.disk != D_DISKLESS) + if (device->state.disk != D_DISKLESS) return ERR_DISK_CONFIGURED; - buffer = drbd_md_get_buffer(mdev); + buffer = drbd_md_get_buffer(device); if (!buffer) return ERR_NOMEM; @@ -3110,10 +3248,10 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; bdev->md.md_offset = drbd_md_ss(bdev); - if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { + if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is called BEFORE disk is attached */ - dev_err(DEV, "Error while reading metadata.\n"); + drbd_err(device, "Error while reading metadata.\n"); rv = ERR_IO_MD_DISK; goto err; } @@ -3123,7 +3261,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (magic == DRBD_MD_MAGIC_84_UNCLEAN || (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) { /* btw: that's Activity Log clean, not "all" clean. */ - dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n"); + drbd_err(device, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n"); rv = ERR_MD_UNCLEAN; goto err; } @@ -3131,14 +3269,14 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) rv = ERR_MD_INVALID; if (magic != DRBD_MD_MAGIC_08) { if (magic == DRBD_MD_MAGIC_07) - dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); + drbd_err(device, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); else - dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); + drbd_err(device, "Meta data magic not found. 
Did you \"drbdadm create-md\"?\n"); goto err; } if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { - dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", + drbd_err(device, "unexpected bm_bytes_per_bit: %u (expected %u)\n", be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); goto err; } @@ -3155,182 +3293,182 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) bdev->md.al_offset = be32_to_cpu(buffer->al_offset); bdev->md.bm_offset = be32_to_cpu(buffer->bm_offset); - if (check_activity_log_stripe_size(mdev, buffer, &bdev->md)) + if (check_activity_log_stripe_size(device, buffer, &bdev->md)) goto err; - if (check_offsets_and_sizes(mdev, bdev)) + if (check_offsets_and_sizes(device, bdev)) goto err; if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { - dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", + drbd_err(device, "unexpected bm_offset: %d (expected %d)\n", be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); goto err; } if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { - dev_err(DEV, "unexpected md_size: %u (expected %u)\n", + drbd_err(device, "unexpected md_size: %u (expected %u)\n", be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); goto err; } rv = NO_ERROR; - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.conn < C_CONNECTED) { + spin_lock_irq(&device->resource->req_lock); + if (device->state.conn < C_CONNECTED) { unsigned int peer; peer = be32_to_cpu(buffer->la_peer_max_bio_size); peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE); - mdev->peer_max_bio_size = peer; + device->peer_max_bio_size = peer; } - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); err: - drbd_md_put_buffer(mdev); + drbd_md_put_buffer(device); return rv; } /** * drbd_md_mark_dirty() - Mark meta data super block as dirty - * @mdev: DRBD device. + * @device: DRBD device. 
* * Call this function if you change anything that should be written to * the meta-data super block. This function sets MD_DIRTY, and starts a * timer that ensures that within five seconds you have to call drbd_md_sync(). */ #ifdef DEBUG -void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func) +void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func) { - if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) { - mod_timer(&mdev->md_sync_timer, jiffies + HZ); - mdev->last_md_mark_dirty.line = line; - mdev->last_md_mark_dirty.func = func; + if (!test_and_set_bit(MD_DIRTY, &device->flags)) { + mod_timer(&device->md_sync_timer, jiffies + HZ); + device->last_md_mark_dirty.line = line; + device->last_md_mark_dirty.func = func; } } #else -void drbd_md_mark_dirty(struct drbd_conf *mdev) +void drbd_md_mark_dirty(struct drbd_device *device) { - if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) - mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); + if (!test_and_set_bit(MD_DIRTY, &device->flags)) + mod_timer(&device->md_sync_timer, jiffies + 5*HZ); } #endif -void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) +void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local) { int i; for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) - mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; + device->ldev->md.uuid[i+1] = device->ldev->md.uuid[i]; } -void __drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local) { if (idx == UI_CURRENT) { - if (mdev->state.role == R_PRIMARY) + if (device->state.role == R_PRIMARY) val |= 1; else val &= ~((u64)1); - drbd_set_ed_uuid(mdev, val); + drbd_set_ed_uuid(device, val); } - mdev->ldev->md.uuid[idx] = val; - drbd_md_mark_dirty(mdev); + device->ldev->md.uuid[idx] = val; + drbd_md_mark_dirty(device); } -void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) 
__must_hold(local) +void _drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local) { unsigned long flags; - spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); - __drbd_uuid_set(mdev, idx, val); - spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); + spin_lock_irqsave(&device->ldev->md.uuid_lock, flags); + __drbd_uuid_set(device, idx, val); + spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags); } -void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +void drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must_hold(local) { unsigned long flags; - spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); - if (mdev->ldev->md.uuid[idx]) { - drbd_uuid_move_history(mdev); - mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; + spin_lock_irqsave(&device->ldev->md.uuid_lock, flags); + if (device->ldev->md.uuid[idx]) { + drbd_uuid_move_history(device); + device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[idx]; } - __drbd_uuid_set(mdev, idx, val); - spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); + __drbd_uuid_set(device, idx, val); + spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags); } /** * drbd_uuid_new_current() - Creates a new current UUID - * @mdev: DRBD device. + * @device: DRBD device. * * Creates a new current UUID, and rotates the old current UUID into * the bitmap slot. Causes an incremental resync upon next connect. 
*/ -void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) +void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local) { u64 val; unsigned long long bm_uuid; get_random_bytes(&val, sizeof(u64)); - spin_lock_irq(&mdev->ldev->md.uuid_lock); - bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + spin_lock_irq(&device->ldev->md.uuid_lock); + bm_uuid = device->ldev->md.uuid[UI_BITMAP]; if (bm_uuid) - dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); + drbd_warn(device, "bm UUID was already set: %llX\n", bm_uuid); - mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; - __drbd_uuid_set(mdev, UI_CURRENT, val); - spin_unlock_irq(&mdev->ldev->md.uuid_lock); + device->ldev->md.uuid[UI_BITMAP] = device->ldev->md.uuid[UI_CURRENT]; + __drbd_uuid_set(device, UI_CURRENT, val); + spin_unlock_irq(&device->ldev->md.uuid_lock); - drbd_print_uuids(mdev, "new current UUID"); + drbd_print_uuids(device, "new current UUID"); /* get it to stable storage _now_ */ - drbd_md_sync(mdev); + drbd_md_sync(device); } -void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) +void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) { unsigned long flags; - if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) + if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) return; - spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); + spin_lock_irqsave(&device->ldev->md.uuid_lock, flags); if (val == 0) { - drbd_uuid_move_history(mdev); - mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; - mdev->ldev->md.uuid[UI_BITMAP] = 0; + drbd_uuid_move_history(device); + device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP]; + device->ldev->md.uuid[UI_BITMAP] = 0; } else { - unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + unsigned long long bm_uuid = device->ldev->md.uuid[UI_BITMAP]; if (bm_uuid) - dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); + drbd_warn(device, "bm 
UUID was already set: %llX\n", bm_uuid); - mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); + device->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); } - spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); + spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags); - drbd_md_mark_dirty(mdev); + drbd_md_mark_dirty(device); } /** * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() - * @mdev: DRBD device. + * @device: DRBD device. * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_set_n_write(struct drbd_conf *mdev) +int drbd_bmio_set_n_write(struct drbd_device *device) { int rv = -EIO; - if (get_ldev_if_state(mdev, D_ATTACHING)) { - drbd_md_set_flag(mdev, MDF_FULL_SYNC); - drbd_md_sync(mdev); - drbd_bm_set_all(mdev); + if (get_ldev_if_state(device, D_ATTACHING)) { + drbd_md_set_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); + drbd_bm_set_all(device); - rv = drbd_bm_write(mdev); + rv = drbd_bm_write(device); if (!rv) { - drbd_md_clear_flag(mdev, MDF_FULL_SYNC); - drbd_md_sync(mdev); + drbd_md_clear_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); } - put_ldev(mdev); + put_ldev(device); } return rv; @@ -3338,19 +3476,19 @@ int drbd_bmio_set_n_write(struct drbd_conf *mdev) /** * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() - * @mdev: DRBD device. + * @device: DRBD device. * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. 
*/ -int drbd_bmio_clear_n_write(struct drbd_conf *mdev) +int drbd_bmio_clear_n_write(struct drbd_device *device) { int rv = -EIO; - drbd_resume_al(mdev); - if (get_ldev_if_state(mdev, D_ATTACHING)) { - drbd_bm_clear_all(mdev); - rv = drbd_bm_write(mdev); - put_ldev(mdev); + drbd_resume_al(device); + if (get_ldev_if_state(device, D_ATTACHING)) { + drbd_bm_clear_all(device); + rv = drbd_bm_write(device); + put_ldev(device); } return rv; @@ -3358,50 +3496,52 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) static int w_bitmap_io(struct drbd_work *w, int unused) { - struct bm_io_work *work = container_of(w, struct bm_io_work, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = + container_of(w, struct drbd_device, bm_io_work.w); + struct bm_io_work *work = &device->bm_io_work; int rv = -EIO; - D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); + D_ASSERT(device, atomic_read(&device->ap_bio_cnt) == 0); - if (get_ldev(mdev)) { - drbd_bm_lock(mdev, work->why, work->flags); - rv = work->io_fn(mdev); - drbd_bm_unlock(mdev); - put_ldev(mdev); + if (get_ldev(device)) { + drbd_bm_lock(device, work->why, work->flags); + rv = work->io_fn(device); + drbd_bm_unlock(device); + put_ldev(device); } - clear_bit_unlock(BITMAP_IO, &mdev->flags); - wake_up(&mdev->misc_wait); + clear_bit_unlock(BITMAP_IO, &device->flags); + wake_up(&device->misc_wait); if (work->done) - work->done(mdev, rv); + work->done(device, rv); - clear_bit(BITMAP_IO_QUEUED, &mdev->flags); + clear_bit(BITMAP_IO_QUEUED, &device->flags); work->why = NULL; work->flags = 0; return 0; } -void drbd_ldev_destroy(struct drbd_conf *mdev) +void drbd_ldev_destroy(struct drbd_device *device) { - lc_destroy(mdev->resync); - mdev->resync = NULL; - lc_destroy(mdev->act_log); - mdev->act_log = NULL; + lc_destroy(device->resync); + device->resync = NULL; + lc_destroy(device->act_log); + device->act_log = NULL; __no_warn(local, - drbd_free_bc(mdev->ldev); - mdev->ldev = NULL;); + drbd_free_bc(device->ldev); + 
device->ldev = NULL;); - clear_bit(GO_DISKLESS, &mdev->flags); + clear_bit(GO_DISKLESS, &device->flags); } static int w_go_diskless(struct drbd_work *w, int unused) { - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = + container_of(w, struct drbd_device, go_diskless); - D_ASSERT(mdev->state.disk == D_FAILED); + D_ASSERT(device, device->state.disk == D_FAILED); /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will * inc/dec it frequently. Once we are D_DISKLESS, no one will touch * the protected members anymore, though, so once put_ldev reaches zero @@ -3420,27 +3560,27 @@ static int w_go_diskless(struct drbd_work *w, int unused) * We still need to check if both bitmap and ldev are present, we may * end up here after a failed attach, before ldev was even assigned. */ - if (mdev->bitmap && mdev->ldev) { + if (device->bitmap && device->ldev) { /* An interrupted resync or similar is allowed to recounts bits * while we detach. * Any modifications would not be expected anymore, though. */ - if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write, + if (drbd_bitmap_io_from_worker(device, drbd_bm_write, "detach", BM_LOCKED_TEST_ALLOWED)) { - if (test_bit(WAS_READ_ERROR, &mdev->flags)) { - drbd_md_set_flag(mdev, MDF_FULL_SYNC); - drbd_md_sync(mdev); + if (test_bit(WAS_READ_ERROR, &device->flags)) { + drbd_md_set_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); } } } - drbd_force_state(mdev, NS(disk, D_DISKLESS)); + drbd_force_state(device, NS(disk, D_DISKLESS)); return 0; } /** * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap - * @mdev: DRBD device. + * @device: DRBD device. * @io_fn: IO callback to be called when bitmap IO is possible * @done: callback to be called after the bitmap IO was performed * @why: Descriptive text of the reason for doing the IO @@ -3450,76 +3590,77 @@ static int w_go_diskless(struct drbd_work *w, int unused) * called from worker context. 
It MUST NOT be used while a previous such * work is still pending! */ -void drbd_queue_bitmap_io(struct drbd_conf *mdev, - int (*io_fn)(struct drbd_conf *), - void (*done)(struct drbd_conf *, int), +void drbd_queue_bitmap_io(struct drbd_device *device, + int (*io_fn)(struct drbd_device *), + void (*done)(struct drbd_device *, int), char *why, enum bm_flag flags) { - D_ASSERT(current == mdev->tconn->worker.task); + D_ASSERT(device, current == first_peer_device(device)->connection->worker.task); - D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags)); - D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); - D_ASSERT(list_empty(&mdev->bm_io_work.w.list)); - if (mdev->bm_io_work.why) - dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n", - why, mdev->bm_io_work.why); + D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags)); + D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags)); + D_ASSERT(device, list_empty(&device->bm_io_work.w.list)); + if (device->bm_io_work.why) + drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n", + why, device->bm_io_work.why); - mdev->bm_io_work.io_fn = io_fn; - mdev->bm_io_work.done = done; - mdev->bm_io_work.why = why; - mdev->bm_io_work.flags = flags; + device->bm_io_work.io_fn = io_fn; + device->bm_io_work.done = done; + device->bm_io_work.why = why; + device->bm_io_work.flags = flags; - spin_lock_irq(&mdev->tconn->req_lock); - set_bit(BITMAP_IO, &mdev->flags); - if (atomic_read(&mdev->ap_bio_cnt) == 0) { - if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w); + spin_lock_irq(&device->resource->req_lock); + set_bit(BITMAP_IO, &device->flags); + if (atomic_read(&device->ap_bio_cnt) == 0) { + if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &device->bm_io_work.w); } - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); } /** 
* drbd_bitmap_io() - Does an IO operation on the whole bitmap - * @mdev: DRBD device. + * @device: DRBD device. * @io_fn: IO callback to be called when bitmap IO is possible * @why: Descriptive text of the reason for doing the IO * * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. */ -int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), +int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags) { int rv; - D_ASSERT(current != mdev->tconn->worker.task); + D_ASSERT(device, current != first_peer_device(device)->connection->worker.task); if ((flags & BM_LOCKED_SET_ALLOWED) == 0) - drbd_suspend_io(mdev); + drbd_suspend_io(device); - drbd_bm_lock(mdev, why, flags); - rv = io_fn(mdev); - drbd_bm_unlock(mdev); + drbd_bm_lock(device, why, flags); + rv = io_fn(device); + drbd_bm_unlock(device); if ((flags & BM_LOCKED_SET_ALLOWED) == 0) - drbd_resume_io(mdev); + drbd_resume_io(device); return rv; } -void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local) +void drbd_md_set_flag(struct drbd_device *device, int flag) __must_hold(local) { - if ((mdev->ldev->md.flags & flag) != flag) { - drbd_md_mark_dirty(mdev); - mdev->ldev->md.flags |= flag; + if ((device->ldev->md.flags & flag) != flag) { + drbd_md_mark_dirty(device); + device->ldev->md.flags |= flag; } } -void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local) +void drbd_md_clear_flag(struct drbd_device *device, int flag) __must_hold(local) { - if ((mdev->ldev->md.flags & flag) != 0) { - drbd_md_mark_dirty(mdev); - mdev->ldev->md.flags &= ~flag; + if ((device->ldev->md.flags & flag) != 0) { + drbd_md_mark_dirty(device); + device->ldev->md.flags &= ~flag; } } int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) @@ -3529,23 +3670,25 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) static void 
md_sync_timer_fn(unsigned long data) { - struct drbd_conf *mdev = (struct drbd_conf *) data; + struct drbd_device *device = (struct drbd_device *) data; /* must not double-queue! */ - if (list_empty(&mdev->md_sync_work.list)) - drbd_queue_work_front(&mdev->tconn->sender_work, &mdev->md_sync_work); + if (list_empty(&device->md_sync_work.list)) + drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, + &device->md_sync_work); } static int w_md_sync(struct drbd_work *w, int unused) { - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = + container_of(w, struct drbd_device, md_sync_work); - dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); + drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); #ifdef DEBUG - dev_warn(DEV, "last md_mark_dirty: %s:%u\n", - mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line); + drbd_warn(device, "last md_mark_dirty: %s:%u\n", + device->last_md_mark_dirty.func, device->last_md_mark_dirty.line); #endif - drbd_md_sync(mdev); + drbd_md_sync(device); return 0; } @@ -3621,18 +3764,18 @@ const char *cmdname(enum drbd_packet cmd) /** * drbd_wait_misc - wait for a request to make progress - * @mdev: device associated with the request + * @device: device associated with the request * @i: the struct drbd_interval embedded in struct drbd_request or * struct drbd_peer_request */ -int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i) +int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i) { struct net_conf *nc; DEFINE_WAIT(wait); long timeout; rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); if (!nc) { rcu_read_unlock(); return -ETIMEDOUT; @@ -3640,14 +3783,14 @@ int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i) timeout = nc->ko_count ? 
nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT; rcu_read_unlock(); - /* Indicate to wake up mdev->misc_wait on progress. */ + /* Indicate to wake up device->misc_wait on progress. */ i->waiting = true; - prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - spin_unlock_irq(&mdev->tconn->req_lock); + prepare_to_wait(&device->misc_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&device->resource->req_lock); timeout = schedule_timeout(timeout); - finish_wait(&mdev->misc_wait, &wait); - spin_lock_irq(&mdev->tconn->req_lock); - if (!timeout || mdev->state.conn < C_CONNECTED) + finish_wait(&device->misc_wait, &wait); + spin_lock_irq(&device->resource->req_lock); + if (!timeout || device->state.conn < C_CONNECTED) return -ETIMEDOUT; if (signal_pending(current)) return -ERESTARTSYS; @@ -3703,20 +3846,20 @@ _drbd_fault_str(unsigned int type) { } unsigned int -_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) +_drbd_insert_fault(struct drbd_device *device, unsigned int type) { static struct fault_random_state rrs = {0, 0}; unsigned int ret = ( (fault_devs == 0 || - ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) && + ((1 << device_to_minor(device)) & fault_devs) != 0) && (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate)); if (ret) { fault_count++; if (__ratelimit(&drbd_ratelimit_state)) - dev_warn(DEV, "***Simulating %s failure\n", + drbd_warn(device, "***Simulating %s failure\n", _drbd_fault_str(type)); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c706d50a8b06..526414bc2cab 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -32,6 +32,7 @@ #include <linux/blkpg.h> #include <linux/cpumask.h> #include "drbd_int.h" +#include "drbd_protocol.h" #include "drbd_req.h" #include "drbd_wrappers.h" #include <asm/unaligned.h> @@ -44,8 +45,8 @@ // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info); // int drbd_adm_delete_resource(struct sk_buff *skb, struct 
genl_info *info); -int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info); int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info); int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info); @@ -102,8 +103,9 @@ static struct drbd_config_context { /* pointer into reply buffer */ struct drbd_genlmsghdr *reply_dh; /* resolved from attributes, if possible */ - struct drbd_conf *mdev; - struct drbd_tconn *tconn; + struct drbd_device *device; + struct drbd_resource *resource; + struct drbd_connection *connection; } adm_ctx; static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) @@ -202,62 +204,67 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)]; adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)]; if ((adm_ctx.my_addr && - nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) || + nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) || (adm_ctx.peer_addr && - nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) { + nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) { err = -EINVAL; goto fail; } } adm_ctx.minor = d_in->minor; - adm_ctx.mdev = minor_to_mdev(d_in->minor); - adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name); + adm_ctx.device = minor_to_device(d_in->minor); + if (adm_ctx.resource_name) { + adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name); + } - if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { + if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) { drbd_msg_put_info("unknown minor"); return ERR_MINOR_INVALID; } - if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_RESOURCE)) { + if (!adm_ctx.resource && (flags & 
DRBD_ADM_NEED_RESOURCE)) { drbd_msg_put_info("unknown resource"); + if (adm_ctx.resource_name) + return ERR_RES_NOT_KNOWN; return ERR_INVALID_REQUEST; } if (flags & DRBD_ADM_NEED_CONNECTION) { - if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) { + if (adm_ctx.resource) { drbd_msg_put_info("no resource name expected"); return ERR_INVALID_REQUEST; } - if (adm_ctx.mdev) { + if (adm_ctx.device) { drbd_msg_put_info("no minor number expected"); return ERR_INVALID_REQUEST; } if (adm_ctx.my_addr && adm_ctx.peer_addr) - adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr), + adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr), nla_len(adm_ctx.my_addr), nla_data(adm_ctx.peer_addr), nla_len(adm_ctx.peer_addr)); - if (!adm_ctx.tconn) { + if (!adm_ctx.connection) { drbd_msg_put_info("unknown connection"); return ERR_INVALID_REQUEST; } } /* some more paranoia, if the request was over-determined */ - if (adm_ctx.mdev && adm_ctx.tconn && - adm_ctx.mdev->tconn != adm_ctx.tconn) { - pr_warning("request: minor=%u, resource=%s; but that minor belongs to connection %s\n", - adm_ctx.minor, adm_ctx.resource_name, - adm_ctx.mdev->tconn->name); + if (adm_ctx.device && adm_ctx.resource && + adm_ctx.device->resource != adm_ctx.resource) { + pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n", + adm_ctx.minor, adm_ctx.resource->name, + adm_ctx.device->resource->name); drbd_msg_put_info("minor exists in different resource"); return ERR_INVALID_REQUEST; } - if (adm_ctx.mdev && + if (adm_ctx.device && adm_ctx.volume != VOLUME_UNSPECIFIED && - adm_ctx.volume != adm_ctx.mdev->vnr) { + adm_ctx.volume != adm_ctx.device->vnr) { pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", adm_ctx.minor, adm_ctx.volume, - adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name); + adm_ctx.device->vnr, + adm_ctx.device->resource->name); drbd_msg_put_info("minor exists as different volume"); return ERR_INVALID_REQUEST; } @@ -272,9 
+279,13 @@ fail: static int drbd_adm_finish(struct genl_info *info, int retcode) { - if (adm_ctx.tconn) { - kref_put(&adm_ctx.tconn->kref, &conn_destroy); - adm_ctx.tconn = NULL; + if (adm_ctx.connection) { + kref_put(&adm_ctx.connection->kref, drbd_destroy_connection); + adm_ctx.connection = NULL; + } + if (adm_ctx.resource) { + kref_put(&adm_ctx.resource->kref, drbd_destroy_resource); + adm_ctx.resource = NULL; } if (!adm_ctx.reply_skb) @@ -285,34 +296,34 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) return 0; } -static void setup_khelper_env(struct drbd_tconn *tconn, char **envp) +static void setup_khelper_env(struct drbd_connection *connection, char **envp) { char *afs; /* FIXME: A future version will not allow this case. */ - if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0) + if (connection->my_addr_len == 0 || connection->peer_addr_len == 0) return; - switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) { + switch (((struct sockaddr *)&connection->peer_addr)->sa_family) { case AF_INET6: afs = "ipv6"; snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr); + &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr); break; case AF_INET: afs = "ipv4"; snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); + &((struct sockaddr_in *)&connection->peer_addr)->sin_addr); break; default: afs = "ssocks"; snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); + &((struct sockaddr_in *)&connection->peer_addr)->sin_addr); } snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); } -int drbd_khelper(struct drbd_conf *mdev, char *cmd) +int drbd_khelper(struct drbd_device *device, char *cmd) { char *envp[] = { "HOME=/", "TERM=linux", @@ -322,39 +333,39 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) NULL }; char mb[12]; char *argv[] = {usermode_helper, cmd, mb, NULL }; - struct drbd_tconn 
*tconn = mdev->tconn; + struct drbd_connection *connection = first_peer_device(device)->connection; struct sib_info sib; int ret; - if (current == tconn->worker.task) - set_bit(CALLBACK_PENDING, &tconn->flags); + if (current == connection->worker.task) + set_bit(CALLBACK_PENDING, &connection->flags); - snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); - setup_khelper_env(tconn, envp); + snprintf(mb, 12, "minor-%d", device_to_minor(device)); + setup_khelper_env(connection, envp); /* The helper may take some time. * write out any unsynced meta data changes now */ - drbd_md_sync(mdev); + drbd_md_sync(device); - dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); + drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb); sib.sib_reason = SIB_HELPER_PRE; sib.helper_name = cmd; - drbd_bcast_event(mdev, &sib); + drbd_bcast_event(device, &sib); ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) - dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", + drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); else - dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", + drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); sib.sib_reason = SIB_HELPER_POST; sib.helper_exit_code = ret; - drbd_bcast_event(mdev, &sib); + drbd_bcast_event(device, &sib); - if (current == tconn->worker.task) - clear_bit(CALLBACK_PENDING, &tconn->flags); + if (current == connection->worker.task) + clear_bit(CALLBACK_PENDING, &connection->flags); if (ret < 0) /* Ignore any ERRNOs we got. 
*/ ret = 0; @@ -362,7 +373,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) return ret; } -int conn_khelper(struct drbd_tconn *tconn, char *cmd) +static int conn_khelper(struct drbd_connection *connection, char *cmd) { char *envp[] = { "HOME=/", "TERM=linux", @@ -370,23 +381,24 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd) (char[20]) { }, /* address family */ (char[60]) { }, /* address */ NULL }; - char *argv[] = {usermode_helper, cmd, tconn->name, NULL }; + char *resource_name = connection->resource->name; + char *argv[] = {usermode_helper, cmd, resource_name, NULL }; int ret; - setup_khelper_env(tconn, envp); - conn_md_sync(tconn); + setup_khelper_env(connection, envp); + conn_md_sync(connection); - conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name); + drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name); /* TODO: conn_bcast_event() ?? */ ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) - conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, tconn->name, + drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, resource_name, (ret >> 8) & 0xff, ret); else - conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, tconn->name, + drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, resource_name, (ret >> 8) & 0xff, ret); /* TODO: conn_bcast_event() ?? 
*/ @@ -396,18 +408,20 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd) return ret; } -static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) +static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection) { enum drbd_fencing_p fp = FP_NOT_AVAIL; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (get_ldev_if_state(mdev, D_CONSISTENT)) { - fp = max_t(enum drbd_fencing_p, fp, - rcu_dereference(mdev->ldev->disk_conf)->fencing); - put_ldev(mdev); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + if (get_ldev_if_state(device, D_CONSISTENT)) { + struct disk_conf *disk_conf = + rcu_dereference(peer_device->device->ldev->disk_conf); + fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing); + put_ldev(device); } } rcu_read_unlock(); @@ -415,7 +429,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) return fp; } -bool conn_try_outdate_peer(struct drbd_tconn *tconn) +bool conn_try_outdate_peer(struct drbd_connection *connection) { unsigned int connect_cnt; union drbd_state mask = { }; @@ -424,26 +438,26 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) char *ex_to_string; int r; - if (tconn->cstate >= C_WF_REPORT_PARAMS) { - conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n"); + if (connection->cstate >= C_WF_REPORT_PARAMS) { + drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n"); return false; } - spin_lock_irq(&tconn->req_lock); - connect_cnt = tconn->connect_cnt; - spin_unlock_irq(&tconn->req_lock); + spin_lock_irq(&connection->resource->req_lock); + connect_cnt = connection->connect_cnt; + spin_unlock_irq(&connection->resource->req_lock); - fp = highest_fencing_policy(tconn); + fp = highest_fencing_policy(connection); switch (fp) { case FP_NOT_AVAIL: - conn_warn(tconn, "Not fencing peer, I'm not even 
Consistent myself.\n"); + drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n"); goto out; case FP_DONT_CARE: return true; default: ; } - r = conn_khelper(tconn, "fence-peer"); + r = conn_khelper(connection, "fence-peer"); switch ((r>>8) & 0xff) { case 3: /* peer is inconsistent */ @@ -457,7 +471,7 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) val.pdsk = D_OUTDATED; break; case 5: /* peer was down */ - if (conn_highest_disk(tconn) == D_UP_TO_DATE) { + if (conn_highest_disk(connection) == D_UP_TO_DATE) { /* we will(have) create(d) a new UUID anyways... */ ex_to_string = "peer is unreachable, assumed to be dead"; mask.pdsk = D_MASK; @@ -470,70 +484,70 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) * This is useful when an unconnected R_SECONDARY is asked to * become R_PRIMARY, but finds the other peer being active. */ ex_to_string = "peer is active"; - conn_warn(tconn, "Peer is primary, outdating myself.\n"); + drbd_warn(connection, "Peer is primary, outdating myself.\n"); mask.disk = D_MASK; val.disk = D_OUTDATED; break; case 7: if (fp != FP_STONITH) - conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n"); + drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; mask.pdsk = D_MASK; val.pdsk = D_OUTDATED; break; default: /* The script is broken ... */ - conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); + drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); return false; /* Eventually leave IO frozen */ } - conn_info(tconn, "fence-peer helper returned %d (%s)\n", + drbd_info(connection, "fence-peer helper returned %d (%s)\n", (r>>8) & 0xff, ex_to_string); out: /* Not using - conn_request_state(tconn, mask, val, CS_VERBOSE); + conn_request_state(connection, mask, val, CS_VERBOSE); here, because we might were able to re-establish the connection in the meantime. 
*/ - spin_lock_irq(&tconn->req_lock); - if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) { - if (tconn->connect_cnt != connect_cnt) + spin_lock_irq(&connection->resource->req_lock); + if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) { + if (connection->connect_cnt != connect_cnt) /* In case the connection was established and droped while the fence-peer handler was running, ignore it */ - conn_info(tconn, "Ignoring fence-peer exit code\n"); + drbd_info(connection, "Ignoring fence-peer exit code\n"); else - _conn_request_state(tconn, mask, val, CS_VERBOSE); + _conn_request_state(connection, mask, val, CS_VERBOSE); } - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&connection->resource->req_lock); - return conn_highest_pdsk(tconn) <= D_OUTDATED; + return conn_highest_pdsk(connection) <= D_OUTDATED; } static int _try_outdate_peer_async(void *data) { - struct drbd_tconn *tconn = (struct drbd_tconn *)data; + struct drbd_connection *connection = (struct drbd_connection *)data; - conn_try_outdate_peer(tconn); + conn_try_outdate_peer(connection); - kref_put(&tconn->kref, &conn_destroy); + kref_put(&connection->kref, drbd_destroy_connection); return 0; } -void conn_try_outdate_peer_async(struct drbd_tconn *tconn) +void conn_try_outdate_peer_async(struct drbd_connection *connection) { struct task_struct *opa; - kref_get(&tconn->kref); - opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h"); + kref_get(&connection->kref); + opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h"); if (IS_ERR(opa)) { - conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n"); - kref_put(&tconn->kref, &conn_destroy); + drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n"); + kref_put(&connection->kref, drbd_destroy_connection); } } enum drbd_state_rv -drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) +drbd_set_role(struct drbd_device 
*device, enum drbd_role new_role, int force) { const int max_tries = 4; enum drbd_state_rv rv = SS_UNKNOWN_ERROR; @@ -542,16 +556,24 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) int forced = 0; union drbd_state mask, val; - if (new_role == R_PRIMARY) - request_ping(mdev->tconn); /* Detect a dead peer ASAP */ + if (new_role == R_PRIMARY) { + struct drbd_connection *connection; - mutex_lock(mdev->state_mutex); + /* Detect dead peers as soon as possible. */ + + rcu_read_lock(); + for_each_connection(connection, device->resource) + request_ping(connection); + rcu_read_unlock(); + } + + mutex_lock(device->state_mutex); mask.i = 0; mask.role = R_MASK; val.i = 0; val.role = new_role; while (try++ < max_tries) { - rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); + rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE); /* in case we first succeeded to outdate, * but now suddenly could establish a connection */ @@ -562,8 +584,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } if (rv == SS_NO_UP_TO_DATE_DISK && force && - (mdev->state.disk < D_UP_TO_DATE && - mdev->state.disk >= D_INCONSISTENT)) { + (device->state.disk < D_UP_TO_DATE && + device->state.disk >= D_INCONSISTENT)) { mask.disk = D_MASK; val.disk = D_UP_TO_DATE; forced = 1; @@ -571,10 +593,10 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } if (rv == SS_NO_UP_TO_DATE_DISK && - mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { - D_ASSERT(mdev->state.pdsk == D_UNKNOWN); + device->state.disk == D_CONSISTENT && mask.pdsk == 0) { + D_ASSERT(device, device->state.pdsk == D_UNKNOWN); - if (conn_try_outdate_peer(mdev->tconn)) { + if (conn_try_outdate_peer(first_peer_device(device)->connection)) { val.disk = D_UP_TO_DATE; mask.disk = D_MASK; } @@ -584,8 +606,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (rv == SS_NOTHING_TO_DO) goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk 
== 0) { - if (!conn_try_outdate_peer(mdev->tconn) && force) { - dev_warn(DEV, "Forced into split brain situation!\n"); + if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) { + drbd_warn(device, "Forced into split brain situation!\n"); mask.pdsk = D_MASK; val.pdsk = D_OUTDATED; @@ -597,7 +619,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) retry at most once more in this case. */ int timeo; rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; rcu_read_unlock(); schedule_timeout_interruptible(timeo); @@ -606,7 +628,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) continue; } if (rv < SS_SUCCESS) { - rv = _drbd_request_state(mdev, mask, val, + rv = _drbd_request_state(device, mask, val, CS_VERBOSE + CS_WAIT_COMPLETE); if (rv < SS_SUCCESS) goto out; @@ -618,53 +640,53 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) goto out; if (forced) - dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); + drbd_warn(device, "Forced to consider local data as UpToDate!\n"); /* Wait until nothing is on the fly :) */ - wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); + wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0); /* FIXME also wait for all pending P_BARRIER_ACK? 
*/ if (new_role == R_SECONDARY) { - set_disk_ro(mdev->vdisk, true); - if (get_ldev(mdev)) { - mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; - put_ldev(mdev); + set_disk_ro(device->vdisk, true); + if (get_ldev(device)) { + device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; + put_ldev(device); } } else { - mutex_lock(&mdev->tconn->conf_update); - nc = mdev->tconn->net_conf; + mutex_lock(&device->resource->conf_update); + nc = first_peer_device(device)->connection->net_conf; if (nc) nc->discard_my_data = 0; /* without copy; single bit op is atomic */ - mutex_unlock(&mdev->tconn->conf_update); + mutex_unlock(&device->resource->conf_update); - set_disk_ro(mdev->vdisk, false); - if (get_ldev(mdev)) { - if (((mdev->state.conn < C_CONNECTED || - mdev->state.pdsk <= D_FAILED) - && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced) - drbd_uuid_new_current(mdev); + set_disk_ro(device->vdisk, false); + if (get_ldev(device)) { + if (((device->state.conn < C_CONNECTED || + device->state.pdsk <= D_FAILED) + && device->ldev->md.uuid[UI_BITMAP] == 0) || forced) + drbd_uuid_new_current(device); - mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; - put_ldev(mdev); + device->ldev->md.uuid[UI_CURRENT] |= (u64)1; + put_ldev(device); } } /* writeout of activity log covered areas of the bitmap * to stable storage done in after state change already */ - if (mdev->state.conn >= C_WF_REPORT_PARAMS) { + if (device->state.conn >= C_WF_REPORT_PARAMS) { /* if this was forced, we should consider sync */ if (forced) - drbd_send_uuids(mdev); - drbd_send_current_state(mdev); + drbd_send_uuids(first_peer_device(device)); + drbd_send_current_state(first_peer_device(device)); } - drbd_md_sync(mdev); + drbd_md_sync(device); - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); out: - mutex_unlock(mdev->state_mutex); + mutex_unlock(device->state_mutex); return rv; } @@ -699,9 +721,9 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info 
*info) } if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) - retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate); + retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate); else - retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0); + retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0); out: drbd_adm_finish(info, retcode); return 0; @@ -728,7 +750,7 @@ out: * Activity log size used to be fixed 32kB, * but is about to become configurable. */ -static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, +static void drbd_md_set_sector_offsets(struct drbd_device *device, struct drbd_backing_dev *bdev) { sector_t md_size_sect = 0; @@ -804,35 +826,35 @@ char *ppsize(char *buf, unsigned long long size) * drbd_adm_suspend_io/drbd_adm_resume_io, * which are (sub) state changes triggered by admin (drbdsetup), * and can be long lived. - * This changes an mdev->flag, is triggered by drbd internals, + * This changes an device->flag, is triggered by drbd internals, * and should be short-lived. */ -void drbd_suspend_io(struct drbd_conf *mdev) +void drbd_suspend_io(struct drbd_device *device) { - set_bit(SUSPEND_IO, &mdev->flags); - if (drbd_suspended(mdev)) + set_bit(SUSPEND_IO, &device->flags); + if (drbd_suspended(device)) return; - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); + wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt)); } -void drbd_resume_io(struct drbd_conf *mdev) +void drbd_resume_io(struct drbd_device *device) { - clear_bit(SUSPEND_IO, &mdev->flags); - wake_up(&mdev->misc_wait); + clear_bit(SUSPEND_IO, &device->flags); + wake_up(&device->misc_wait); } /** * drbd_determine_dev_size() - Sets the right device size obeying all constraints - * @mdev: DRBD device. + * @device: DRBD device. * * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. 
*/ enum determine_dev_size -drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local) +drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size_sect, u_size; - struct drbd_md *md = &mdev->ldev->md; + struct drbd_md *md = &device->ldev->md; u32 prev_al_stripe_size_4k; u32 prev_al_stripes; sector_t size; @@ -851,19 +873,19 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct res * Suspend IO right here. * still lock the act_log to not trigger ASSERTs there. */ - drbd_suspend_io(mdev); - buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */ + drbd_suspend_io(device); + buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */ if (!buffer) { - drbd_resume_io(mdev); + drbd_resume_io(device); return DS_ERROR; } /* no wait necessary anymore, actually we could assert that */ - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + wait_event(device->al_wait, lc_try_lock(device->act_log)); - prev_first_sect = drbd_md_first_sector(mdev->ldev); - prev_size = mdev->ldev->md.md_size_sect; - la_size_sect = mdev->ldev->md.la_size_sect; + prev_first_sect = drbd_md_first_sector(device->ldev); + prev_size = device->ldev->md.md_size_sect; + la_size_sect = device->ldev->md.la_size_sect; if (rs) { /* rs is non NULL if we should change the AL layout only */ @@ -876,18 +898,18 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct res md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4; } - drbd_md_set_sector_offsets(mdev, mdev->ldev); + drbd_md_set_sector_offsets(device, device->ldev); rcu_read_lock(); - u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); - size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED); 
+ size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED); if (size < la_size_sect) { if (rs && u_size == 0) { /* Remove "rs &&" later. This check should always be active, but right now the receiver expects the permissive behavior */ - dev_warn(DEV, "Implicit shrink not allowed. " + drbd_warn(device, "Implicit shrink not allowed. " "Use --size=%llus for explicit shrink.\n", (unsigned long long)size); rv = DS_ERROR_SHRINK; @@ -898,60 +920,60 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct res goto err_out; } - if (drbd_get_capacity(mdev->this_bdev) != size || - drbd_bm_capacity(mdev) != size) { + if (drbd_get_capacity(device->this_bdev) != size || + drbd_bm_capacity(device) != size) { int err; - err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC)); + err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC)); if (unlikely(err)) { /* currently there is only one error: ENOMEM! */ - size = drbd_bm_capacity(mdev)>>1; + size = drbd_bm_capacity(device)>>1; if (size == 0) { - dev_err(DEV, "OUT OF MEMORY! " + drbd_err(device, "OUT OF MEMORY! " "Could not allocate bitmap!\n"); } else { - dev_err(DEV, "BM resizing failed. " + drbd_err(device, "BM resizing failed. " "Leaving size unchanged at size = %lu KB\n", (unsigned long)size); } rv = DS_ERROR; } /* racy, see comments above. 
*/ - drbd_set_my_capacity(mdev, size); - mdev->ldev->md.la_size_sect = size; - dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), + drbd_set_my_capacity(device, size); + device->ldev->md.la_size_sect = size; + drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); } if (rv <= DS_ERROR) goto err_out; - la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect); + la_size_changed = (la_size_sect != device->ldev->md.la_size_sect); - md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev) - || prev_size != mdev->ldev->md.md_size_sect; + md_moved = prev_first_sect != drbd_md_first_sector(device->ldev) + || prev_size != device->ldev->md.md_size_sect; if (la_size_changed || md_moved || rs) { u32 prev_flags; - drbd_al_shrink(mdev); /* All extents inactive. */ + drbd_al_shrink(device); /* All extents inactive. */ prev_flags = md->flags; md->flags &= ~MDF_PRIMARY_IND; - drbd_md_write(mdev, buffer); + drbd_md_write(device, buffer); - dev_info(DEV, "Writing the whole bitmap, %s\n", + drbd_info(device, "Writing the whole bitmap, %s\n", la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ - drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, + drbd_bitmap_io(device, md_moved ? 
&drbd_bm_write_all : &drbd_bm_write, "size changed", BM_LOCKED_MASK); - drbd_initialize_al(mdev, buffer); + drbd_initialize_al(device, buffer); md->flags = prev_flags; - drbd_md_write(mdev, buffer); + drbd_md_write(device, buffer); if (rs) - dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n", - md->al_stripes, md->al_stripe_size_4k * 4); + drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n", + md->al_stripes, md->al_stripe_size_4k * 4); } if (size > la_size_sect) @@ -966,30 +988,30 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct res md->al_stripe_size_4k = prev_al_stripe_size_4k; md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k; - drbd_md_set_sector_offsets(mdev, mdev->ldev); + drbd_md_set_sector_offsets(device, device->ldev); } } - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - drbd_md_put_buffer(mdev); - drbd_resume_io(mdev); + lc_unlock(device->act_log); + wake_up(&device->al_wait); + drbd_md_put_buffer(device); + drbd_resume_io(device); return rv; } sector_t -drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev, sector_t u_size, int assume_peer_has_space) { - sector_t p_size = mdev->p_size; /* partner's disk size. */ + sector_t p_size = device->p_size; /* partner's disk size. */ sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. 
*/ sector_t m_size; /* my size */ sector_t size = 0; m_size = drbd_get_max_capacity(bdev); - if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) { - dev_warn(DEV, "Resize while not connected was forced by the user!\n"); + if (device->state.conn < C_CONNECTED && assume_peer_has_space) { + drbd_warn(device, "Resize while not connected was forced by the user!\n"); p_size = m_size; } @@ -1011,11 +1033,11 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, } if (size == 0) - dev_err(DEV, "Both nodes diskless!\n"); + drbd_err(device, "Both nodes diskless!\n"); if (u_size) { if (u_size > size) - dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n", + drbd_err(device, "Requested disk size is too big (%lu > %lu)\n", (unsigned long)u_size>>1, (unsigned long)size>>1); else size = u_size; @@ -1026,71 +1048,71 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, /** * drbd_check_al_size() - Ensures that the AL is of the right size - * @mdev: DRBD device. + * @device: DRBD device. * * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation * failed, and 0 on success. You should call drbd_md_sync() after you called * this function. 
*/ -static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) +static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) { struct lru_cache *n, *t; struct lc_element *e; unsigned int in_use; int i; - if (mdev->act_log && - mdev->act_log->nr_elements == dc->al_extents) + if (device->act_log && + device->act_log->nr_elements == dc->al_extents) return 0; in_use = 0; - t = mdev->act_log; + t = device->act_log; n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, dc->al_extents, sizeof(struct lc_element), 0); if (n == NULL) { - dev_err(DEV, "Cannot allocate act_log lru!\n"); + drbd_err(device, "Cannot allocate act_log lru!\n"); return -ENOMEM; } - spin_lock_irq(&mdev->al_lock); + spin_lock_irq(&device->al_lock); if (t) { for (i = 0; i < t->nr_elements; i++) { e = lc_element_by_index(t, i); if (e->refcnt) - dev_err(DEV, "refcnt(%d)==%d\n", + drbd_err(device, "refcnt(%d)==%d\n", e->lc_number, e->refcnt); in_use += e->refcnt; } } if (!in_use) - mdev->act_log = n; - spin_unlock_irq(&mdev->al_lock); + device->act_log = n; + spin_unlock_irq(&device->al_lock); if (in_use) { - dev_err(DEV, "Activity log still in use!\n"); + drbd_err(device, "Activity log still in use!\n"); lc_destroy(n); return -EBUSY; } else { if (t) lc_destroy(t); } - drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */ + drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */ return 0; } -static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) +static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size) { - struct request_queue * const q = mdev->rq_queue; + struct request_queue * const q = device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; unsigned int max_segments = 0; - if (get_ldev_if_state(mdev, D_ATTACHING)) { - struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; + if (get_ldev_if_state(device, D_ATTACHING)) { + 
struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); rcu_read_lock(); - max_segments = rcu_dereference(mdev->ldev->disk_conf)->max_bio_bvecs; + max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; rcu_read_unlock(); - put_ldev(mdev); + put_ldev(device); } blk_queue_logical_block_size(q, 512); @@ -1099,46 +1121,46 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); - if (get_ldev_if_state(mdev, D_ATTACHING)) { - struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; + if (get_ldev_if_state(device, D_ATTACHING)) { + struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue; blk_queue_stack_limits(q, b); if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { - dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", + drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", q->backing_dev_info.ra_pages, b->backing_dev_info.ra_pages); q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; } - put_ldev(mdev); + put_ldev(device); } } -void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) +void drbd_reconsider_max_bio_size(struct drbd_device *device) { unsigned int now, new, local, peer; - now = queue_max_hw_sectors(mdev->rq_queue) << 9; - local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */ - peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */ + now = queue_max_hw_sectors(device->rq_queue) << 9; + local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ + peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ - if (get_ldev_if_state(mdev, D_ATTACHING)) { - local = 
queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; - mdev->local_max_bio_size = local; - put_ldev(mdev); + if (get_ldev_if_state(device, D_ATTACHING)) { + local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; + device->local_max_bio_size = local; + put_ldev(device); } local = min(local, DRBD_MAX_BIO_SIZE); /* We may ignore peer limits if the peer is modern enough. Because new from 8.3.8 onwards the peer can use multiple BIOs for a single peer_request */ - if (mdev->state.conn >= C_WF_REPORT_PARAMS) { - if (mdev->tconn->agreed_pro_version < 94) - peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + if (device->state.conn >= C_WF_REPORT_PARAMS) { + if (first_peer_device(device)->connection->agreed_pro_version < 94) + peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ - else if (mdev->tconn->agreed_pro_version == 94) + else if (first_peer_device(device)->connection->agreed_pro_version == 94) peer = DRBD_MAX_SIZE_H80_PACKET; - else if (mdev->tconn->agreed_pro_version < 100) + else if (first_peer_device(device)->connection->agreed_pro_version < 100) peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ else peer = DRBD_MAX_BIO_SIZE; @@ -1146,57 +1168,57 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) new = min(local, peer); - if (mdev->state.role == R_PRIMARY && new < now) - dev_err(DEV, "ASSERT FAILED new < now; (%u < %u)\n", new, now); + if (device->state.role == R_PRIMARY && new < now) + drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now); if (new != now) - dev_info(DEV, "max BIO size = %u\n", new); + drbd_info(device, "max BIO size = %u\n", new); - drbd_setup_queue_param(mdev, new); + drbd_setup_queue_param(device, new); } /* Starts the worker thread */ -static void conn_reconfig_start(struct drbd_tconn *tconn) +static void conn_reconfig_start(struct drbd_connection *connection) 
{ - drbd_thread_start(&tconn->worker); - conn_flush_workqueue(tconn); + drbd_thread_start(&connection->worker); + drbd_flush_workqueue(&connection->sender_work); } /* if still unconfigured, stops worker again. */ -static void conn_reconfig_done(struct drbd_tconn *tconn) +static void conn_reconfig_done(struct drbd_connection *connection) { bool stop_threads; - spin_lock_irq(&tconn->req_lock); - stop_threads = conn_all_vols_unconf(tconn) && - tconn->cstate == C_STANDALONE; - spin_unlock_irq(&tconn->req_lock); + spin_lock_irq(&connection->resource->req_lock); + stop_threads = conn_all_vols_unconf(connection) && + connection->cstate == C_STANDALONE; + spin_unlock_irq(&connection->resource->req_lock); if (stop_threads) { /* asender is implicitly stopped by receiver * in conn_disconnect() */ - drbd_thread_stop(&tconn->receiver); - drbd_thread_stop(&tconn->worker); + drbd_thread_stop(&connection->receiver); + drbd_thread_stop(&connection->worker); } } /* Make sure IO is suspended before calling this function(). 
*/ -static void drbd_suspend_al(struct drbd_conf *mdev) +static void drbd_suspend_al(struct drbd_device *device) { int s = 0; - if (!lc_try_lock(mdev->act_log)) { - dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n"); + if (!lc_try_lock(device->act_log)) { + drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n"); return; } - drbd_al_shrink(mdev); - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.conn < C_CONNECTED) - s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags); - spin_unlock_irq(&mdev->tconn->req_lock); - lc_unlock(mdev->act_log); + drbd_al_shrink(device); + spin_lock_irq(&device->resource->req_lock); + if (device->state.conn < C_CONNECTED) + s = !test_and_set_bit(AL_SUSPENDED, &device->flags); + spin_unlock_irq(&device->resource->req_lock); + lc_unlock(device->act_log); if (s) - dev_info(DEV, "Suspended AL updates\n"); + drbd_info(device, "Suspended AL updates\n"); } @@ -1237,7 +1259,7 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; - struct drbd_conf *mdev; + struct drbd_device *device; struct disk_conf *new_disk_conf, *old_disk_conf; struct fifo_buffer *old_plan = NULL, *new_plan = NULL; int err, fifo_size; @@ -1248,11 +1270,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; + device = adm_ctx.device; /* we also need a disk * to change the options on */ - if (!get_ldev(mdev)) { + if (!get_ldev(device)) { retcode = ERR_NO_DISK; goto out; } @@ -1263,8 +1285,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) goto fail; } - mutex_lock(&mdev->tconn->conf_update); - old_disk_conf = mdev->ldev->disk_conf; + mutex_lock(&device->resource->conf_update); + old_disk_conf = device->ldev->disk_conf; *new_disk_conf = *old_disk_conf; if (should_set_defaults(info)) set_disk_conf_defaults(new_disk_conf); @@ -1273,6 +1295,7 @@ int 
drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail_unlock; } if (!expect(new_disk_conf->resync_rate >= 1)) @@ -1280,29 +1303,29 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; - if (new_disk_conf->al_extents > drbd_al_extents_max(mdev->ldev)) - new_disk_conf->al_extents = drbd_al_extents_max(mdev->ldev); + if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev)) + new_disk_conf->al_extents = drbd_al_extents_max(device->ldev); if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s->size) { + if (fifo_size != device->rs_plan_s->size) { new_plan = fifo_alloc(fifo_size); if (!new_plan) { - dev_err(DEV, "kmalloc of fifo_buffer failed"); + drbd_err(device, "kmalloc of fifo_buffer failed"); retcode = ERR_NOMEM; goto fail_unlock; } } - drbd_suspend_io(mdev); - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - drbd_al_shrink(mdev); - err = drbd_check_al_size(mdev, new_disk_conf); - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - drbd_resume_io(mdev); + drbd_suspend_io(device); + wait_event(device->al_wait, lc_try_lock(device->act_log)); + drbd_al_shrink(device); + err = drbd_check_al_size(device, new_disk_conf); + lc_unlock(device->act_log); + wake_up(&device->al_wait); + drbd_resume_io(device); if (err) { retcode = ERR_NOMEM; @@ -1310,10 +1333,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } write_lock_irq(&global_state_lock); - retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); + retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); if (retcode == NO_ERROR) { - 
rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - drbd_resync_after_changed(mdev); + rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); + drbd_resync_after_changed(device); } write_unlock_irq(&global_state_lock); @@ -1321,42 +1344,46 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) goto fail_unlock; if (new_plan) { - old_plan = mdev->rs_plan_s; - rcu_assign_pointer(mdev->rs_plan_s, new_plan); + old_plan = device->rs_plan_s; + rcu_assign_pointer(device->rs_plan_s, new_plan); } - mutex_unlock(&mdev->tconn->conf_update); + mutex_unlock(&device->resource->conf_update); if (new_disk_conf->al_updates) - mdev->ldev->md.flags &= ~MDF_AL_DISABLED; + device->ldev->md.flags &= ~MDF_AL_DISABLED; else - mdev->ldev->md.flags |= MDF_AL_DISABLED; + device->ldev->md.flags |= MDF_AL_DISABLED; if (new_disk_conf->md_flushes) - clear_bit(MD_NO_FUA, &mdev->flags); + clear_bit(MD_NO_FUA, &device->flags); else - set_bit(MD_NO_FUA, &mdev->flags); + set_bit(MD_NO_FUA, &device->flags); - drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); + drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); - drbd_md_sync(mdev); + drbd_md_sync(device); - if (mdev->state.conn >= C_CONNECTED) - drbd_send_sync_param(mdev); + if (device->state.conn >= C_CONNECTED) { + struct drbd_peer_device *peer_device; + + for_each_peer_device(peer_device, device) + drbd_send_sync_param(peer_device); + } synchronize_rcu(); kfree(old_disk_conf); kfree(old_plan); - mod_timer(&mdev->request_timer, jiffies + HZ); + mod_timer(&device->request_timer, jiffies + HZ); goto success; fail_unlock: - mutex_unlock(&mdev->tconn->conf_update); + mutex_unlock(&device->resource->conf_update); fail: kfree(new_disk_conf); kfree(new_plan); success: - put_ldev(mdev); + put_ldev(device); out: drbd_adm_finish(info, retcode); return 0; @@ -1364,7 +1391,7 @@ success: int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; + struct drbd_device 
*device; int err; enum drbd_ret_code retcode; enum determine_dev_size dd; @@ -1385,11 +1412,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto finish; - mdev = adm_ctx.mdev; - conn_reconfig_start(mdev->tconn); + device = adm_ctx.device; + conn_reconfig_start(first_peer_device(device)->connection); /* if you want to reconfigure, please tear down first */ - if (mdev->state.disk > D_DISKLESS) { + if (device->state.disk > D_DISKLESS) { retcode = ERR_DISK_CONFIGURED; goto fail; } @@ -1397,17 +1424,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * drbd_ldev_destroy is done already, we may end up here very fast, * e.g. if someone calls attach from the on-io-error handler, * to realize a "hot spare" feature (not that I'd recommend that) */ - wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + wait_event(device->misc_wait, !atomic_read(&device->local_cnt)); /* make sure there is no leftover from previous force-detach attempts */ - clear_bit(FORCE_DETACH, &mdev->flags); - clear_bit(WAS_IO_ERROR, &mdev->flags); - clear_bit(WAS_READ_ERROR, &mdev->flags); + clear_bit(FORCE_DETACH, &device->flags); + clear_bit(WAS_IO_ERROR, &device->flags); + clear_bit(WAS_READ_ERROR, &device->flags); /* and no leftover from previously aborted resync or verify, either */ - mdev->rs_total = 0; - mdev->rs_failed = 0; - atomic_set(&mdev->rs_pending_cnt, 0); + device->rs_total = 0; + device->rs_failed = 0; + atomic_set(&device->rs_pending_cnt, 0); /* allocation not in the IO path, drbdsetup context */ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); @@ -1447,13 +1474,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } write_lock_irq(&global_state_lock); - retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); + retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); write_unlock_irq(&global_state_lock); if (retcode != NO_ERROR) goto fail; rcu_read_lock(); 
- nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); if (nc) { if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { rcu_read_unlock(); @@ -1464,9 +1491,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); bdev = blkdev_get_by_path(new_disk_conf->backing_dev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); + FMODE_READ | FMODE_WRITE | FMODE_EXCL, device); if (IS_ERR(bdev)) { - dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev, + drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev, PTR_ERR(bdev)); retcode = ERR_OPEN_DISK; goto fail; @@ -1484,9 +1511,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) bdev = blkdev_get_by_path(new_disk_conf->meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, (new_disk_conf->meta_dev_idx < 0) ? - (void *)mdev : (void *)drbd_m_holder); + (void *)device : (void *)drbd_m_holder); if (IS_ERR(bdev)) { - dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev, + drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev, PTR_ERR(bdev)); retcode = ERR_OPEN_MD_DISK; goto fail; @@ -1510,7 +1537,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Read our meta data super block early. * This also sets other on-disk offsets. 
*/ - retcode = drbd_md_read(mdev, nbc); + retcode = drbd_md_read(device, nbc); if (retcode != NO_ERROR) goto fail; @@ -1520,7 +1547,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) new_disk_conf->al_extents = drbd_al_extents_max(nbc); if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { - dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", + drbd_err(device, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) new_disk_conf->disk_size); retcode = ERR_DISK_TOO_SMALL; @@ -1538,7 +1565,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { retcode = ERR_MD_DISK_TOO_SMALL; - dev_warn(DEV, "refusing attach: md-device too small, " + drbd_warn(device, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); goto fail; @@ -1547,7 +1574,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Make sure the new disk is big enough * (we may currently be R_PRIMARY with no local disk...) 
*/ if (drbd_get_max_capacity(nbc) < - drbd_get_capacity(mdev->this_bdev)) { + drbd_get_capacity(device->this_bdev)) { retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1555,15 +1582,15 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) nbc->known_size = drbd_get_capacity(nbc->backing_bdev); if (nbc->known_size > max_possible_sectors) { - dev_warn(DEV, "==> truncating very big lower level device " + drbd_warn(device, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); if (new_disk_conf->meta_dev_idx >= 0) - dev_warn(DEV, "==>> using internal or flexible " + drbd_warn(device, "==>> using internal or flexible " "meta data may help <<==\n"); } - drbd_suspend_io(mdev); + drbd_suspend_io(device); /* also wait for the last barrier ack. */ /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171 * We need a way to either ignore barrier acks for barriers sent before a device @@ -1571,45 +1598,45 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * As barriers are counted per resource, * we'd need to suspend io on all devices of a resource. */ - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev)); + wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device)); /* and for any other previously queued work */ - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); - rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); + rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); retcode = rv; /* FIXME: Type mismatch. 
*/ - drbd_resume_io(mdev); + drbd_resume_io(device); if (rv < SS_SUCCESS) goto fail; - if (!get_ldev_if_state(mdev, D_ATTACHING)) + if (!get_ldev_if_state(device, D_ATTACHING)) goto force_diskless; - if (!mdev->bitmap) { - if (drbd_bm_init(mdev)) { + if (!device->bitmap) { + if (drbd_bm_init(device)) { retcode = ERR_NOMEM; goto force_diskless_dec; } } - if (mdev->state.conn < C_CONNECTED && - mdev->state.role == R_PRIMARY && - (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { - dev_err(DEV, "Can only attach to data with current UUID=%016llX\n", - (unsigned long long)mdev->ed_uuid); + if (device->state.conn < C_CONNECTED && + device->state.role == R_PRIMARY && + (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { + drbd_err(device, "Can only attach to data with current UUID=%016llX\n", + (unsigned long long)device->ed_uuid); retcode = ERR_DATA_NOT_CURRENT; goto force_diskless_dec; } /* Since we are diskless, fix the activity log first... */ - if (drbd_check_al_size(mdev, new_disk_conf)) { + if (drbd_check_al_size(device, new_disk_conf)) { retcode = ERR_NOMEM; goto force_diskless_dec; } /* Prevent shrinking of consistent devices ! */ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { - dev_warn(DEV, "refusing to truncate a consistent device\n"); + drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { + drbd_warn(device, "refusing to truncate a consistent device\n"); retcode = ERR_DISK_TOO_SMALL; goto force_diskless_dec; } @@ -1617,40 +1644,40 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. 
*/ if (new_disk_conf->md_flushes) - clear_bit(MD_NO_FUA, &mdev->flags); + clear_bit(MD_NO_FUA, &device->flags); else - set_bit(MD_NO_FUA, &mdev->flags); + set_bit(MD_NO_FUA, &device->flags); /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. - * now mdev takes over responsibility, and the state engine should + * now device takes over responsibility, and the state engine should * clean it up somewhere. */ - D_ASSERT(mdev->ldev == NULL); - mdev->ldev = nbc; - mdev->resync = resync_lru; - mdev->rs_plan_s = new_plan; + D_ASSERT(device, device->ldev == NULL); + device->ldev = nbc; + device->resync = resync_lru; + device->rs_plan_s = new_plan; nbc = NULL; resync_lru = NULL; new_disk_conf = NULL; new_plan = NULL; - drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); + drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); - if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) - set_bit(CRASHED_PRIMARY, &mdev->flags); + if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) + set_bit(CRASHED_PRIMARY, &device->flags); else - clear_bit(CRASHED_PRIMARY, &mdev->flags); + clear_bit(CRASHED_PRIMARY, &device->flags); - if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) - set_bit(CRASHED_PRIMARY, &mdev->flags); + if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) && + !(device->state.role == R_PRIMARY && device->resource->susp_nod)) + set_bit(CRASHED_PRIMARY, &device->flags); - mdev->send_cnt = 0; - mdev->recv_cnt = 0; - mdev->read_cnt = 0; - mdev->writ_cnt = 0; + device->send_cnt = 0; + device->recv_cnt = 0; + device->read_cnt = 0; + device->writ_cnt = 0; - drbd_reconsider_max_bio_size(mdev); + drbd_reconsider_max_bio_size(device); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, @@ -1666,50 +1693,50 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * so we can automatically recover from a 
crash of a * degraded but active "cluster" after a certain timeout. */ - clear_bit(USE_DEGR_WFC_T, &mdev->flags); - if (mdev->state.role != R_PRIMARY && - drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) - set_bit(USE_DEGR_WFC_T, &mdev->flags); + clear_bit(USE_DEGR_WFC_T, &device->flags); + if (device->state.role != R_PRIMARY && + drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) && + !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND)) + set_bit(USE_DEGR_WFC_T, &device->flags); - dd = drbd_determine_dev_size(mdev, 0, NULL); + dd = drbd_determine_dev_size(device, 0, NULL); if (dd <= DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; } else if (dd == DS_GREW) - set_bit(RESYNC_AFTER_NEG, &mdev->flags); + set_bit(RESYNC_AFTER_NEG, &device->flags); - if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) || - (test_bit(CRASHED_PRIMARY, &mdev->flags) && - drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) { - dev_info(DEV, "Assuming that all blocks are out of sync " + if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) || + (test_bit(CRASHED_PRIMARY, &device->flags) && + drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) { + drbd_info(device, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, + if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from attaching", BM_LOCKED_MASK)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } else { - if (drbd_bitmap_io(mdev, &drbd_bm_read, + if (drbd_bitmap_io(device, &drbd_bm_read, "read from attaching", BM_LOCKED_MASK)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } - if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) - drbd_suspend_al(mdev); /* IO is still suspended here... */ + if (_drbd_bm_total_weight(device) == drbd_bm_bits(device)) + drbd_suspend_al(device); /* IO is still suspended here... 
*/ - spin_lock_irq(&mdev->tconn->req_lock); - os = drbd_read_state(mdev); + spin_lock_irq(&device->resource->req_lock); + os = drbd_read_state(device); ns = os; /* If MDF_CONSISTENT is not set go into inconsistent state, otherwise investigate MDF_WasUpToDate... If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state, otherwise into D_CONSISTENT state. */ - if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) { - if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE)) + if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) { + if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE)) ns.disk = D_CONSISTENT; else ns.disk = D_OUTDATED; @@ -1717,12 +1744,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) ns.disk = D_INCONSISTENT; } - if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED)) + if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED)) ns.pdsk = D_OUTDATED; rcu_read_lock(); if (ns.disk == D_CONSISTENT && - (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE)) + (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, @@ -1730,56 +1757,56 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) this point, because drbd_request_state() modifies these flags. */ - if (rcu_dereference(mdev->ldev->disk_conf)->al_updates) - mdev->ldev->md.flags &= ~MDF_AL_DISABLED; + if (rcu_dereference(device->ldev->disk_conf)->al_updates) + device->ldev->md.flags &= ~MDF_AL_DISABLED; else - mdev->ldev->md.flags |= MDF_AL_DISABLED; + device->ldev->md.flags |= MDF_AL_DISABLED; rcu_read_unlock(); /* In case we are C_CONNECTED postpone any decision on the new disk state after the negotiation phase. 
*/ - if (mdev->state.conn == C_CONNECTED) { - mdev->new_state_tmp.i = ns.i; + if (device->state.conn == C_CONNECTED) { + device->new_state_tmp.i = ns.i; ns.i = os.i; ns.disk = D_NEGOTIATING; /* We expect to receive up-to-date UUIDs soon. To avoid a race in receive_state, free p_uuid while holding req_lock. I.e. atomic with the state change */ - kfree(mdev->p_uuid); - mdev->p_uuid = NULL; + kfree(device->p_uuid); + device->p_uuid = NULL; } - rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); + rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL); + spin_unlock_irq(&device->resource->req_lock); if (rv < SS_SUCCESS) goto force_diskless_dec; - mod_timer(&mdev->request_timer, jiffies + HZ); + mod_timer(&device->request_timer, jiffies + HZ); - if (mdev->state.role == R_PRIMARY) - mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; + if (device->state.role == R_PRIMARY) + device->ldev->md.uuid[UI_CURRENT] |= (u64)1; else - mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; + device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; - drbd_md_mark_dirty(mdev); - drbd_md_sync(mdev); + drbd_md_mark_dirty(device); + drbd_md_sync(device); - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); - put_ldev(mdev); - conn_reconfig_done(mdev->tconn); + kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); + put_ldev(device); + conn_reconfig_done(first_peer_device(device)->connection); drbd_adm_finish(info, retcode); return 0; force_diskless_dec: - put_ldev(mdev); + put_ldev(device); force_diskless: - drbd_force_state(mdev, NS(disk, D_DISKLESS)); - drbd_md_sync(mdev); + drbd_force_state(device, NS(disk, D_DISKLESS)); + drbd_md_sync(device); fail: - conn_reconfig_done(mdev->tconn); + conn_reconfig_done(first_peer_device(device)->connection); if (nbc) { if (nbc->backing_bdev) blkdev_put(nbc->backing_bdev, @@ -1798,26 +1825,26 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } -static int adm_detach(struct drbd_conf *mdev, int 
force) +static int adm_detach(struct drbd_device *device, int force) { enum drbd_state_rv retcode; int ret; if (force) { - set_bit(FORCE_DETACH, &mdev->flags); - drbd_force_state(mdev, NS(disk, D_FAILED)); + set_bit(FORCE_DETACH, &device->flags); + drbd_force_state(device, NS(disk, D_FAILED)); retcode = SS_SUCCESS; goto out; } - drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */ - retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); - drbd_md_put_buffer(mdev); + drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ + drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */ + retcode = drbd_request_state(device, NS(disk, D_FAILED)); + drbd_md_put_buffer(device); /* D_FAILED will transition to DISKLESS. */ - ret = wait_event_interruptible(mdev->misc_wait, - mdev->state.disk != D_FAILED); - drbd_resume_io(mdev); + ret = wait_event_interruptible(device->misc_wait, + device->state.disk != D_FAILED); + drbd_resume_io(device); if ((int)retcode == (int)SS_IS_DISKLESS) retcode = SS_NOTHING_TO_DO; if (ret) @@ -1852,24 +1879,25 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) } } - retcode = adm_detach(adm_ctx.mdev, parms.force_detach); + retcode = adm_detach(adm_ctx.device, parms.force_detach); out: drbd_adm_finish(info, retcode); return 0; } -static bool conn_resync_running(struct drbd_tconn *tconn) +static bool conn_resync_running(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; bool rv = false; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_PAUSED_SYNC_S || - mdev->state.conn == C_PAUSED_SYNC_T) { + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + if 
(device->state.conn == C_SYNC_SOURCE || + device->state.conn == C_SYNC_TARGET || + device->state.conn == C_PAUSED_SYNC_S || + device->state.conn == C_PAUSED_SYNC_T) { rv = true; break; } @@ -1879,16 +1907,17 @@ static bool conn_resync_running(struct drbd_tconn *tconn) return rv; } -static bool conn_ov_running(struct drbd_tconn *tconn) +static bool conn_ov_running(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; bool rv = false; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) { + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + if (device->state.conn == C_VERIFY_S || + device->state.conn == C_VERIFY_T) { rv = true; break; } @@ -1899,63 +1928,65 @@ static bool conn_ov_running(struct drbd_tconn *tconn) } static enum drbd_ret_code -_check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct net_conf *new_conf) +_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int i; - if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) { - if (new_conf->wire_protocol != old_conf->wire_protocol) + if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) { + if (new_net_conf->wire_protocol != old_net_conf->wire_protocol) return ERR_NEED_APV_100; - if (new_conf->two_primaries != old_conf->two_primaries) + if (new_net_conf->two_primaries != old_net_conf->two_primaries) return ERR_NEED_APV_100; - if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg)) + if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg)) return ERR_NEED_APV_100; } - if (!new_conf->two_primaries && - conn_highest_role(tconn) == R_PRIMARY && - 
conn_highest_peer(tconn) == R_PRIMARY) + if (!new_net_conf->two_primaries && + conn_highest_role(connection) == R_PRIMARY && + conn_highest_peer(connection) == R_PRIMARY) return ERR_NEED_ALLOW_TWO_PRI; - if (new_conf->two_primaries && - (new_conf->wire_protocol != DRBD_PROT_C)) + if (new_net_conf->two_primaries && + (new_net_conf->wire_protocol != DRBD_PROT_C)) return ERR_NOT_PROTO_C; - idr_for_each_entry(&tconn->volumes, mdev, i) { - if (get_ldev(mdev)) { - enum drbd_fencing_p fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; - put_ldev(mdev); - if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) + idr_for_each_entry(&connection->peer_devices, peer_device, i) { + struct drbd_device *device = peer_device->device; + if (get_ldev(device)) { + enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing; + put_ldev(device); + if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) return ERR_STONITH_AND_PROT_A; } - if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) + if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data) return ERR_DISCARD_IMPOSSIBLE; } - if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) + if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A) return ERR_CONG_NOT_PROTO_A; return NO_ERROR; } static enum drbd_ret_code -check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) +check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf) { static enum drbd_ret_code rv; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int i; rcu_read_lock(); - rv = _check_net_options(tconn, rcu_dereference(tconn->net_conf), new_conf); + rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf); rcu_read_unlock(); - /* tconn->volumes protected by genl_lock() here */ - idr_for_each_entry(&tconn->volumes, mdev, i) { - if (!mdev->bitmap) { - 
if(drbd_bm_init(mdev)) + /* connection->volumes protected by genl_lock() here */ + idr_for_each_entry(&connection->peer_devices, peer_device, i) { + struct drbd_device *device = peer_device->device; + if (!device->bitmap) { + if (drbd_bm_init(device)) return ERR_NOMEM; } } @@ -1986,26 +2017,26 @@ alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg) } static enum drbd_ret_code -alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) +alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) { char hmac_name[CRYPTO_MAX_ALG_NAME]; enum drbd_ret_code rv; - rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg, + rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg, ERR_CSUMS_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_hash(&crypto->verify_tfm, new_conf->verify_alg, + rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg, ERR_VERIFY_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_hash(&crypto->integrity_tfm, new_conf->integrity_alg, + rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg, ERR_INTEGRITY_ALG); if (rv != NO_ERROR) return rv; - if (new_conf->cram_hmac_alg[0] != 0) { + if (new_net_conf->cram_hmac_alg[0] != 0) { snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", - new_conf->cram_hmac_alg); + new_net_conf->cram_hmac_alg); rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name, ERR_AUTH_ALG); @@ -2025,8 +2056,8 @@ static void free_crypto(struct crypto *crypto) int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; - struct drbd_tconn *tconn; - struct net_conf *old_conf, *new_conf = NULL; + struct drbd_connection *connection; + struct net_conf *old_net_conf, *new_net_conf = NULL; int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ @@ -2038,98 +2069,103 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - tconn = adm_ctx.tconn; + connection = adm_ctx.connection; - new_conf = 
kzalloc(sizeof(struct net_conf), GFP_KERNEL); - if (!new_conf) { + new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_net_conf) { retcode = ERR_NOMEM; goto out; } - conn_reconfig_start(tconn); + conn_reconfig_start(connection); - mutex_lock(&tconn->data.mutex); - mutex_lock(&tconn->conf_update); - old_conf = tconn->net_conf; + mutex_lock(&connection->data.mutex); + mutex_lock(&connection->resource->conf_update); + old_net_conf = connection->net_conf; - if (!old_conf) { + if (!old_net_conf) { drbd_msg_put_info("net conf missing, try connect"); retcode = ERR_INVALID_REQUEST; goto fail; } - *new_conf = *old_conf; + *new_net_conf = *old_net_conf; if (should_set_defaults(info)) - set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_net_conf); - err = net_conf_from_attrs_for_change(new_conf, info); + err = net_conf_from_attrs_for_change(new_net_conf, info); if (err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - retcode = check_net_options(tconn, new_conf); + retcode = check_net_options(connection, new_net_conf); if (retcode != NO_ERROR) goto fail; /* re-sync running */ - rsr = conn_resync_running(tconn); - if (rsr && strcmp(new_conf->csums_alg, old_conf->csums_alg)) { + rsr = conn_resync_running(connection); + if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) { retcode = ERR_CSUMS_RESYNC_RUNNING; goto fail; } /* online verify running */ - ovr = conn_ov_running(tconn); - if (ovr && strcmp(new_conf->verify_alg, old_conf->verify_alg)) { + ovr = conn_ov_running(connection); + if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) { retcode = ERR_VERIFY_RUNNING; goto fail; } - retcode = alloc_crypto(&crypto, new_conf); + retcode = alloc_crypto(&crypto, new_net_conf); if (retcode != NO_ERROR) goto fail; - rcu_assign_pointer(tconn->net_conf, new_conf); + rcu_assign_pointer(connection->net_conf, new_net_conf); if (!rsr) { - 
crypto_free_hash(tconn->csums_tfm); - tconn->csums_tfm = crypto.csums_tfm; + crypto_free_hash(connection->csums_tfm); + connection->csums_tfm = crypto.csums_tfm; crypto.csums_tfm = NULL; } if (!ovr) { - crypto_free_hash(tconn->verify_tfm); - tconn->verify_tfm = crypto.verify_tfm; + crypto_free_hash(connection->verify_tfm); + connection->verify_tfm = crypto.verify_tfm; crypto.verify_tfm = NULL; } - crypto_free_hash(tconn->integrity_tfm); - tconn->integrity_tfm = crypto.integrity_tfm; - if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100) - /* Do this without trying to take tconn->data.mutex again. */ - __drbd_send_protocol(tconn, P_PROTOCOL_UPDATE); + crypto_free_hash(connection->integrity_tfm); + connection->integrity_tfm = crypto.integrity_tfm; + if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100) + /* Do this without trying to take connection->data.mutex again. */ + __drbd_send_protocol(connection, P_PROTOCOL_UPDATE); - crypto_free_hash(tconn->cram_hmac_tfm); - tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; + crypto_free_hash(connection->cram_hmac_tfm); + connection->cram_hmac_tfm = crypto.cram_hmac_tfm; - mutex_unlock(&tconn->conf_update); - mutex_unlock(&tconn->data.mutex); + mutex_unlock(&connection->resource->conf_update); + mutex_unlock(&connection->data.mutex); synchronize_rcu(); - kfree(old_conf); + kfree(old_net_conf); + + if (connection->cstate >= C_WF_REPORT_PARAMS) { + struct drbd_peer_device *peer_device; + int vnr; - if (tconn->cstate >= C_WF_REPORT_PARAMS) - drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) + drbd_send_sync_param(peer_device); + } goto done; fail: - mutex_unlock(&tconn->conf_update); - mutex_unlock(&tconn->data.mutex); + mutex_unlock(&connection->resource->conf_update); + mutex_unlock(&connection->data.mutex); free_crypto(&crypto); - kfree(new_conf); + kfree(new_net_conf); done: - 
conn_reconfig_done(tconn); + conn_reconfig_done(connection); out: drbd_adm_finish(info, retcode); return 0; @@ -2137,10 +2173,11 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; - struct net_conf *old_conf, *new_conf = NULL; + struct drbd_peer_device *peer_device; + struct net_conf *old_net_conf, *new_net_conf = NULL; struct crypto crypto = { }; - struct drbd_tconn *tconn; + struct drbd_resource *resource; + struct drbd_connection *connection; enum drbd_ret_code retcode; int i; int err; @@ -2160,106 +2197,111 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) /* No need for _rcu here. All reconfiguration is * strictly serialized on genl_lock(). We are protected against * concurrent reconfiguration/addition/deletion */ - list_for_each_entry(tconn, &drbd_tconns, all_tconn) { - if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len && - !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) { - retcode = ERR_LOCAL_ADDR; - goto out; - } + for_each_resource(resource, &drbd_resources) { + for_each_connection(connection, resource) { + if (nla_len(adm_ctx.my_addr) == connection->my_addr_len && + !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr, + connection->my_addr_len)) { + retcode = ERR_LOCAL_ADDR; + goto out; + } - if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len && - !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) { - retcode = ERR_PEER_ADDR; - goto out; + if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len && + !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr, + connection->peer_addr_len)) { + retcode = ERR_PEER_ADDR; + goto out; + } } } - tconn = adm_ctx.tconn; - conn_reconfig_start(tconn); + connection = first_connection(adm_ctx.resource); + conn_reconfig_start(connection); - if (tconn->cstate > C_STANDALONE) { + if (connection->cstate > C_STANDALONE) { 
retcode = ERR_NET_CONFIGURED; goto fail; } /* allocation not in the IO path, drbdsetup / netlink process context */ - new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL); - if (!new_conf) { + new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL); + if (!new_net_conf) { retcode = ERR_NOMEM; goto fail; } - set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_net_conf); - err = net_conf_from_attrs(new_conf, info); + err = net_conf_from_attrs(new_net_conf, info); if (err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - retcode = check_net_options(tconn, new_conf); + retcode = check_net_options(connection, new_net_conf); if (retcode != NO_ERROR) goto fail; - retcode = alloc_crypto(&crypto, new_conf); + retcode = alloc_crypto(&crypto, new_net_conf); if (retcode != NO_ERROR) goto fail; - ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; + ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; - conn_flush_workqueue(tconn); + drbd_flush_workqueue(&connection->sender_work); - mutex_lock(&tconn->conf_update); - old_conf = tconn->net_conf; - if (old_conf) { + mutex_lock(&adm_ctx.resource->conf_update); + old_net_conf = connection->net_conf; + if (old_net_conf) { retcode = ERR_NET_CONFIGURED; - mutex_unlock(&tconn->conf_update); + mutex_unlock(&adm_ctx.resource->conf_update); goto fail; } - rcu_assign_pointer(tconn->net_conf, new_conf); + rcu_assign_pointer(connection->net_conf, new_net_conf); - conn_free_crypto(tconn); - tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; - tconn->integrity_tfm = crypto.integrity_tfm; - tconn->csums_tfm = crypto.csums_tfm; - tconn->verify_tfm = crypto.verify_tfm; + conn_free_crypto(connection); + connection->cram_hmac_tfm = crypto.cram_hmac_tfm; + connection->integrity_tfm = crypto.integrity_tfm; + connection->csums_tfm = crypto.csums_tfm; + connection->verify_tfm = crypto.verify_tfm; - tconn->my_addr_len = nla_len(adm_ctx.my_addr); - memcpy(&tconn->my_addr, 
nla_data(adm_ctx.my_addr), tconn->my_addr_len); - tconn->peer_addr_len = nla_len(adm_ctx.peer_addr); - memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len); + connection->my_addr_len = nla_len(adm_ctx.my_addr); + memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len); + connection->peer_addr_len = nla_len(adm_ctx.peer_addr); + memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len); - mutex_unlock(&tconn->conf_update); + mutex_unlock(&adm_ctx.resource->conf_update); rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, i) { - mdev->send_cnt = 0; - mdev->recv_cnt = 0; + idr_for_each_entry(&connection->peer_devices, peer_device, i) { + struct drbd_device *device = peer_device->device; + device->send_cnt = 0; + device->recv_cnt = 0; } rcu_read_unlock(); - retcode = conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); - conn_reconfig_done(tconn); + conn_reconfig_done(connection); drbd_adm_finish(info, retcode); return 0; fail: free_crypto(&crypto); - kfree(new_conf); + kfree(new_net_conf); - conn_reconfig_done(tconn); + conn_reconfig_done(connection); out: drbd_adm_finish(info, retcode); return 0; } -static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force) +static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) { enum drbd_state_rv rv; - rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), + rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), force ? CS_HARD : 0); switch (rv) { @@ -2269,18 +2311,18 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for return SS_SUCCESS; case SS_PRIMARY_NOP: /* Our state checking code wants to see the peer outdated. 
*/ - rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0); + rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0); if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */ - rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_VERBOSE); + rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE); break; case SS_CW_FAILED_BY_PEER: /* The peer probably wants to see us outdated. */ - rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, disk, D_OUTDATED), 0); if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { - rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), + rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); } break; @@ -2294,18 +2336,18 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for * The state handling only uses drbd_thread_stop_nowait(), * we want to really wait here until the receiver is no more. */ - drbd_thread_stop(&adm_ctx.tconn->receiver); + drbd_thread_stop(&connection->receiver); /* Race breaker. This additional state change request may be * necessary, if this was a forced disconnect during a receiver * restart. We may have "killed" the receiver thread just - * after drbdd_init() returned. Typically, we should be + * after drbd_receiver() returned. Typically, we should be * C_STANDALONE already, now, and this becomes a no-op. 
*/ - rv2 = conn_request_state(tconn, NS(conn, C_STANDALONE), + rv2 = conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); if (rv2 < SS_SUCCESS) - conn_err(tconn, + drbd_err(connection, "unexpected rv2=%d in conn_try_disconnect()\n", rv2); } @@ -2315,7 +2357,7 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) { struct disconnect_parms parms; - struct drbd_tconn *tconn; + struct drbd_connection *connection; enum drbd_state_rv rv; enum drbd_ret_code retcode; int err; @@ -2326,7 +2368,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; - tconn = adm_ctx.tconn; + connection = adm_ctx.connection; memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { err = disconnect_parms_from_attrs(&parms, info); @@ -2337,7 +2379,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) } } - rv = conn_try_disconnect(tconn, parms.force_disconnect); + rv = conn_try_disconnect(connection, parms.force_disconnect); if (rv < SS_SUCCESS) retcode = rv; /* FIXME: Type mismatch. 
*/ else @@ -2347,27 +2389,27 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) return 0; } -void resync_after_online_grow(struct drbd_conf *mdev) +void resync_after_online_grow(struct drbd_device *device) { int iass; /* I am sync source */ - dev_info(DEV, "Resync of new storage after online grow\n"); - if (mdev->state.role != mdev->state.peer) - iass = (mdev->state.role == R_PRIMARY); + drbd_info(device, "Resync of new storage after online grow\n"); + if (device->state.role != device->state.peer) + iass = (device->state.role == R_PRIMARY); else - iass = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags); + iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags); if (iass) - drbd_start_resync(mdev, C_SYNC_SOURCE); + drbd_start_resync(device, C_SYNC_SOURCE); else - _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); + _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); } int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) { struct disk_conf *old_disk_conf, *new_disk_conf = NULL; struct resize_parms rs; - struct drbd_conf *mdev; + struct drbd_device *device; enum drbd_ret_code retcode; enum determine_dev_size dd; bool change_al_layout = false; @@ -2381,15 +2423,15 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; - mdev = adm_ctx.mdev; - if (!get_ldev(mdev)) { + device = adm_ctx.device; + if (!get_ldev(device)) { retcode = ERR_NO_DISK; goto fail; } memset(&rs, 0, sizeof(struct resize_parms)); - rs.al_stripes = mdev->ldev->md.al_stripes; - rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4; + rs.al_stripes = device->ldev->md.al_stripes; + rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4; if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { err = resize_parms_from_attrs(&rs, info); if (err) { @@ -2399,24 +2441,24 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) } } - if 
(mdev->state.conn > C_CONNECTED) { + if (device->state.conn > C_CONNECTED) { retcode = ERR_RESIZE_RESYNC; goto fail_ldev; } - if (mdev->state.role == R_SECONDARY && - mdev->state.peer == R_SECONDARY) { + if (device->state.role == R_SECONDARY && + device->state.peer == R_SECONDARY) { retcode = ERR_NO_PRIMARY; goto fail_ldev; } - if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) { + if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) { retcode = ERR_NEED_APV_93; goto fail_ldev; } rcu_read_lock(); - u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); if (u_size != (sector_t)rs.resize_size) { new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL); @@ -2426,8 +2468,8 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) } } - if (mdev->ldev->md.al_stripes != rs.al_stripes || - mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) { + if (device->ldev->md.al_stripes != rs.al_stripes || + device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) { u32 al_size_k = rs.al_stripes * rs.al_stripe_size; if (al_size_k > (16 * 1024 * 1024)) { @@ -2440,7 +2482,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) goto fail_ldev; } - if (mdev->state.conn != C_CONNECTED) { + if (device->state.conn != C_CONNECTED) { retcode = ERR_MD_LAYOUT_CONNECTED; goto fail_ldev; } @@ -2448,24 +2490,24 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) change_al_layout = true; } - if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) - mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); + if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) + device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); if (new_disk_conf) { - mutex_lock(&mdev->tconn->conf_update); - old_disk_conf = mdev->ldev->disk_conf; + 
mutex_lock(&device->resource->conf_update); + old_disk_conf = device->ldev->disk_conf; *new_disk_conf = *old_disk_conf; new_disk_conf->disk_size = (sector_t)rs.resize_size; - rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - mutex_unlock(&mdev->tconn->conf_update); + rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); + mutex_unlock(&device->resource->conf_update); synchronize_rcu(); kfree(old_disk_conf); } ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); - dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL); - drbd_md_sync(mdev); - put_ldev(mdev); + dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL); + drbd_md_sync(device); + put_ldev(device); if (dd == DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto fail; @@ -2477,12 +2519,12 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) goto fail; } - if (mdev->state.conn == C_CONNECTED) { + if (device->state.conn == C_CONNECTED) { if (dd == DS_GREW) - set_bit(RESIZE_PENDING, &mdev->flags); + set_bit(RESIZE_PENDING, &device->flags); - drbd_send_uuids(mdev); - drbd_send_sizes(mdev, 1, ddsf); + drbd_send_uuids(first_peer_device(device)); + drbd_send_sizes(first_peer_device(device), 1, ddsf); } fail: @@ -2490,14 +2532,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) return 0; fail_ldev: - put_ldev(mdev); + put_ldev(device); goto fail; } int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; - struct drbd_tconn *tconn; struct res_opts res_opts; int err; @@ -2506,9 +2547,8 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) return retcode; if (retcode != NO_ERROR) goto fail; - tconn = adm_ctx.tconn; - res_opts = tconn->res_opts; + res_opts = adm_ctx.resource->res_opts; if (should_set_defaults(info)) set_res_opts_defaults(&res_opts); @@ -2519,7 +2559,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) goto 
fail; } - err = set_resource_options(tconn, &res_opts); + err = set_resource_options(adm_ctx.resource, &res_opts); if (err) { retcode = ERR_INVALID_REQUEST; if (err == -ENOMEM) @@ -2533,7 +2573,7 @@ fail: int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; + struct drbd_device *device; int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -2542,29 +2582,29 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; + device = adm_ctx.device; /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. * Also wait for it's after_state_ch(). */ - drbd_suspend_io(mdev); - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); - drbd_flush_workqueue(mdev); + drbd_suspend_io(device); + wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); + drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); /* If we happen to be C_STANDALONE R_SECONDARY, just change to * D_INCONSISTENT, and set all bits in the bitmap. Otherwise, * try to start a resync handshake as sync target for full sync. 
*/ - if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { - retcode = drbd_request_state(mdev, NS(disk, D_INCONSISTENT)); + if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) { + retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, + if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from invalidate", BM_LOCKED_MASK)) retcode = ERR_IO_MD_DISK; } } else - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); - drbd_resume_io(mdev); + retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); + drbd_resume_io(device); out: drbd_adm_finish(info, retcode); @@ -2582,25 +2622,25 @@ static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info * if (retcode != NO_ERROR) goto out; - retcode = drbd_request_state(adm_ctx.mdev, mask, val); + retcode = drbd_request_state(adm_ctx.device, mask, val); out: drbd_adm_finish(info, retcode); return 0; } -static int drbd_bmio_set_susp_al(struct drbd_conf *mdev) +static int drbd_bmio_set_susp_al(struct drbd_device *device) { int rv; - rv = drbd_bmio_set_n_write(mdev); - drbd_suspend_al(mdev); + rv = drbd_bmio_set_n_write(device); + drbd_suspend_al(device); return rv; } int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) { int retcode; /* drbd_ret_code, drbd_state_rv */ - struct drbd_conf *mdev; + struct drbd_device *device; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -2608,32 +2648,32 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; + device = adm_ctx.device; /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. * Also wait for it's after_state_ch(). 
*/ - drbd_suspend_io(mdev); - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); - drbd_flush_workqueue(mdev); + drbd_suspend_io(device); + wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); + drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits * in the bitmap. Otherwise, try to start a resync handshake * as sync source for full sync. */ - if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_PRIMARY) { + if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) { /* The peer will get a resync upon connect anyways. Just make that into a full resync. */ - retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); + retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { - if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, + if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al, "set_n_write from invalidate_peer", BM_LOCKED_SET_ALLOWED)) retcode = ERR_IO_MD_DISK; } } else - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); - drbd_resume_io(mdev); + retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); + drbd_resume_io(device); out: drbd_adm_finish(info, retcode); @@ -2650,7 +2690,7 @@ int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) + if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO) retcode = ERR_PAUSE_IS_SET; out: drbd_adm_finish(info, retcode); @@ -2668,8 +2708,8 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { - s = adm_ctx.mdev->state; + if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { + s = adm_ctx.device->state; if (s.conn == 
C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; @@ -2690,7 +2730,7 @@ int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; + struct drbd_device *device; int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -2699,20 +2739,20 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; - if (test_bit(NEW_CUR_UUID, &mdev->flags)) { - drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); + device = adm_ctx.device; + if (test_bit(NEW_CUR_UUID, &device->flags)) { + drbd_uuid_new_current(device); + clear_bit(NEW_CUR_UUID, &device->flags); } - drbd_suspend_io(mdev); - retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); + drbd_suspend_io(device); + retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); if (retcode == SS_SUCCESS) { - if (mdev->state.conn < C_CONNECTED) - tl_clear(mdev->tconn); - if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) - tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO); + if (device->state.conn < C_CONNECTED) + tl_clear(first_peer_device(device)->connection); + if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED) + tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO); } - drbd_resume_io(mdev); + drbd_resume_io(device); out: drbd_adm_finish(info, retcode); @@ -2724,23 +2764,28 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr) +static int nla_put_drbd_cfg_context(struct sk_buff *skb, + struct drbd_resource *resource, + 
struct drbd_connection *connection, + struct drbd_device *device) { struct nlattr *nla; nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); if (!nla) goto nla_put_failure; - if (vnr != VOLUME_UNSPECIFIED && - nla_put_u32(skb, T_ctx_volume, vnr)) - goto nla_put_failure; - if (nla_put_string(skb, T_ctx_resource_name, tconn->name)) - goto nla_put_failure; - if (tconn->my_addr_len && - nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr)) + if (device && + nla_put_u32(skb, T_ctx_volume, device->vnr)) goto nla_put_failure; - if (tconn->peer_addr_len && - nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr)) + if (nla_put_string(skb, T_ctx_resource_name, resource->name)) goto nla_put_failure; + if (connection) { + if (connection->my_addr_len && + nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr)) + goto nla_put_failure; + if (connection->peer_addr_len && + nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr)) + goto nla_put_failure; + } nla_nest_end(skb, nla); return 0; @@ -2750,9 +2795,22 @@ nla_put_failure: return -EMSGSIZE; } -int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, +/* + * Return the connection of @resource if @resource has exactly one connection. 
+ */ +static struct drbd_connection *the_only_connection(struct drbd_resource *resource) +{ + struct list_head *connections = &resource->connections; + + if (list_empty(connections) || connections->next->next != connections) + return NULL; + return list_first_entry(&resource->connections, struct drbd_connection, connections); +} + +int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, const struct sib_info *sib) { + struct drbd_resource *resource = device->resource; struct state_info *si = NULL; /* for sizeof(si->member); */ struct nlattr *nla; int got_ldev; @@ -2772,27 +2830,27 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, * always in the context of the receiving process */ exclude_sensitive = sib || !capable(CAP_SYS_ADMIN); - got_ldev = get_ldev(mdev); + got_ldev = get_ldev(device); /* We need to add connection name and volume number information still. * Minor number is in drbd_genlmsghdr. */ - if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr)) + if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device)) goto nla_put_failure; - if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) + if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive)) goto nla_put_failure; rcu_read_lock(); if (got_ldev) { struct disk_conf *disk_conf; - disk_conf = rcu_dereference(mdev->ldev->disk_conf); + disk_conf = rcu_dereference(device->ldev->disk_conf); err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive); } if (!err) { struct net_conf *nc; - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); if (nc) err = net_conf_to_skb(skb, nc, exclude_sensitive); } @@ -2804,38 +2862,38 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (!nla) goto nla_put_failure; if (nla_put_u32(skb, T_sib_reason, sib ? 
sib->sib_reason : SIB_GET_STATUS_REPLY) || - nla_put_u32(skb, T_current_state, mdev->state.i) || - nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) || - nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) || - nla_put_u64(skb, T_send_cnt, mdev->send_cnt) || - nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) || - nla_put_u64(skb, T_read_cnt, mdev->read_cnt) || - nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) || - nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) || - nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) || - nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) || - nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) || - nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt))) + nla_put_u32(skb, T_current_state, device->state.i) || + nla_put_u64(skb, T_ed_uuid, device->ed_uuid) || + nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) || + nla_put_u64(skb, T_send_cnt, device->send_cnt) || + nla_put_u64(skb, T_recv_cnt, device->recv_cnt) || + nla_put_u64(skb, T_read_cnt, device->read_cnt) || + nla_put_u64(skb, T_writ_cnt, device->writ_cnt) || + nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) || + nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) || + nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) || + nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) || + nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt))) goto nla_put_failure; if (got_ldev) { int err; - spin_lock_irq(&mdev->ldev->md.uuid_lock); - err = nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); - spin_unlock_irq(&mdev->ldev->md.uuid_lock); + spin_lock_irq(&device->ldev->md.uuid_lock); + err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid); + spin_unlock_irq(&device->ldev->md.uuid_lock); if (err) goto nla_put_failure; - if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) || - nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) || - 
nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev))) + if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) || + nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) || + nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device))) goto nla_put_failure; - if (C_SYNC_SOURCE <= mdev->state.conn && - C_PAUSED_SYNC_T >= mdev->state.conn) { - if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) || - nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed)) + if (C_SYNC_SOURCE <= device->state.conn && + C_PAUSED_SYNC_T >= device->state.conn) { + if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) || + nla_put_u64(skb, T_bits_rs_failed, device->rs_failed)) goto nla_put_failure; } } @@ -2867,7 +2925,7 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, nla_put_failure: err = -EMSGSIZE; if (got_ldev) - put_ldev(mdev); + put_ldev(device); return err; } @@ -2882,7 +2940,7 @@ int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL); + err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL); if (err) { nlmsg_free(adm_ctx.reply_skb); return err; @@ -2892,22 +2950,23 @@ out: return 0; } -int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) +static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) { - struct drbd_conf *mdev; + struct drbd_device *device; struct drbd_genlmsghdr *dh; - struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0]; - struct drbd_tconn *tconn = NULL; - struct drbd_tconn *tmp; + struct drbd_resource *pos = (struct drbd_resource *)cb->args[0]; + struct drbd_resource *resource = NULL; + struct drbd_resource *tmp; unsigned volume = cb->args[1]; /* Open coded, deferred, iteration: - * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { - * idr_for_each_entry(&tconn->volumes, mdev, i) { + * for_each_resource_safe(resource, tmp, &drbd_resources) { + * connection = 
"first connection of resource or undefined"; + * idr_for_each_entry(&resource->devices, device, i) { * ... * } * } - * where tconn is cb->args[0]; + * where resource is cb->args[0]; * and i is cb->args[1]; * * cb->args[2] indicates if we shall loop over all resources, @@ -2916,44 +2975,44 @@ int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) * This may miss entries inserted after this dump started, * or entries deleted before they are reached. * - * We need to make sure the mdev won't disappear while + * We need to make sure the device won't disappear while * we are looking at it, and revalidate our iterators * on each iteration. */ - /* synchronize with conn_create()/conn_destroy() */ + /* synchronize with conn_create()/drbd_destroy_connection() */ rcu_read_lock(); /* revalidate iterator position */ - list_for_each_entry_rcu(tmp, &drbd_tconns, all_tconn) { + for_each_resource_rcu(tmp, &drbd_resources) { if (pos == NULL) { /* first iteration */ pos = tmp; - tconn = pos; + resource = pos; break; } if (tmp == pos) { - tconn = pos; + resource = pos; break; } } - if (tconn) { -next_tconn: - mdev = idr_get_next(&tconn->volumes, &volume); - if (!mdev) { - /* No more volumes to dump on this tconn. - * Advance tconn iterator. */ - pos = list_entry_rcu(tconn->all_tconn.next, - struct drbd_tconn, all_tconn); - /* Did we dump any volume on this tconn yet? */ + if (resource) { +next_resource: + device = idr_get_next(&resource->devices, &volume); + if (!device) { + /* No more volumes to dump on this resource. + * Advance resource iterator. */ + pos = list_entry_rcu(resource->resources.next, + struct drbd_resource, resources); + /* Did we dump any volume of this resource yet? */ if (volume != 0) { /* If we reached the end of the list, * or only a single resource dump was requested, * we are done. 
*/ - if (&pos->all_tconn == &drbd_tconns || cb->args[2]) + if (&pos->resources == &drbd_resources || cb->args[2]) goto out; volume = 0; - tconn = pos; - goto next_tconn; + resource = pos; + goto next_resource; } } @@ -2963,43 +3022,49 @@ next_tconn: if (!dh) goto out; - if (!mdev) { - /* This is a tconn without a single volume. + if (!device) { + /* This is a connection without a single volume. * Suprisingly enough, it may have a network * configuration. */ - struct net_conf *nc; + struct drbd_connection *connection; + dh->minor = -1U; dh->ret_code = NO_ERROR; - if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED)) - goto cancel; - nc = rcu_dereference(tconn->net_conf); - if (nc && net_conf_to_skb(skb, nc, 1) != 0) + connection = the_only_connection(resource); + if (nla_put_drbd_cfg_context(skb, resource, connection, NULL)) goto cancel; + if (connection) { + struct net_conf *nc; + + nc = rcu_dereference(connection->net_conf); + if (nc && net_conf_to_skb(skb, nc, 1) != 0) + goto cancel; + } goto done; } - D_ASSERT(mdev->vnr == volume); - D_ASSERT(mdev->tconn == tconn); + D_ASSERT(device, device->vnr == volume); + D_ASSERT(device, device->resource == resource); - dh->minor = mdev_to_minor(mdev); + dh->minor = device_to_minor(device); dh->ret_code = NO_ERROR; - if (nla_put_status_info(skb, mdev, NULL)) { + if (nla_put_status_info(skb, device, NULL)) { cancel: genlmsg_cancel(skb, dh); goto out; } done: genlmsg_end(skb, dh); - } + } out: rcu_read_unlock(); /* where to start the next iteration */ - cb->args[0] = (long)pos; - cb->args[1] = (pos == tconn) ? volume + 1 : 0; + cb->args[0] = (long)pos; + cb->args[1] = (pos == resource) ? volume + 1 : 0; - /* No more tconns/volumes/minors found results in an empty skb. + /* No more resources/volumes/minors found results in an empty skb. * Which will terminate the dump. 
*/ return skb->len; } @@ -3019,7 +3084,7 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; struct nlattr *nla; const char *resource_name; - struct drbd_tconn *tconn; + struct drbd_resource *resource; int maxtype; /* Is this a followup call? */ @@ -3048,18 +3113,19 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) if (!nla) return -EINVAL; resource_name = nla_data(nla); - tconn = conn_get_by_name(resource_name); - - if (!tconn) + if (!*resource_name) + return -ENODEV; + resource = drbd_find_resource(resource_name); + if (!resource) return -ENODEV; - kref_put(&tconn->kref, &conn_destroy); /* get_one_status() (re)validates tconn by itself */ + kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */ /* prime iterators, and set "filter" mode mark: - * only dump this tconn. */ - cb->args[0] = (long)tconn; + * only dump this connection. */ + cb->args[0] = (long)resource; /* cb->args[1] = 0; passed in this way. */ - cb->args[2] = (long)tconn; + cb->args[2] = (long)resource; dump: return get_one_status(skb, cb); @@ -3078,8 +3144,8 @@ int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) goto out; tp.timeout_type = - adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : - test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED : + adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : + test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? 
UT_DEGRADED : UT_DEFAULT; err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp); @@ -3094,7 +3160,7 @@ out: int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; + struct drbd_device *device; enum drbd_ret_code retcode; struct start_ov_parms parms; @@ -3104,10 +3170,10 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; + device = adm_ctx.device; /* resume from last known position, if possible */ - parms.ov_start_sector = mdev->ov_start_sector; + parms.ov_start_sector = device->ov_start_sector; parms.ov_stop_sector = ULLONG_MAX; if (info->attrs[DRBD_NLA_START_OV_PARMS]) { int err = start_ov_parms_from_attrs(&parms, info); @@ -3118,15 +3184,15 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) } } /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); - mdev->ov_stop_sector = parms.ov_stop_sector; + device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); + device->ov_stop_sector = parms.ov_stop_sector; /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. 
*/ - drbd_suspend_io(mdev); - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); - retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); - drbd_resume_io(mdev); + drbd_suspend_io(device); + wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); + retcode = drbd_request_state(device, NS(conn, C_VERIFY_S)); + drbd_resume_io(device); out: drbd_adm_finish(info, retcode); return 0; @@ -3135,7 +3201,7 @@ out: int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; + struct drbd_device *device; enum drbd_ret_code retcode; int skip_initial_sync = 0; int err; @@ -3147,7 +3213,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out_nolock; - mdev = adm_ctx.mdev; + device = adm_ctx.device; memset(&args, 0, sizeof(args)); if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { err = new_c_uuid_parms_from_attrs(&args, info); @@ -3158,49 +3224,50 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) } } - mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */ + mutex_lock(device->state_mutex); /* Protects us against serialized state changes. 
*/ - if (!get_ldev(mdev)) { + if (!get_ldev(device)) { retcode = ERR_NO_DISK; goto out; } /* this is "skip initial sync", assume to be clean */ - if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 && - mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { - dev_info(DEV, "Preparing to skip initial sync\n"); + if (device->state.conn == C_CONNECTED && + first_peer_device(device)->connection->agreed_pro_version >= 90 && + device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { + drbd_info(device, "Preparing to skip initial sync\n"); skip_initial_sync = 1; - } else if (mdev->state.conn != C_STANDALONE) { + } else if (device->state.conn != C_STANDALONE) { retcode = ERR_CONNECTED; goto out_dec; } - drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */ - drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ + drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */ + drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */ if (args.clear_bm) { - err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, + err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid", BM_LOCKED_MASK); if (err) { - dev_err(DEV, "Writing bitmap failed with %d\n",err); + drbd_err(device, "Writing bitmap failed with %d\n", err); retcode = ERR_IO_MD_DISK; } if (skip_initial_sync) { - drbd_send_uuids_skip_initial_sync(mdev); - _drbd_uuid_set(mdev, UI_BITMAP, 0); - drbd_print_uuids(mdev, "cleared bitmap UUID"); - spin_lock_irq(&mdev->tconn->req_lock); - _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), + drbd_send_uuids_skip_initial_sync(first_peer_device(device)); + _drbd_uuid_set(device, UI_BITMAP, 0); + drbd_print_uuids(device, "cleared bitmap UUID"); + spin_lock_irq(&device->resource->req_lock); + _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); - 
spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); } } - drbd_md_sync(mdev); + drbd_md_sync(device); out_dec: - put_ldev(mdev); + put_ldev(device); out: - mutex_unlock(mdev->state_mutex); + mutex_unlock(device->state_mutex); out_nolock: drbd_adm_finish(info, retcode); return 0; @@ -3246,7 +3313,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - if (adm_ctx.tconn) { + if (adm_ctx.resource) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { retcode = ERR_INVALID_REQUEST; drbd_msg_put_info("resource exists"); @@ -3262,7 +3329,7 @@ out: return 0; } -int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) { struct drbd_genlmsghdr *dh = info->userhdr; enum drbd_ret_code retcode; @@ -3285,41 +3352,36 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) } /* drbd_adm_prepare made sure already - * that mdev->tconn and mdev->vnr match the request. */ - if (adm_ctx.mdev) { + * that first_peer_device(device)->connection and device->vnr match the request. */ + if (adm_ctx.device) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) retcode = ERR_MINOR_EXISTS; /* else: still NO_ERROR */ goto out; } - retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); + retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume); out: drbd_adm_finish(info, retcode); return 0; } -static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) +static enum drbd_ret_code adm_del_minor(struct drbd_device *device) { - if (mdev->state.disk == D_DISKLESS && - /* no need to be mdev->state.conn == C_STANDALONE && + if (device->state.disk == D_DISKLESS && + /* no need to be device->state.conn == C_STANDALONE && * we may want to delete a minor from a live replication group. 
*/ - mdev->state.role == R_SECONDARY) { - _drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS), + device->state.role == R_SECONDARY) { + _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE + CS_WAIT_COMPLETE); - idr_remove(&mdev->tconn->volumes, mdev->vnr); - idr_remove(&minors, mdev_to_minor(mdev)); - destroy_workqueue(mdev->submit.wq); - del_gendisk(mdev->vdisk); - synchronize_rcu(); - kref_put(&mdev->kref, &drbd_minor_destroy); + drbd_delete_device(device); return NO_ERROR; } else return ERR_MINOR_CONFIGURED; } -int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -3329,7 +3391,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = adm_delete_minor(adm_ctx.mdev); + retcode = adm_del_minor(adm_ctx.device); out: drbd_adm_finish(info, retcode); return 0; @@ -3337,55 +3399,58 @@ out: int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) { + struct drbd_resource *resource; + struct drbd_connection *connection; + struct drbd_device *device; int retcode; /* enum drbd_ret_code rsp. 
enum drbd_state_rv */ - struct drbd_conf *mdev; unsigned i; - retcode = drbd_adm_prepare(skb, info, 0); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto out; - if (!adm_ctx.tconn) { - retcode = ERR_RES_NOT_KNOWN; - goto out; - } - + resource = adm_ctx.resource; /* demote */ - idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = drbd_set_role(mdev, R_SECONDARY, 0); + for_each_connection(connection, resource) { + struct drbd_peer_device *peer_device; + + idr_for_each_entry(&connection->peer_devices, peer_device, i) { + retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0); + if (retcode < SS_SUCCESS) { + drbd_msg_put_info("failed to demote"); + goto out; + } + } + + retcode = conn_try_disconnect(connection, 0); if (retcode < SS_SUCCESS) { - drbd_msg_put_info("failed to demote"); + drbd_msg_put_info("failed to disconnect"); goto out; } } - retcode = conn_try_disconnect(adm_ctx.tconn, 0); - if (retcode < SS_SUCCESS) { - drbd_msg_put_info("failed to disconnect"); - goto out; - } - /* detach */ - idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = adm_detach(mdev, 0); + idr_for_each_entry(&resource->devices, device, i) { + retcode = adm_detach(device, 0); if (retcode < SS_SUCCESS || retcode > NO_ERROR) { drbd_msg_put_info("failed to detach"); goto out; } } - /* If we reach this, all volumes (of this tconn) are Secondary, + /* If we reach this, all volumes (of this connection) are Secondary, * Disconnected, Diskless, aka Unconfigured. Make sure all threads have * actually stopped, state handling only does drbd_thread_stop_nowait(). 
*/ - drbd_thread_stop(&adm_ctx.tconn->worker); + for_each_connection(connection, resource) + drbd_thread_stop(&connection->worker); /* Now, nothing can fail anymore */ /* delete volumes */ - idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = adm_delete_minor(mdev); + idr_for_each_entry(&resource->devices, device, i) { + retcode = adm_del_minor(device); if (retcode != NO_ERROR) { /* "can not happen" */ drbd_msg_put_info("failed to delete volume"); @@ -3393,19 +3458,11 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) } } - /* delete connection */ - if (conn_lowest_minor(adm_ctx.tconn) < 0) { - list_del_rcu(&adm_ctx.tconn->all_tconn); - synchronize_rcu(); - kref_put(&adm_ctx.tconn->kref, &conn_destroy); + list_del_rcu(&resource->resources); + synchronize_rcu(); + drbd_free_resource(resource); + retcode = NO_ERROR; - retcode = NO_ERROR; - } else { - /* "can not happen" */ - retcode = ERR_RES_IN_USE; - drbd_msg_put_info("failed to delete connection"); - } - goto out; out: drbd_adm_finish(info, retcode); return 0; @@ -3413,6 +3470,8 @@ out: int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) { + struct drbd_resource *resource; + struct drbd_connection *connection; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); @@ -3421,24 +3480,30 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - if (conn_lowest_minor(adm_ctx.tconn) < 0) { - list_del_rcu(&adm_ctx.tconn->all_tconn); - synchronize_rcu(); - kref_put(&adm_ctx.tconn->kref, &conn_destroy); - - retcode = NO_ERROR; - } else { + resource = adm_ctx.resource; + for_each_connection(connection, resource) { + if (connection->cstate > C_STANDALONE) { + retcode = ERR_NET_CONFIGURED; + goto out; + } + } + if (!idr_is_empty(&resource->devices)) { retcode = ERR_RES_IN_USE; + goto out; } - if (retcode == NO_ERROR) - drbd_thread_stop(&adm_ctx.tconn->worker); + 
list_del_rcu(&resource->resources); + for_each_connection(connection, resource) + drbd_thread_stop(&connection->worker); + synchronize_rcu(); + drbd_free_resource(resource); + retcode = NO_ERROR; out: drbd_adm_finish(info, retcode); return 0; } -void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) +void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) { static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */ struct sk_buff *msg; @@ -3447,8 +3512,8 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) int err = -ENOMEM; if (sib->sib_reason == SIB_SYNC_PROGRESS) { - if (time_after(jiffies, mdev->rs_last_bcast + HZ)) - mdev->rs_last_bcast = jiffies; + if (time_after(jiffies, device->rs_last_bcast + HZ)) + device->rs_last_bcast = jiffies; else return; } @@ -3462,10 +3527,10 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT); if (!d_out) /* cannot happen, but anyways. */ goto nla_put_failure; - d_out->minor = mdev_to_minor(mdev); + d_out->minor = device_to_minor(device); d_out->ret_code = NO_ERROR; - if (nla_put_status_info(msg, mdev, sib)) + if (nla_put_status_info(msg, device, sib)) goto nla_put_failure; genlmsg_end(msg, d_out); err = drbd_genl_multicast_events(msg, 0); @@ -3478,7 +3543,7 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) nla_put_failure: nlmsg_free(msg); failed: - dev_err(DEV, "Error %d while broadcasting event. " + drbd_err(device, "Error %d while broadcasting event. 
" "Event seq:%u sib_reason:%u\n", err, seq, sib->sib_reason); } diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index bf31d41dbaad..2f26e8ffa45b 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -46,7 +46,7 @@ const struct file_operations drbd_proc_fops = { .release = drbd_proc_release, }; -void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) +static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) { /* v is in kB/sec. We don't expect TiByte/sec yet. */ if (unlikely(v >= 1000000)) { @@ -66,14 +66,14 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) * [=====>..............] 33.5% (23456/123456) * finish: 2:20:20 speed: 6,345 (6,456) K/sec */ -static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) +static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq) { unsigned long db, dt, dbdt, rt, rs_left; unsigned int res; int i, x, y; int stalled = 0; - drbd_get_syncer_progress(mdev, &rs_left, &res); + drbd_get_syncer_progress(device, &rs_left, &res); x = res/50; y = 20-x; @@ -85,21 +85,21 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) seq_printf(seq, "."); seq_printf(seq, "] "); - if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) seq_printf(seq, "verified:"); else seq_printf(seq, "sync'ed:"); seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); /* if more than a few GB, display in MB */ - if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) + if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) seq_printf(seq, "(%lu/%lu)M", (unsigned long) Bit2KB(rs_left >> 10), - (unsigned long) Bit2KB(mdev->rs_total >> 10)); + (unsigned long) Bit2KB(device->rs_total >> 10)); else seq_printf(seq, "(%lu/%lu)K\n\t", (unsigned long) Bit2KB(rs_left), - (unsigned long) Bit2KB(mdev->rs_total)); + 
(unsigned long) Bit2KB(device->rs_total)); /* see drivers/md/md.c * We do not want to overflow, so the order of operands and @@ -114,14 +114,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at * least DRBD_SYNC_MARK_STEP time before it will be modified. */ /* ------------------------ ~18s average ------------------------ */ - i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS; - dt = (jiffies - mdev->rs_mark_time[i]) / HZ; + i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS; + dt = (jiffies - device->rs_mark_time[i]) / HZ; if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) stalled = 1; if (!dt) dt++; - db = mdev->rs_mark_left[i] - rs_left; + db = device->rs_mark_left[i] - rs_left; rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */ seq_printf(seq, "finish: %lu:%02lu:%02lu", @@ -134,11 +134,11 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) /* ------------------------- ~3s average ------------------------ */ if (proc_details >= 1) { /* this is what drbd_rs_should_slow_down() uses */ - i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; - dt = (jiffies - mdev->rs_mark_time[i]) / HZ; + i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; + dt = (jiffies - device->rs_mark_time[i]) / HZ; if (!dt) dt++; - db = mdev->rs_mark_left[i] - rs_left; + db = device->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); seq_printf_with_thousands_grouping(seq, dbdt); seq_printf(seq, " -- "); @@ -147,34 +147,34 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) /* --------------------- long term average ---------------------- */ /* mean speed since syncer started * we do account for PausedSync periods */ - dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; + dt = (jiffies - device->rs_start - device->rs_paused) / HZ; if (dt == 0) dt = 1; - db = mdev->rs_total - rs_left; + db = device->rs_total - 
rs_left; dbdt = Bit2KB(db/dt); seq_printf_with_thousands_grouping(seq, dbdt); seq_printf(seq, ")"); - if (mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_VERIFY_S) { + if (device->state.conn == C_SYNC_TARGET || + device->state.conn == C_VERIFY_S) { seq_printf(seq, " want: "); - seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate); + seq_printf_with_thousands_grouping(seq, device->c_sync_rate); } seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); if (proc_details >= 1) { /* 64 bit: * we convert to sectors in the display below. */ - unsigned long bm_bits = drbd_bm_bits(mdev); + unsigned long bm_bits = drbd_bm_bits(device); unsigned long bit_pos; unsigned long long stop_sector = 0; - if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) { - bit_pos = bm_bits - mdev->ov_left; - if (verify_can_do_stop_sector(mdev)) - stop_sector = mdev->ov_stop_sector; + if (device->state.conn == C_VERIFY_S || + device->state.conn == C_VERIFY_T) { + bit_pos = bm_bits - device->ov_left; + if (verify_can_do_stop_sector(device)) + stop_sector = device->ov_stop_sector; } else - bit_pos = mdev->bm_resync_fo; + bit_pos = device->bm_resync_fo; /* Total sectors may be slightly off for oddly * sized devices. So what. 
*/ seq_printf(seq, @@ -202,7 +202,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) { int i, prev_i = -1; const char *sn; - struct drbd_conf *mdev; + struct drbd_device *device; struct net_conf *nc; char wp; @@ -236,72 +236,72 @@ static int drbd_seq_show(struct seq_file *seq, void *v) */ rcu_read_lock(); - idr_for_each_entry(&minors, mdev, i) { + idr_for_each_entry(&drbd_devices, device, i) { if (prev_i != i - 1) seq_printf(seq, "\n"); prev_i = i; - sn = drbd_conn_str(mdev->state.conn); + sn = drbd_conn_str(device->state.conn); - if (mdev->state.conn == C_STANDALONE && - mdev->state.disk == D_DISKLESS && - mdev->state.role == R_SECONDARY) { + if (device->state.conn == C_STANDALONE && + device->state.disk == D_DISKLESS && + device->state.role == R_SECONDARY) { seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { - /* reset mdev->congestion_reason */ - bdi_rw_congested(&mdev->rq_queue->backing_dev_info); + /* reset device->congestion_reason */ + bdi_rw_congested(&device->rq_queue->backing_dev_info); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; seq_printf(seq, "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n" " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", i, sn, - drbd_role_str(mdev->state.role), - drbd_role_str(mdev->state.peer), - drbd_disk_str(mdev->state.disk), - drbd_disk_str(mdev->state.pdsk), + drbd_role_str(device->state.role), + drbd_role_str(device->state.peer), + drbd_disk_str(device->state.disk), + drbd_disk_str(device->state.pdsk), wp, - drbd_suspended(mdev) ? 's' : 'r', - mdev->state.aftr_isp ? 'a' : '-', - mdev->state.peer_isp ? 'p' : '-', - mdev->state.user_isp ? 'u' : '-', - mdev->congestion_reason ?: '-', - test_bit(AL_SUSPENDED, &mdev->flags) ? 
's' : '-', - mdev->send_cnt/2, - mdev->recv_cnt/2, - mdev->writ_cnt/2, - mdev->read_cnt/2, - mdev->al_writ_cnt, - mdev->bm_writ_cnt, - atomic_read(&mdev->local_cnt), - atomic_read(&mdev->ap_pending_cnt) + - atomic_read(&mdev->rs_pending_cnt), - atomic_read(&mdev->unacked_cnt), - atomic_read(&mdev->ap_bio_cnt), - mdev->tconn->epochs, - write_ordering_chars[mdev->tconn->write_ordering] + drbd_suspended(device) ? 's' : 'r', + device->state.aftr_isp ? 'a' : '-', + device->state.peer_isp ? 'p' : '-', + device->state.user_isp ? 'u' : '-', + device->congestion_reason ?: '-', + test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-', + device->send_cnt/2, + device->recv_cnt/2, + device->writ_cnt/2, + device->read_cnt/2, + device->al_writ_cnt, + device->bm_writ_cnt, + atomic_read(&device->local_cnt), + atomic_read(&device->ap_pending_cnt) + + atomic_read(&device->rs_pending_cnt), + atomic_read(&device->unacked_cnt), + atomic_read(&device->ap_bio_cnt), + first_peer_device(device)->connection->epochs, + write_ordering_chars[first_peer_device(device)->connection->write_ordering] ); seq_printf(seq, " oos:%llu\n", Bit2KB((unsigned long long) - drbd_bm_total_weight(mdev))); + drbd_bm_total_weight(device))); } - if (mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) - drbd_syncer_progress(mdev, seq); - - if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { - lc_seq_printf_stats(seq, mdev->resync); - lc_seq_printf_stats(seq, mdev->act_log); - put_ldev(mdev); + if (device->state.conn == C_SYNC_SOURCE || + device->state.conn == C_SYNC_TARGET || + device->state.conn == C_VERIFY_S || + device->state.conn == C_VERIFY_T) + drbd_syncer_progress(device, seq); + + if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { + lc_seq_printf_stats(seq, device->resync); + lc_seq_printf_stats(seq, device->act_log); + put_ldev(device); } if (proc_details >= 2) { - if (mdev->resync) { - 
lc_seq_dump_details(seq, mdev->resync, "rs_left", + if (device->resync) { + lc_seq_dump_details(seq, device->resync, "rs_left", resync_dump_detail); } } diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h new file mode 100644 index 000000000000..3c04ec0ea333 --- /dev/null +++ b/drivers/block/drbd/drbd_protocol.h @@ -0,0 +1,295 @@ +#ifndef __DRBD_PROTOCOL_H +#define __DRBD_PROTOCOL_H + +enum drbd_packet { + /* receiver (data socket) */ + P_DATA = 0x00, + P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ + P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */ + P_BARRIER = 0x03, + P_BITMAP = 0x04, + P_BECOME_SYNC_TARGET = 0x05, + P_BECOME_SYNC_SOURCE = 0x06, + P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */ + P_DATA_REQUEST = 0x08, /* Used to ask for a data block */ + P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */ + P_SYNC_PARAM = 0x0a, + P_PROTOCOL = 0x0b, + P_UUIDS = 0x0c, + P_SIZES = 0x0d, + P_STATE = 0x0e, + P_SYNC_UUID = 0x0f, + P_AUTH_CHALLENGE = 0x10, + P_AUTH_RESPONSE = 0x11, + P_STATE_CHG_REQ = 0x12, + + /* asender (meta socket */ + P_PING = 0x13, + P_PING_ACK = 0x14, + P_RECV_ACK = 0x15, /* Used in protocol B */ + P_WRITE_ACK = 0x16, /* Used in protocol C */ + P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ + P_SUPERSEDED = 0x18, /* Used in proto C, two-primaries conflict detection */ + P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ + P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ + P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... 
*/ + P_BARRIER_ACK = 0x1c, + P_STATE_CHG_REPLY = 0x1d, + + /* "new" commands, no longer fitting into the ordering scheme above */ + + P_OV_REQUEST = 0x1e, /* data socket */ + P_OV_REPLY = 0x1f, + P_OV_RESULT = 0x20, /* meta socket */ + P_CSUM_RS_REQUEST = 0x21, /* data socket */ + P_RS_IS_IN_SYNC = 0x22, /* meta socket */ + P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ + P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ + /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ + /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ + P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ + P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ + P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ + P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ + P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ + P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ + P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ + + P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ + P_MAX_OPT_CMD = 0x101, + + /* special command ids for handshake */ + + P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */ + P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */ + + P_CONNECTION_FEATURES = 0xfffe /* FIXED for the next century! */ +}; + +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +/* This is the layout for a packet on the wire. + * The byteorder is the network byte order. + * (except block_id and barrier fields. + * these are pointers to local structs + * and have no relevance for the partner, + * which just echoes them as received.) + * + * NOTE that the payload starts at a long aligned offset, + * regardless of 32 or 64 bit arch! 
+ */ +struct p_header80 { + u32 magic; + u16 command; + u16 length; /* bytes of data after this header */ +} __packed; + +/* Header for big packets, Used for data packets exceeding 64kB */ +struct p_header95 { + u16 magic; /* use DRBD_MAGIC_BIG here */ + u16 command; + u32 length; +} __packed; + +struct p_header100 { + u32 magic; + u16 volume; + u16 command; + u32 length; + u32 pad; +} __packed; + +/* these defines must not be changed without changing the protocol version */ +#define DP_HARDBARRIER 1 /* depricated */ +#define DP_RW_SYNC 2 /* equals REQ_SYNC */ +#define DP_MAY_SET_IN_SYNC 4 +#define DP_UNPLUG 8 /* not used anymore */ +#define DP_FUA 16 /* equals REQ_FUA */ +#define DP_FLUSH 32 /* equals REQ_FLUSH */ +#define DP_DISCARD 64 /* equals REQ_DISCARD */ +#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ +#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ + +struct p_data { + u64 sector; /* 64 bits sector number */ + u64 block_id; /* to identify the request in protocol B&C */ + u32 seq_num; + u32 dp_flags; +} __packed; + +/* + * commands which share a struct: + * p_block_ack: + * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), + * P_SUPERSEDED (proto C, two-primaries conflict detection) + * p_block_req: + * P_DATA_REQUEST, P_RS_DATA_REQUEST + */ +struct p_block_ack { + u64 sector; + u64 block_id; + u32 blksize; + u32 seq_num; +} __packed; + +struct p_block_req { + u64 sector; + u64 block_id; + u32 blksize; + u32 pad; /* to multiple of 8 Byte */ +} __packed; + +/* + * commands with their own struct for additional fields: + * P_CONNECTION_FEATURES + * P_BARRIER + * P_BARRIER_ACK + * P_SYNC_PARAM + * ReportParams + */ + +struct p_connection_features { + u32 protocol_min; + u32 feature_flags; + u32 protocol_max; + + /* should be more than enough for future enhancements + * for now, feature_flags and the reserved array shall be zero. 
+ */ + + u32 _pad; + u64 reserved[7]; +} __packed; + +struct p_barrier { + u32 barrier; /* barrier number _handle_ only */ + u32 pad; /* to multiple of 8 Byte */ +} __packed; + +struct p_barrier_ack { + u32 barrier; + u32 set_size; +} __packed; + +struct p_rs_param { + u32 resync_rate; + + /* Since protocol version 88 and higher. */ + char verify_alg[0]; +} __packed; + +struct p_rs_param_89 { + u32 resync_rate; + /* protocol version 89: */ + char verify_alg[SHARED_SECRET_MAX]; + char csums_alg[SHARED_SECRET_MAX]; +} __packed; + +struct p_rs_param_95 { + u32 resync_rate; + char verify_alg[SHARED_SECRET_MAX]; + char csums_alg[SHARED_SECRET_MAX]; + u32 c_plan_ahead; + u32 c_delay_target; + u32 c_fill_target; + u32 c_max_rate; +} __packed; + +enum drbd_conn_flags { + CF_DISCARD_MY_DATA = 1, + CF_DRY_RUN = 2, +}; + +struct p_protocol { + u32 protocol; + u32 after_sb_0p; + u32 after_sb_1p; + u32 after_sb_2p; + u32 conn_flags; + u32 two_primaries; + + /* Since protocol version 87 and higher. */ + char integrity_alg[0]; + +} __packed; + +struct p_uuids { + u64 uuid[UI_EXTENDED_SIZE]; +} __packed; + +struct p_rs_uuid { + u64 uuid; +} __packed; + +struct p_sizes { + u64 d_size; /* size of disk */ + u64 u_size; /* user requested size */ + u64 c_size; /* current exported size */ + u32 max_bio_size; /* Maximal size of a BIO */ + u16 queue_order_type; /* not yet implemented in DRBD*/ + u16 dds_flags; /* use enum dds_flags here. */ +} __packed; + +struct p_state { + u32 state; +} __packed; + +struct p_req_state { + u32 mask; + u32 val; +} __packed; + +struct p_req_state_reply { + u32 retcode; +} __packed; + +struct p_drbd06_param { + u64 size; + u32 state; + u32 blksize; + u32 protocol; + u32 version; + u32 gen_cnt[5]; + u32 bit_map_gen[5]; +} __packed; + +struct p_block_desc { + u64 sector; + u32 blksize; + u32 pad; /* to multiple of 8 Byte */ +} __packed; + +/* Valid values for the encoding field. + * Bump proto version when changing this. 
*/ +enum drbd_bitmap_code { + /* RLE_VLI_Bytes = 0, + * and other bit variants had been defined during + * algorithm evaluation. */ + RLE_VLI_Bits = 2, +}; + +struct p_compressed_bm { + /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code + * (encoding & 0x80): polarity (set/unset) of first runlength + * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits + * used to pad up to head.length bytes + */ + u8 encoding; + + u8 code[0]; +} __packed; + +struct p_delay_probe93 { + u32 seq_num; /* sequence number to match the two probe packets */ + u32 offset; /* usecs the probe got sent after the reference time point */ +} __packed; + +/* + * Bitmap packets need to fit within a single page on the sender and receiver, + * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger). + */ +#define DRBD_SOCKET_BUFFER_SIZE 4096 + +#endif /* __DRBD_PROTOCOL_H */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d073305ffd5e..68e3992e8838 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -44,6 +44,7 @@ #include <linux/string.h> #include <linux/scatterlist.h> #include "drbd_int.h" +#include "drbd_protocol.h" #include "drbd_req.h" #include "drbd_vli.h" @@ -61,11 +62,11 @@ enum finish_epoch { FE_RECYCLED, }; -static int drbd_do_features(struct drbd_tconn *tconn); -static int drbd_do_auth(struct drbd_tconn *tconn); -static int drbd_disconnected(struct drbd_conf *mdev); +static int drbd_do_features(struct drbd_connection *connection); +static int drbd_do_auth(struct drbd_connection *connection); +static int drbd_disconnected(struct drbd_peer_device *); -static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event); +static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); static int e_end_block(struct drbd_work *, int); @@ -150,7 +151,7 @@ static void 
page_chain_add(struct page **head, *head = chain_first; } -static struct page *__drbd_alloc_pages(struct drbd_conf *mdev, +static struct page *__drbd_alloc_pages(struct drbd_device *device, unsigned int number) { struct page *page = NULL; @@ -196,41 +197,39 @@ static struct page *__drbd_alloc_pages(struct drbd_conf *mdev, return NULL; } -static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev, +static void reclaim_finished_net_peer_reqs(struct drbd_device *device, struct list_head *to_be_freed) { - struct drbd_peer_request *peer_req; - struct list_head *le, *tle; + struct drbd_peer_request *peer_req, *tmp; /* The EEs are always appended to the end of the list. Since they are sent in order over the wire, they have to finish in order. As soon as we see the first not finished we can stop to examine the list... */ - list_for_each_safe(le, tle, &mdev->net_ee) { - peer_req = list_entry(le, struct drbd_peer_request, w.list); + list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { if (drbd_peer_req_has_active_page(peer_req)) break; - list_move(le, to_be_freed); + list_move(&peer_req->w.list, to_be_freed); } } -static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) +static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device) { LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; - spin_lock_irq(&mdev->tconn->req_lock); - reclaim_finished_net_peer_reqs(mdev, &reclaimed); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); + reclaim_finished_net_peer_reqs(device, &reclaimed); + spin_unlock_irq(&device->resource->req_lock); list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(mdev, peer_req); + drbd_free_net_peer_req(device, peer_req); } /** * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) - * @mdev: DRBD device. + * @device: DRBD device. 
* @number: number of pages requested * @retry: whether to retry, if not enough pages are available right now * @@ -240,9 +239,10 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) * * Returns a page chain linked via page->private. */ -struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, +struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, bool retry) { + struct drbd_device *device = peer_device->device; struct page *page = NULL; struct net_conf *nc; DEFINE_WAIT(wait); @@ -251,20 +251,20 @@ struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, /* Yes, we may run up to @number over max_buffers. If we * follow it strictly, the admin will get it wrong anyways. */ rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(peer_device->connection->net_conf); mxb = nc ? nc->max_buffers : 1000000; rcu_read_unlock(); - if (atomic_read(&mdev->pp_in_use) < mxb) - page = __drbd_alloc_pages(mdev, number); + if (atomic_read(&device->pp_in_use) < mxb) + page = __drbd_alloc_pages(device, number); while (page == NULL) { prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); - drbd_kick_lo_and_reclaim_net(mdev); + drbd_kick_lo_and_reclaim_net(device); - if (atomic_read(&mdev->pp_in_use) < mxb) { - page = __drbd_alloc_pages(mdev, number); + if (atomic_read(&device->pp_in_use) < mxb) { + page = __drbd_alloc_pages(device, number); if (page) break; } @@ -273,7 +273,7 @@ struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, break; if (signal_pending(current)) { - dev_warn(DEV, "drbd_alloc_pages interrupted!\n"); + drbd_warn(device, "drbd_alloc_pages interrupted!\n"); break; } @@ -282,17 +282,17 @@ struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, finish_wait(&drbd_pp_wait, &wait); if (page) - atomic_add(number, &mdev->pp_in_use); + atomic_add(number, &device->pp_in_use); return page; } /* Must not be used from irq, 
as that may deadlock: see drbd_alloc_pages. - * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock); + * Is also used from inside an other spin_lock_irq(&resource->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ -static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net) +static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) { - atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; + atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; int i; if (page == NULL) @@ -310,7 +310,7 @@ static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_ne } i = atomic_sub_return(i, a); if (i < 0) - dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n", + drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", is_net ? "pp_in_use_by_net" : "pp_in_use", i); wake_up(&drbd_pp_wait); } @@ -330,25 +330,26 @@ You must not have the req_lock: */ struct drbd_peer_request * -drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, +drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { + struct drbd_device *device = peer_device->device; struct drbd_peer_request *peer_req; struct page *page = NULL; unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; - if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) + if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) return NULL; peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); if (!peer_req) { if (!(gfp_mask & __GFP_NOWARN)) - dev_err(DEV, "%s: allocation failed\n", __func__); + drbd_err(device, "%s: allocation failed\n", __func__); return NULL; } if (data_size) { - page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); + page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT)); if (!page) goto fail; } @@ -360,7 +361,7 @@ 
drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, peer_req->i.waiting = false; peer_req->epoch = NULL; - peer_req->w.mdev = mdev; + peer_req->peer_device = peer_device; peer_req->pages = page; atomic_set(&peer_req->pending_bios, 0); peer_req->flags = 0; @@ -377,30 +378,30 @@ drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, return NULL; } -void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, +void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, int is_net) { if (peer_req->flags & EE_HAS_DIGEST) kfree(peer_req->digest); - drbd_free_pages(mdev, peer_req->pages, is_net); - D_ASSERT(atomic_read(&peer_req->pending_bios) == 0); - D_ASSERT(drbd_interval_empty(&peer_req->i)); + drbd_free_pages(device, peer_req->pages, is_net); + D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); + D_ASSERT(device, drbd_interval_empty(&peer_req->i)); mempool_free(peer_req, drbd_ee_mempool); } -int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list) +int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) { LIST_HEAD(work_list); struct drbd_peer_request *peer_req, *t; int count = 0; - int is_net = list == &mdev->net_ee; + int is_net = list == &device->net_ee; - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); list_splice_init(list, &work_list); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); list_for_each_entry_safe(peer_req, t, &work_list, w.list) { - __drbd_free_peer_req(mdev, peer_req, is_net); + __drbd_free_peer_req(device, peer_req, is_net); count++; } return count; @@ -409,20 +410,20 @@ int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list) /* * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. 
*/ -static int drbd_finish_peer_reqs(struct drbd_conf *mdev) +static int drbd_finish_peer_reqs(struct drbd_device *device) { LIST_HEAD(work_list); LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; int err = 0; - spin_lock_irq(&mdev->tconn->req_lock); - reclaim_finished_net_peer_reqs(mdev, &reclaimed); - list_splice_init(&mdev->done_ee, &work_list); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); + reclaim_finished_net_peer_reqs(device, &reclaimed); + list_splice_init(&device->done_ee, &work_list); + spin_unlock_irq(&device->resource->req_lock); list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(mdev, peer_req); + drbd_free_net_peer_req(device, peer_req); /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_superseded. @@ -435,14 +436,14 @@ static int drbd_finish_peer_reqs(struct drbd_conf *mdev) err2 = peer_req->w.cb(&peer_req->w, !!err); if (!err) err = err2; - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); } - wake_up(&mdev->ee_wait); + wake_up(&device->ee_wait); return err; } -static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, +static void _drbd_wait_ee_list_empty(struct drbd_device *device, struct list_head *head) { DEFINE_WAIT(wait); @@ -450,82 +451,72 @@ static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, /* avoids spin_lock/unlock * and calling prepare_to_wait in the fast path */ while (!list_empty(head)) { - prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&mdev->tconn->req_lock); + prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&device->resource->req_lock); io_schedule(); - finish_wait(&mdev->ee_wait, &wait); - spin_lock_irq(&mdev->tconn->req_lock); + finish_wait(&device->ee_wait, &wait); + spin_lock_irq(&device->resource->req_lock); } } -static void drbd_wait_ee_list_empty(struct drbd_conf *mdev, +static void drbd_wait_ee_list_empty(struct 
drbd_device *device, struct list_head *head) { - spin_lock_irq(&mdev->tconn->req_lock); - _drbd_wait_ee_list_empty(mdev, head); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); + _drbd_wait_ee_list_empty(device, head); + spin_unlock_irq(&device->resource->req_lock); } static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) { - mm_segment_t oldfs; struct kvec iov = { .iov_base = buf, .iov_len = size, }; struct msghdr msg = { - .msg_iovlen = 1, - .msg_iov = (struct iovec *)&iov, .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) }; - int rv; - - oldfs = get_fs(); - set_fs(KERNEL_DS); - rv = sock_recvmsg(sock, &msg, size, msg.msg_flags); - set_fs(oldfs); - - return rv; + return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags); } -static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) +static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size) { int rv; - rv = drbd_recv_short(tconn->data.socket, buf, size, 0); + rv = drbd_recv_short(connection->data.socket, buf, size, 0); if (rv < 0) { if (rv == -ECONNRESET) - conn_info(tconn, "sock was reset by peer\n"); + drbd_info(connection, "sock was reset by peer\n"); else if (rv != -ERESTARTSYS) - conn_err(tconn, "sock_recvmsg returned %d\n", rv); + drbd_err(connection, "sock_recvmsg returned %d\n", rv); } else if (rv == 0) { - if (test_bit(DISCONNECT_SENT, &tconn->flags)) { + if (test_bit(DISCONNECT_SENT, &connection->flags)) { long t; rcu_read_lock(); - t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; + t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; rcu_read_unlock(); - t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t); + t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t); if (t) goto out; } - conn_info(tconn, "sock was shut down by peer\n"); + drbd_info(connection, "sock was shut down by peer\n"); } if (rv != size) 
- conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); + conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); out: return rv; } -static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size) +static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size) { int err; - err = drbd_recv(tconn, buf, size); + err = drbd_recv(connection, buf, size); if (err != size) { if (err >= 0) err = -EIO; @@ -534,13 +525,13 @@ static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size) return err; } -static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size) +static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size) { int err; - err = drbd_recv_all(tconn, buf, size); + err = drbd_recv_all(connection, buf, size); if (err && !signal_pending(current)) - conn_warn(tconn, "short read (expected size %d)\n", (int)size); + drbd_warn(connection, "short read (expected size %d)\n", (int)size); return err; } @@ -563,7 +554,7 @@ static void drbd_setbufsize(struct socket *sock, unsigned int snd, } } -static struct socket *drbd_try_connect(struct drbd_tconn *tconn) +static struct socket *drbd_try_connect(struct drbd_connection *connection) { const char *what; struct socket *sock; @@ -575,7 +566,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) int disconnect_on_error = 1; rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); if (!nc) { rcu_read_unlock(); return NULL; @@ -585,16 +576,16 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) connect_int = nc->connect_int; rcu_read_unlock(); - my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6)); - memcpy(&src_in6, &tconn->my_addr, my_addr_len); + my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6)); + memcpy(&src_in6, &connection->my_addr, my_addr_len); - if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6) + if 
(((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ - peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6)); - memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len); + peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6)); + memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, @@ -642,17 +633,17 @@ out: disconnect_on_error = 0; break; default: - conn_err(tconn, "%s failed, err = %d\n", what, err); + drbd_err(connection, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); } return sock; } struct accept_wait_data { - struct drbd_tconn *tconn; + struct drbd_connection *connection; struct socket *s_listen; struct completion door_bell; void (*original_sk_state_change)(struct sock *sk); @@ -670,7 +661,7 @@ static void drbd_incoming_connection(struct sock *sk) state_change(sk); } -static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad) +static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad) { int err, sndbuf_size, rcvbuf_size, my_addr_len; struct sockaddr_in6 my_addr; @@ -679,7 +670,7 @@ static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_da const char *what; rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); if (!nc) { rcu_read_unlock(); return -EIO; @@ -688,8 +679,8 @@ static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_da rcvbuf_size = nc->rcvbuf_size; rcu_read_unlock(); - my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6)); - memcpy(&my_addr, &tconn->my_addr, my_addr_len); + my_addr_len = 
min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6)); + memcpy(&my_addr, &connection->my_addr, my_addr_len); what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, @@ -725,8 +716,8 @@ out: sock_release(s_listen); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { - conn_err(tconn, "%s failed, err = %d\n", what, err); - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + drbd_err(connection, "%s failed, err = %d\n", what, err); + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); } } @@ -741,14 +732,14 @@ static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad write_unlock_bh(&sk->sk_callback_lock); } -static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad) +static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad) { int timeo, connect_int, err = 0; struct socket *s_estab = NULL; struct net_conf *nc; rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); if (!nc) { rcu_read_unlock(); return NULL; @@ -767,8 +758,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct acc err = kernel_accept(ad->s_listen, &s_estab, 0); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { - conn_err(tconn, "accept failed, err = %d\n", err); - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + drbd_err(connection, "accept failed, err = %d\n", err); + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); } } @@ -778,29 +769,29 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct acc return s_estab; } -static int decode_header(struct drbd_tconn *, void *, struct packet_info *); +static int decode_header(struct drbd_connection *, void *, struct packet_info *); -static int send_first_packet(struct drbd_tconn *tconn, 
struct drbd_socket *sock, +static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock, enum drbd_packet cmd) { - if (!conn_prepare_command(tconn, sock)) + if (!conn_prepare_command(connection, sock)) return -EIO; - return conn_send_command(tconn, sock, cmd, 0, NULL, 0); + return conn_send_command(connection, sock, cmd, 0, NULL, 0); } -static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock) +static int receive_first_packet(struct drbd_connection *connection, struct socket *sock) { - unsigned int header_size = drbd_header_size(tconn); + unsigned int header_size = drbd_header_size(connection); struct packet_info pi; int err; - err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0); + err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); if (err != header_size) { if (err >= 0) err = -EIO; return err; } - err = decode_header(tconn, tconn->data.rbuf, &pi); + err = decode_header(connection, connection->data.rbuf, &pi); if (err) return err; return pi.cmd; @@ -830,28 +821,29 @@ static int drbd_socket_okay(struct socket **sock) } /* Gets called if a connection is established, or if a new minor gets created in a connection */ -int drbd_connected(struct drbd_conf *mdev) +int drbd_connected(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; int err; - atomic_set(&mdev->packet_seq, 0); - mdev->peer_seq = 0; + atomic_set(&device->packet_seq, 0); + device->peer_seq = 0; - mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ? - &mdev->tconn->cstate_mutex : - &mdev->own_state_mutex; + device->state_mutex = peer_device->connection->agreed_pro_version < 100 ? 
+ &peer_device->connection->cstate_mutex : + &device->own_state_mutex; - err = drbd_send_sync_param(mdev); + err = drbd_send_sync_param(peer_device); if (!err) - err = drbd_send_sizes(mdev, 0, 0); + err = drbd_send_sizes(peer_device, 0, 0); if (!err) - err = drbd_send_uuids(mdev); + err = drbd_send_uuids(peer_device); if (!err) - err = drbd_send_current_state(mdev); - clear_bit(USE_DEGR_WFC_T, &mdev->flags); - clear_bit(RESIZE_PENDING, &mdev->flags); - atomic_set(&mdev->ap_in_flight, 0); - mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ + err = drbd_send_current_state(peer_device); + clear_bit(USE_DEGR_WFC_T, &device->flags); + clear_bit(RESIZE_PENDING, &device->flags); + atomic_set(&device->ap_in_flight, 0); + mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */ return err; } @@ -863,59 +855,59 @@ int drbd_connected(struct drbd_conf *mdev) * no point in trying again, please go standalone. * -2 We do not have a network config... */ -static int conn_connect(struct drbd_tconn *tconn) +static int conn_connect(struct drbd_connection *connection) { struct drbd_socket sock, msock; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; struct net_conf *nc; int vnr, timeout, h, ok; bool discard_my_data; enum drbd_state_rv rv; struct accept_wait_data ad = { - .tconn = tconn, + .connection = connection, .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), }; - clear_bit(DISCONNECT_SENT, &tconn->flags); - if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) + clear_bit(DISCONNECT_SENT, &connection->flags); + if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; mutex_init(&sock.mutex); - sock.sbuf = tconn->data.sbuf; - sock.rbuf = tconn->data.rbuf; + sock.sbuf = connection->data.sbuf; + sock.rbuf = connection->data.rbuf; sock.socket = NULL; mutex_init(&msock.mutex); - msock.sbuf = tconn->meta.sbuf; - msock.rbuf = tconn->meta.rbuf; + 
msock.sbuf = connection->meta.sbuf; + msock.rbuf = connection->meta.rbuf; msock.socket = NULL; /* Assume that the peer only understands protocol 80 until we know better. */ - tconn->agreed_pro_version = 80; + connection->agreed_pro_version = 80; - if (prepare_listen_socket(tconn, &ad)) + if (prepare_listen_socket(connection, &ad)) return 0; do { struct socket *s; - s = drbd_try_connect(tconn); + s = drbd_try_connect(connection); if (s) { if (!sock.socket) { sock.socket = s; - send_first_packet(tconn, &sock, P_INITIAL_DATA); + send_first_packet(connection, &sock, P_INITIAL_DATA); } else if (!msock.socket) { - clear_bit(RESOLVE_CONFLICTS, &tconn->flags); + clear_bit(RESOLVE_CONFLICTS, &connection->flags); msock.socket = s; - send_first_packet(tconn, &msock, P_INITIAL_META); + send_first_packet(connection, &msock, P_INITIAL_META); } else { - conn_err(tconn, "Logic error in conn_connect()\n"); + drbd_err(connection, "Logic error in conn_connect()\n"); goto out_release_sockets; } } if (sock.socket && msock.socket) { rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); timeout = nc->ping_timeo * HZ / 10; rcu_read_unlock(); schedule_timeout_interruptible(timeout); @@ -926,15 +918,15 @@ static int conn_connect(struct drbd_tconn *tconn) } retry: - s = drbd_wait_for_connect(tconn, &ad); + s = drbd_wait_for_connect(connection, &ad); if (s) { - int fp = receive_first_packet(tconn, s); + int fp = receive_first_packet(connection, s); drbd_socket_okay(&sock.socket); drbd_socket_okay(&msock.socket); switch (fp) { case P_INITIAL_DATA: if (sock.socket) { - conn_warn(tconn, "initial packet S crossed\n"); + drbd_warn(connection, "initial packet S crossed\n"); sock_release(sock.socket); sock.socket = s; goto randomize; @@ -942,9 +934,9 @@ retry: sock.socket = s; break; case P_INITIAL_META: - set_bit(RESOLVE_CONFLICTS, &tconn->flags); + set_bit(RESOLVE_CONFLICTS, &connection->flags); if (msock.socket) { - conn_warn(tconn, "initial 
packet M crossed\n"); + drbd_warn(connection, "initial packet M crossed\n"); sock_release(msock.socket); msock.socket = s; goto randomize; @@ -952,7 +944,7 @@ retry: msock.socket = s; break; default: - conn_warn(tconn, "Error receiving initial packet\n"); + drbd_warn(connection, "Error receiving initial packet\n"); sock_release(s); randomize: if (prandom_u32() & 1) @@ -960,12 +952,12 @@ randomize: } } - if (tconn->cstate <= C_DISCONNECTING) + if (connection->cstate <= C_DISCONNECTING) goto out_release_sockets; if (signal_pending(current)) { flush_signals(current); smp_rmb(); - if (get_t_state(&tconn->receiver) == EXITING) + if (get_t_state(&connection->receiver) == EXITING) goto out_release_sockets; } @@ -986,12 +978,12 @@ randomize: msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; /* NOT YET ... - * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; + * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10; * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * first set it to the P_CONNECTION_FEATURES timeout, * which we set to 4x the configured ping_timeout. 
*/ rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); sock.socket->sk->sk_sndtimeo = sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; @@ -1008,37 +1000,38 @@ randomize: drbd_tcp_nodelay(sock.socket); drbd_tcp_nodelay(msock.socket); - tconn->data.socket = sock.socket; - tconn->meta.socket = msock.socket; - tconn->last_received = jiffies; + connection->data.socket = sock.socket; + connection->meta.socket = msock.socket; + connection->last_received = jiffies; - h = drbd_do_features(tconn); + h = drbd_do_features(connection); if (h <= 0) return h; - if (tconn->cram_hmac_tfm) { - /* drbd_request_state(mdev, NS(conn, WFAuth)); */ - switch (drbd_do_auth(tconn)) { + if (connection->cram_hmac_tfm) { + /* drbd_request_state(device, NS(conn, WFAuth)); */ + switch (drbd_do_auth(connection)) { case -1: - conn_err(tconn, "Authentication of peer failed\n"); + drbd_err(connection, "Authentication of peer failed\n"); return -1; case 0: - conn_err(tconn, "Authentication of peer failed, trying again.\n"); + drbd_err(connection, "Authentication of peer failed, trying again.\n"); return 0; } } - tconn->data.socket->sk->sk_sndtimeo = timeout; - tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + connection->data.socket->sk->sk_sndtimeo = timeout; + connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - if (drbd_send_protocol(tconn) == -EOPNOTSUPP) + if (drbd_send_protocol(connection) == -EOPNOTSUPP) return -1; - set_bit(STATE_SENT, &tconn->flags); + set_bit(STATE_SENT, &connection->flags); rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - kref_get(&mdev->kref); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + kref_get(&device->kref); rcu_read_unlock(); /* Prevent a race between resync-handshake and @@ -1048,35 +1041,35 @@ randomize: * drbd_set_role() is finished, and any incoming drbd_set_role * will see the 
STATE_SENT flag, and wait for it to be cleared. */ - mutex_lock(mdev->state_mutex); - mutex_unlock(mdev->state_mutex); + mutex_lock(device->state_mutex); + mutex_unlock(device->state_mutex); if (discard_my_data) - set_bit(DISCARD_MY_DATA, &mdev->flags); + set_bit(DISCARD_MY_DATA, &device->flags); else - clear_bit(DISCARD_MY_DATA, &mdev->flags); + clear_bit(DISCARD_MY_DATA, &device->flags); - drbd_connected(mdev); - kref_put(&mdev->kref, &drbd_minor_destroy); + drbd_connected(peer_device); + kref_put(&device->kref, drbd_destroy_device); rcu_read_lock(); } rcu_read_unlock(); - rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); - if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) { - clear_bit(STATE_SENT, &tconn->flags); + rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); + if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) { + clear_bit(STATE_SENT, &connection->flags); return 0; } - drbd_thread_start(&tconn->asender); + drbd_thread_start(&connection->asender); - mutex_lock(&tconn->conf_update); + mutex_lock(&connection->resource->conf_update); /* The discard_my_data flag is a single-shot modifier to the next * connection attempt, the handshake of which is now well underway. * No need for rcu style copying of the whole struct * just to clear a single value. 
*/ - tconn->net_conf->discard_my_data = 0; - mutex_unlock(&tconn->conf_update); + connection->net_conf->discard_my_data = 0; + mutex_unlock(&connection->resource->conf_update); return h; @@ -1090,15 +1083,15 @@ out_release_sockets: return -1; } -static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi) +static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi) { - unsigned int header_size = drbd_header_size(tconn); + unsigned int header_size = drbd_header_size(connection); if (header_size == sizeof(struct p_header100) && *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { struct p_header100 *h = header; if (h->pad != 0) { - conn_err(tconn, "Header padding is not zero\n"); + drbd_err(connection, "Header padding is not zero\n"); return -EINVAL; } pi->vnr = be16_to_cpu(h->volume); @@ -1117,55 +1110,57 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i pi->size = be16_to_cpu(h->length); pi->vnr = 0; } else { - conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n", + drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n", be32_to_cpu(*(__be32 *)header), - tconn->agreed_pro_version); + connection->agreed_pro_version); return -EINVAL; } pi->data = header + header_size; return 0; } -static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) +static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi) { - void *buffer = tconn->data.rbuf; + void *buffer = connection->data.rbuf; int err; - err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn)); + err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection)); if (err) return err; - err = decode_header(tconn, buffer, pi); - tconn->last_received = jiffies; + err = decode_header(connection, buffer, pi); + connection->last_received = jiffies; return err; } -static void drbd_flush(struct drbd_tconn *tconn) +static void 
drbd_flush(struct drbd_connection *connection) { int rv; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; - if (tconn->write_ordering >= WO_bdev_flush) { + if (connection->write_ordering >= WO_bdev_flush) { rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (!get_ldev(mdev)) + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + + if (!get_ldev(device)) continue; - kref_get(&mdev->kref); + kref_get(&device->kref); rcu_read_unlock(); - rv = blkdev_issue_flush(mdev->ldev->backing_bdev, + rv = blkdev_issue_flush(device->ldev->backing_bdev, GFP_NOIO, NULL); if (rv) { - dev_info(DEV, "local disk flush failed with status %d\n", rv); + drbd_info(device, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. * don't try again for ANY return value != 0 * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(tconn, WO_drain_io); + drbd_bump_write_ordering(connection, WO_drain_io); } - put_ldev(mdev); - kref_put(&mdev->kref, &drbd_minor_destroy); + put_ldev(device); + kref_put(&device->kref, drbd_destroy_device); rcu_read_lock(); if (rv) @@ -1177,11 +1172,11 @@ static void drbd_flush(struct drbd_tconn *tconn) /** * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it. - * @mdev: DRBD device. + * @device: DRBD device. * @epoch: Epoch object. * @ev: Epoch event. 
*/ -static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn, +static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection, struct drbd_epoch *epoch, enum epoch_event ev) { @@ -1189,7 +1184,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn, struct drbd_epoch *next_epoch; enum finish_epoch rv = FE_STILL_LIVE; - spin_lock(&tconn->epoch_lock); + spin_lock(&connection->epoch_lock); do { next_epoch = NULL; @@ -1211,22 +1206,22 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn, atomic_read(&epoch->active) == 0 && (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { if (!(ev & EV_CLEANUP)) { - spin_unlock(&tconn->epoch_lock); - drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size); - spin_lock(&tconn->epoch_lock); + spin_unlock(&connection->epoch_lock); + drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size); + spin_lock(&connection->epoch_lock); } #if 0 /* FIXME: dec unacked on connection, once we have * something to count pending connection packets in. */ if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) - dec_unacked(epoch->tconn); + dec_unacked(epoch->connection); #endif - if (tconn->current_epoch != epoch) { + if (connection->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); list_del(&epoch->list); ev = EV_BECAME_LAST | (ev & EV_CLEANUP); - tconn->epochs--; + connection->epochs--; kfree(epoch); if (rv == FE_STILL_LIVE) @@ -1246,20 +1241,20 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn, epoch = next_epoch; } while (1); - spin_unlock(&tconn->epoch_lock); + spin_unlock(&connection->epoch_lock); return rv; } /** * drbd_bump_write_ordering() - Fall back to an other write ordering method - * @tconn: DRBD connection. + * @connection: DRBD connection. * @wo: Write ordering method to try. 
*/ -void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo) +void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo) { struct disk_conf *dc; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; enum write_ordering_e pwo; int vnr; static char *write_ordering_str[] = { @@ -1268,29 +1263,31 @@ void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo [WO_bdev_flush] = "flush", }; - pwo = tconn->write_ordering; + pwo = connection->write_ordering; wo = min(pwo, wo); rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (!get_ldev_if_state(mdev, D_ATTACHING)) + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + + if (!get_ldev_if_state(device, D_ATTACHING)) continue; - dc = rcu_dereference(mdev->ldev->disk_conf); + dc = rcu_dereference(device->ldev->disk_conf); if (wo == WO_bdev_flush && !dc->disk_flushes) wo = WO_drain_io; if (wo == WO_drain_io && !dc->disk_drain) wo = WO_none; - put_ldev(mdev); + put_ldev(device); } rcu_read_unlock(); - tconn->write_ordering = wo; - if (pwo != tconn->write_ordering || wo == WO_bdev_flush) - conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]); + connection->write_ordering = wo; + if (pwo != connection->write_ordering || wo == WO_bdev_flush) + drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]); } /** * drbd_submit_peer_request() - * @mdev: DRBD device. + * @device: DRBD device. * @peer_req: peer request * @rw: flag field, see bio->bi_rw * @@ -1305,7 +1302,7 @@ void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo * on certain Xen deployments. */ /* TODO allocate from our own bio_set. 
*/ -int drbd_submit_peer_request(struct drbd_conf *mdev, +int drbd_submit_peer_request(struct drbd_device *device, struct drbd_peer_request *peer_req, const unsigned rw, const int fault_type) { @@ -1329,12 +1326,12 @@ int drbd_submit_peer_request(struct drbd_conf *mdev, next_bio: bio = bio_alloc(GFP_NOIO, nr_pages); if (!bio) { - dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); + drbd_err(device, "submit_ee: Allocation of a bio failed\n"); goto fail; } /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; - bio->bi_bdev = mdev->ldev->backing_bdev; + bio->bi_bdev = device->ldev->backing_bdev; bio->bi_rw = rw; bio->bi_private = peer_req; bio->bi_end_io = drbd_peer_request_endio; @@ -1350,7 +1347,7 @@ next_bio: * But in case it fails anyways, * we deal with it, and complain (below). */ if (bio->bi_vcnt == 0) { - dev_err(DEV, + drbd_err(device, "bio_add_page failed for len=%u, " "bi_vcnt=0 (bi_sector=%llu)\n", len, (uint64_t)bio->bi_iter.bi_sector); @@ -1363,8 +1360,8 @@ next_bio: sector += len >> 9; --nr_pages; } - D_ASSERT(page == NULL); - D_ASSERT(ds == 0); + D_ASSERT(device, page == NULL); + D_ASSERT(device, ds == 0); atomic_set(&peer_req->pending_bios, n_bios); do { @@ -1372,7 +1369,7 @@ next_bio: bios = bios->bi_next; bio->bi_next = NULL; - drbd_generic_make_request(mdev, fault_type, bio); + drbd_generic_make_request(device, fault_type, bio); } while (bios); return 0; @@ -1385,36 +1382,44 @@ fail: return err; } -static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, +static void drbd_remove_epoch_entry_interval(struct drbd_device *device, struct drbd_peer_request *peer_req) { struct drbd_interval *i = &peer_req->i; - drbd_remove_interval(&mdev->write_requests, i); + drbd_remove_interval(&device->write_requests, i); drbd_clear_interval(i); /* Wake up any processes waiting for this peer request to complete. 
*/ if (i->waiting) - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); } -void conn_wait_active_ee_empty(struct drbd_tconn *tconn) +static void conn_wait_active_ee_empty(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - kref_get(&mdev->kref); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + + kref_get(&device->kref); rcu_read_unlock(); - drbd_wait_ee_list_empty(mdev, &mdev->active_ee); - kref_put(&mdev->kref, &drbd_minor_destroy); + drbd_wait_ee_list_empty(device, &device->active_ee); + kref_put(&device->kref, drbd_destroy_device); rcu_read_lock(); } rcu_read_unlock(); } -static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) +static struct drbd_peer_device * +conn_peer_device(struct drbd_connection *connection, int volume_number) +{ + return idr_find(&connection->peer_devices, volume_number); +} + +static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi) { int rv; struct p_barrier *p = pi->data; @@ -1423,16 +1428,16 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) /* FIXME these are unacked on connection, * not a specific (peer)device. */ - tconn->current_epoch->barrier_nr = p->barrier; - tconn->current_epoch->tconn = tconn; - rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR); + connection->current_epoch->barrier_nr = p->barrier; + connection->current_epoch->connection = connection; + rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR); /* P_BARRIER_ACK may imply that the corresponding extent is dropped from * the activity log, which means it would not be resynced in case the * R_PRIMARY crashes now. * Therefore we must send the barrier_ack after the barrier request was * completed. 
*/ - switch (tconn->write_ordering) { + switch (connection->write_ordering) { case WO_none: if (rv == FE_RECYCLED) return 0; @@ -1443,15 +1448,15 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) if (epoch) break; else - conn_warn(tconn, "Allocation of an epoch failed, slowing down\n"); + drbd_warn(connection, "Allocation of an epoch failed, slowing down\n"); /* Fall through */ case WO_bdev_flush: case WO_drain_io: - conn_wait_active_ee_empty(tconn); - drbd_flush(tconn); + conn_wait_active_ee_empty(connection); + drbd_flush(connection); - if (atomic_read(&tconn->current_epoch->epoch_size)) { + if (atomic_read(&connection->current_epoch->epoch_size)) { epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); if (epoch) break; @@ -1459,7 +1464,7 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) return 0; default: - conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering); + drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering); return -EIO; } @@ -1467,16 +1472,16 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) atomic_set(&epoch->epoch_size, 0); atomic_set(&epoch->active, 0); - spin_lock(&tconn->epoch_lock); - if (atomic_read(&tconn->current_epoch->epoch_size)) { - list_add(&epoch->list, &tconn->current_epoch->list); - tconn->current_epoch = epoch; - tconn->epochs++; + spin_lock(&connection->epoch_lock); + if (atomic_read(&connection->current_epoch->epoch_size)) { + list_add(&epoch->list, &connection->current_epoch->list); + connection->current_epoch = epoch; + connection->epochs++; } else { /* The current_epoch got recycled while we allocated this one... 
*/ kfree(epoch); } - spin_unlock(&tconn->epoch_lock); + spin_unlock(&connection->epoch_lock); return 0; } @@ -1484,25 +1489,26 @@ static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) /* used from receive_RSDataReply (recv_resync_read) * and from receive_Data */ static struct drbd_peer_request * -read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, +read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, int data_size) __must_hold(local) { - const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + struct drbd_device *device = peer_device->device; + const sector_t capacity = drbd_get_capacity(device->this_bdev); struct drbd_peer_request *peer_req; struct page *page; int dgs, ds, err; - void *dig_in = mdev->tconn->int_dig_in; - void *dig_vv = mdev->tconn->int_dig_vv; + void *dig_in = peer_device->connection->int_dig_in; + void *dig_vv = peer_device->connection->int_dig_vv; unsigned long *data; dgs = 0; - if (mdev->tconn->peer_integrity_tfm) { - dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); + if (peer_device->connection->peer_integrity_tfm) { + dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); /* * FIXME: Receive the incoming digest into the receive buffer * here, together with its struct p_data? */ - err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); + err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs); if (err) return NULL; data_size -= dgs; @@ -1516,7 +1522,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, /* even though we trust out peer, * we sometimes have to double check. 
*/ if (sector + (data_size>>9) > capacity) { - dev_err(DEV, "request from peer beyond end of local disk: " + drbd_err(device, "request from peer beyond end of local disk: " "capacity: %llus < sector: %llus + size: %u\n", (unsigned long long)capacity, (unsigned long long)sector, data_size); @@ -1526,7 +1532,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ - peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO); + peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO); if (!peer_req) return NULL; @@ -1538,36 +1544,36 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, page_chain_for_each(page) { unsigned len = min_t(int, ds, PAGE_SIZE); data = kmap(page); - err = drbd_recv_all_warn(mdev->tconn, data, len); - if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { - dev_err(DEV, "Fault injection: Corrupting data on receive\n"); + err = drbd_recv_all_warn(peer_device->connection, data, len); + if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) { + drbd_err(device, "Fault injection: Corrupting data on receive\n"); data[0] = data[0] ^ (unsigned long)-1; } kunmap(page); if (err) { - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); return NULL; } ds -= len; } if (dgs) { - drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv); + drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { - dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", + drbd_err(device, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); return NULL; } } - mdev->recv_cnt += data_size>>9; + device->recv_cnt += 
data_size>>9; return peer_req; } /* drbd_drain_block() just takes a data block * out of the socket input buffer, and discards it. */ -static int drbd_drain_block(struct drbd_conf *mdev, int data_size) +static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size) { struct page *page; int err = 0; @@ -1576,36 +1582,36 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) if (!data_size) return 0; - page = drbd_alloc_pages(mdev, 1, 1); + page = drbd_alloc_pages(peer_device, 1, 1); data = kmap(page); while (data_size) { unsigned int len = min_t(int, data_size, PAGE_SIZE); - err = drbd_recv_all_warn(mdev->tconn, data, len); + err = drbd_recv_all_warn(peer_device->connection, data, len); if (err) break; data_size -= len; } kunmap(page); - drbd_free_pages(mdev, page, 0); + drbd_free_pages(peer_device->device, page, 0); return err; } -static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, +static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req, sector_t sector, int data_size) { struct bio_vec bvec; struct bvec_iter iter; struct bio *bio; int dgs, err, expect; - void *dig_in = mdev->tconn->int_dig_in; - void *dig_vv = mdev->tconn->int_dig_vv; + void *dig_in = peer_device->connection->int_dig_in; + void *dig_vv = peer_device->connection->int_dig_vv; dgs = 0; - if (mdev->tconn->peer_integrity_tfm) { - dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); - err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); + if (peer_device->connection->peer_integrity_tfm) { + dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); + err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs); if (err) return err; data_size -= dgs; @@ -1613,15 +1619,15 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, /* optimistically update recv_cnt. if receiving fails below, * we disconnect anyways, and counters will be reset. 
*/ - mdev->recv_cnt += data_size>>9; + peer_device->device->recv_cnt += data_size>>9; bio = req->master_bio; - D_ASSERT(sector == bio->bi_iter.bi_sector); + D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector); bio_for_each_segment(bvec, bio, iter) { void *mapped = kmap(bvec.bv_page) + bvec.bv_offset; expect = min_t(int, data_size, bvec.bv_len); - err = drbd_recv_all_warn(mdev->tconn, mapped, expect); + err = drbd_recv_all_warn(peer_device->connection, mapped, expect); kunmap(bvec.bv_page); if (err) return err; @@ -1629,14 +1635,14 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, } if (dgs) { - drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv); + drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { - dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); + drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n"); return -EINVAL; } } - D_ASSERT(data_size == 0); + D_ASSERT(peer_device->device, data_size == 0); return 0; } @@ -1648,64 +1654,67 @@ static int e_end_resync_block(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; sector_t sector = peer_req->i.sector; int err; - D_ASSERT(drbd_interval_empty(&peer_req->i)); + D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(mdev, sector, peer_req->i.size); - err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); + drbd_set_in_sync(device, sector, peer_req->i.size); + err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ - drbd_rs_failed_io(mdev, sector, peer_req->i.size); + drbd_rs_failed_io(device, sector, peer_req->i.size); - err = drbd_send_ack(mdev, 
P_NEG_ACK, peer_req); + err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); } - dec_unacked(mdev); + dec_unacked(device); return err; } -static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) +static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector, + int data_size) __releases(local) { + struct drbd_device *device = peer_device->device; struct drbd_peer_request *peer_req; - peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size); + peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size); if (!peer_req) goto fail; - dec_rs_pending(mdev); + dec_rs_pending(device); - inc_unacked(mdev); + inc_unacked(device); /* corresponding dec_unacked() in e_end_resync_block() * respective _drbd_clear_done_ee */ peer_req->w.cb = e_end_resync_block; - spin_lock_irq(&mdev->tconn->req_lock); - list_add(&peer_req->w.list, &mdev->sync_ee); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); + list_add(&peer_req->w.list, &device->sync_ee); + spin_unlock_irq(&device->resource->req_lock); - atomic_add(data_size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) + atomic_add(data_size >> 9, &device->rs_sect_ev); + if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) return 0; /* don't care for the reason here */ - dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->tconn->req_lock); + drbd_err(device, "submit failed, triggering re-connect\n"); + spin_lock_irq(&device->resource->req_lock); list_del(&peer_req->w.list); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); fail: - put_ldev(mdev); + put_ldev(device); return -EIO; } static struct drbd_request * -find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, +find_request(struct 
drbd_device *device, struct rb_root *root, u64 id, sector_t sector, bool missing_ok, const char *func) { struct drbd_request *req; @@ -1715,36 +1724,38 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, if (drbd_contains_interval(root, sector, &req->i) && req->i.local) return req; if (!missing_ok) { - dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func, + drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func, (unsigned long)id, (unsigned long long)sector); } return NULL; } -static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct drbd_request *req; sector_t sector; int err; struct p_data *p = pi->data; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; sector = be64_to_cpu(p->sector); - spin_lock_irq(&mdev->tconn->req_lock); - req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); + req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__); + spin_unlock_irq(&device->resource->req_lock); if (unlikely(!req)) return -EIO; /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ - err = recv_dless_read(mdev, req, sector, pi->size); + err = recv_dless_read(peer_device, req, sector, pi->size); if (!err) req_mod(req, DATA_RECEIVED); /* else: nothing. handled from drbd_disconnect... 
@@ -1754,46 +1765,48 @@ static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi) return err; } -static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; sector_t sector; int err; struct p_data *p = pi->data; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; sector = be64_to_cpu(p->sector); - D_ASSERT(p->block_id == ID_SYNCER); + D_ASSERT(device, p->block_id == ID_SYNCER); - if (get_ldev(mdev)) { + if (get_ldev(device)) { /* data is submitted to disk within recv_resync_read. * corresponding put_ldev done below on error, * or in drbd_peer_request_endio. */ - err = recv_resync_read(mdev, sector, pi->size); + err = recv_resync_read(peer_device, sector, pi->size); } else { if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Can not write resync data to local disk.\n"); + drbd_err(device, "Can not write resync data to local disk.\n"); - err = drbd_drain_block(mdev, pi->size); + err = drbd_drain_block(peer_device, pi->size); - drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size); + drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); } - atomic_add(pi->size >> 9, &mdev->rs_sect_in); + atomic_add(pi->size >> 9, &device->rs_sect_in); return err; } -static void restart_conflicting_writes(struct drbd_conf *mdev, +static void restart_conflicting_writes(struct drbd_device *device, sector_t sector, int size) { struct drbd_interval *i; struct drbd_request *req; - drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + drbd_for_each_overlap(i, &device->write_requests, sector, size) { if (!i->local) continue; req = container_of(i, struct drbd_request, i); @@ -1813,52 +1826,53 @@ static int e_end_block(struct drbd_work *w, 
int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; sector_t sector = peer_req->i.sector; int err = 0, pcmd; if (peer_req->flags & EE_SEND_WRITE_ACK) { if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - pcmd = (mdev->state.conn >= C_SYNC_SOURCE && - mdev->state.conn <= C_PAUSED_SYNC_T && + pcmd = (device->state.conn >= C_SYNC_SOURCE && + device->state.conn <= C_PAUSED_SYNC_T && peer_req->flags & EE_MAY_SET_IN_SYNC) ? P_RS_WRITE_ACK : P_WRITE_ACK; - err = drbd_send_ack(mdev, pcmd, peer_req); + err = drbd_send_ack(peer_device, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(mdev, sector, peer_req->i.size); + drbd_set_in_sync(device, sector, peer_req->i.size); } else { - err = drbd_send_ack(mdev, P_NEG_ACK, peer_req); + err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... * maybe assert this? */ } - dec_unacked(mdev); + dec_unacked(device); } /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. 
*/ if (peer_req->flags & EE_IN_INTERVAL_TREE) { - spin_lock_irq(&mdev->tconn->req_lock); - D_ASSERT(!drbd_interval_empty(&peer_req->i)); - drbd_remove_epoch_entry_interval(mdev, peer_req); + spin_lock_irq(&device->resource->req_lock); + D_ASSERT(device, !drbd_interval_empty(&peer_req->i)); + drbd_remove_epoch_entry_interval(device, peer_req); if (peer_req->flags & EE_RESTART_REQUESTS) - restart_conflicting_writes(mdev, sector, peer_req->i.size); - spin_unlock_irq(&mdev->tconn->req_lock); + restart_conflicting_writes(device, sector, peer_req->i.size); + spin_unlock_irq(&device->resource->req_lock); } else - D_ASSERT(drbd_interval_empty(&peer_req->i)); + D_ASSERT(device, drbd_interval_empty(&peer_req->i)); - drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); + drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); return err; } static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) { - struct drbd_conf *mdev = w->mdev; struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_device *peer_device = peer_req->peer_device; int err; - err = drbd_send_ack(mdev, ack, peer_req); - dec_unacked(mdev); + err = drbd_send_ack(peer_device, ack, peer_req); + dec_unacked(peer_device->device); return err; } @@ -1870,9 +1884,11 @@ static int e_send_superseded(struct drbd_work *w, int unused) static int e_send_retry_write(struct drbd_work *w, int unused) { - struct drbd_tconn *tconn = w->mdev->tconn; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); + struct drbd_connection *connection = peer_req->peer_device->connection; - return e_send_ack(w, tconn->agreed_pro_version >= 100 ? + return e_send_ack(w, connection->agreed_pro_version >= 100 ? P_RETRY_WRITE : P_SUPERSEDED); } @@ -1891,18 +1907,19 @@ static u32 seq_max(u32 a, u32 b) return seq_greater(a, b) ? 
a : b; } -static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) +static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq) { + struct drbd_device *device = peer_device->device; unsigned int newest_peer_seq; - if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) { - spin_lock(&mdev->peer_seq_lock); - newest_peer_seq = seq_max(mdev->peer_seq, peer_seq); - mdev->peer_seq = newest_peer_seq; - spin_unlock(&mdev->peer_seq_lock); - /* wake up only if we actually changed mdev->peer_seq */ + if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) { + spin_lock(&device->peer_seq_lock); + newest_peer_seq = seq_max(device->peer_seq, peer_seq); + device->peer_seq = newest_peer_seq; + spin_unlock(&device->peer_seq_lock); + /* wake up only if we actually changed device->peer_seq */ if (peer_seq == newest_peer_seq) - wake_up(&mdev->seq_wait); + wake_up(&device->seq_wait); } } @@ -1912,20 +1929,20 @@ static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) } /* maybe change sync_ee into interval trees as well? */ -static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) +static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) { struct drbd_peer_request *rs_req; bool rv = 0; - spin_lock_irq(&mdev->tconn->req_lock); - list_for_each_entry(rs_req, &mdev->sync_ee, w.list) { + spin_lock_irq(&device->resource->req_lock); + list_for_each_entry(rs_req, &device->sync_ee, w.list) { if (overlaps(peer_req->i.sector, peer_req->i.size, rs_req->i.sector, rs_req->i.size)) { rv = 1; break; } } - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); return rv; } @@ -1939,9 +1956,9 @@ static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_re * * Note: we don't care for Ack packets overtaking P_DATA packets. 
* - * In case packet_seq is larger than mdev->peer_seq number, there are + * In case packet_seq is larger than device->peer_seq number, there are * outstanding packets on the msock. We wait for them to arrive. - * In case we are the logically next packet, we update mdev->peer_seq + * In case we are the logically next packet, we update device->peer_seq * ourselves. Correctly handles 32bit wrap around. * * Assume we have a 10 GBit connection, that is about 1<<30 byte per second, @@ -1951,19 +1968,20 @@ static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_re * * returns 0 if we may process the packet, * -ERESTARTSYS if we were interrupted (by disconnect signal). */ -static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq) +static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq) { + struct drbd_device *device = peer_device->device; DEFINE_WAIT(wait); long timeout; int ret = 0, tp; - if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) + if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) return 0; - spin_lock(&mdev->peer_seq_lock); + spin_lock(&device->peer_seq_lock); for (;;) { - if (!seq_greater(peer_seq - 1, mdev->peer_seq)) { - mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + if (!seq_greater(peer_seq - 1, device->peer_seq)) { + device->peer_seq = seq_max(device->peer_seq, peer_seq); break; } @@ -1973,35 +1991,35 @@ static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_s } rcu_read_lock(); - tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; + tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries; rcu_read_unlock(); if (!tp) break; /* Only need to wait if two_primaries is enabled */ - prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); - spin_unlock(&mdev->peer_seq_lock); + prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock(&device->peer_seq_lock); 
rcu_read_lock(); - timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10; + timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10; rcu_read_unlock(); timeout = schedule_timeout(timeout); - spin_lock(&mdev->peer_seq_lock); + spin_lock(&device->peer_seq_lock); if (!timeout) { ret = -ETIMEDOUT; - dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n"); + drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n"); break; } } - spin_unlock(&mdev->peer_seq_lock); - finish_wait(&mdev->seq_wait, &wait); + spin_unlock(&device->peer_seq_lock); + finish_wait(&device->seq_wait, &wait); return ret; } /* see also bio_flags_to_wire() * DRBD_REQ_*, because we need to semantically map the flags to data packet * flags and back. We may replicate to other kernel versions. */ -static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) +static unsigned long wire_flags_to_bio(u32 dpf) { return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | (dpf & DP_FUA ? REQ_FUA : 0) | @@ -2009,13 +2027,13 @@ static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) (dpf & DP_DISCARD ? 
REQ_DISCARD : 0); } -static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector, +static void fail_postponed_requests(struct drbd_device *device, sector_t sector, unsigned int size) { struct drbd_interval *i; repeat: - drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + drbd_for_each_overlap(i, &device->write_requests, sector, size) { struct drbd_request *req; struct bio_and_error m; @@ -2026,19 +2044,19 @@ static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector, continue; req->rq_state &= ~RQ_POSTPONED; __req_mod(req, NEG_ACKED, &m); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); if (m.bio) - complete_master_bio(mdev, &m); - spin_lock_irq(&mdev->tconn->req_lock); + complete_master_bio(device, &m); + spin_lock_irq(&device->resource->req_lock); goto repeat; } } -static int handle_write_conflicts(struct drbd_conf *mdev, +static int handle_write_conflicts(struct drbd_device *device, struct drbd_peer_request *peer_req) { - struct drbd_tconn *tconn = mdev->tconn; - bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags); + struct drbd_connection *connection = peer_req->peer_device->connection; + bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags); sector_t sector = peer_req->i.sector; const unsigned int size = peer_req->i.size; struct drbd_interval *i; @@ -2049,10 +2067,10 @@ static int handle_write_conflicts(struct drbd_conf *mdev, * Inserting the peer request into the write_requests tree will prevent * new conflicting local requests from being added. 
*/ - drbd_insert_interval(&mdev->write_requests, &peer_req->i); + drbd_insert_interval(&device->write_requests, &peer_req->i); repeat: - drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + drbd_for_each_overlap(i, &device->write_requests, sector, size) { if (i == &peer_req->i) continue; @@ -2062,7 +2080,7 @@ static int handle_write_conflicts(struct drbd_conf *mdev, * should not happen in a two-node setup. Wait for the * earlier peer request to complete. */ - err = drbd_wait_misc(mdev, i); + err = drbd_wait_misc(device, i); if (err) goto out; goto repeat; @@ -2080,18 +2098,18 @@ static int handle_write_conflicts(struct drbd_conf *mdev, (i->size >> 9) >= sector + (size >> 9); if (!equal) - dev_alert(DEV, "Concurrent writes detected: " + drbd_alert(device, "Concurrent writes detected: " "local=%llus +%u, remote=%llus +%u, " "assuming %s came first\n", (unsigned long long)i->sector, i->size, (unsigned long long)sector, size, superseded ? "local" : "remote"); - inc_unacked(mdev); + inc_unacked(device); peer_req->w.cb = superseded ? e_send_superseded : e_send_retry_write; - list_add_tail(&peer_req->w.list, &mdev->done_ee); - wake_asender(mdev->tconn); + list_add_tail(&peer_req->w.list, &device->done_ee); + wake_asender(connection); err = -ENOENT; goto out; @@ -2100,7 +2118,7 @@ static int handle_write_conflicts(struct drbd_conf *mdev, container_of(i, struct drbd_request, i); if (!equal) - dev_alert(DEV, "Concurrent writes detected: " + drbd_alert(device, "Concurrent writes detected: " "local=%llus +%u, remote=%llus +%u\n", (unsigned long long)i->sector, i->size, (unsigned long long)sector, size); @@ -2118,12 +2136,10 @@ static int handle_write_conflicts(struct drbd_conf *mdev, * request to finish locally before submitting * the conflicting peer request. 
*/ - err = drbd_wait_misc(mdev, &req->i); + err = drbd_wait_misc(device, &req->i); if (err) { - _conn_request_state(mdev->tconn, - NS(conn, C_TIMEOUT), - CS_HARD); - fail_postponed_requests(mdev, sector, size); + _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); + fail_postponed_requests(device, sector, size); goto out; } goto repeat; @@ -2139,14 +2155,15 @@ static int handle_write_conflicts(struct drbd_conf *mdev, out: if (err) - drbd_remove_epoch_entry_interval(mdev, peer_req); + drbd_remove_epoch_entry_interval(device, peer_req); return err; } /* mirrored write */ -static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_Data(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; sector_t sector; struct drbd_peer_request *peer_req; struct p_data *p = pi->data; @@ -2155,17 +2172,18 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) u32 dp_flags; int err, tp; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - if (!get_ldev(mdev)) { + if (!get_ldev(device)) { int err2; - err = wait_for_and_update_peer_seq(mdev, peer_seq); - drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size); - atomic_inc(&tconn->current_epoch->epoch_size); - err2 = drbd_drain_block(mdev, pi->size); + err = wait_for_and_update_peer_seq(peer_device, peer_seq); + drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); + atomic_inc(&connection->current_epoch->epoch_size); + err2 = drbd_drain_block(peer_device, pi->size); if (!err) err = err2; return err; @@ -2178,61 +2196,61 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) */ sector = be64_to_cpu(p->sector); - peer_req = read_in_block(mdev, p->block_id, sector, pi->size); + peer_req = read_in_block(peer_device, p->block_id, sector, 
pi->size); if (!peer_req) { - put_ldev(mdev); + put_ldev(device); return -EIO; } peer_req->w.cb = e_end_block; dp_flags = be32_to_cpu(p->dp_flags); - rw |= wire_flags_to_bio(mdev, dp_flags); + rw |= wire_flags_to_bio(dp_flags); if (peer_req->pages == NULL) { - D_ASSERT(peer_req->i.size == 0); - D_ASSERT(dp_flags & DP_FLUSH); + D_ASSERT(device, peer_req->i.size == 0); + D_ASSERT(device, dp_flags & DP_FLUSH); } if (dp_flags & DP_MAY_SET_IN_SYNC) peer_req->flags |= EE_MAY_SET_IN_SYNC; - spin_lock(&tconn->epoch_lock); - peer_req->epoch = tconn->current_epoch; + spin_lock(&connection->epoch_lock); + peer_req->epoch = connection->current_epoch; atomic_inc(&peer_req->epoch->epoch_size); atomic_inc(&peer_req->epoch->active); - spin_unlock(&tconn->epoch_lock); + spin_unlock(&connection->epoch_lock); rcu_read_lock(); - tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; + tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; rcu_read_unlock(); if (tp) { peer_req->flags |= EE_IN_INTERVAL_TREE; - err = wait_for_and_update_peer_seq(mdev, peer_seq); + err = wait_for_and_update_peer_seq(peer_device, peer_seq); if (err) goto out_interrupted; - spin_lock_irq(&mdev->tconn->req_lock); - err = handle_write_conflicts(mdev, peer_req); + spin_lock_irq(&device->resource->req_lock); + err = handle_write_conflicts(device, peer_req); if (err) { - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); if (err == -ENOENT) { - put_ldev(mdev); + put_ldev(device); return 0; } goto out_interrupted; } } else { - update_peer_seq(mdev, peer_seq); - spin_lock_irq(&mdev->tconn->req_lock); + update_peer_seq(peer_device, peer_seq); + spin_lock_irq(&device->resource->req_lock); } - list_add(&peer_req->w.list, &mdev->active_ee); - spin_unlock_irq(&mdev->tconn->req_lock); + list_add(&peer_req->w.list, &device->active_ee); + spin_unlock_irq(&device->resource->req_lock); - if (mdev->state.conn == C_SYNC_TARGET) - wait_event(mdev->ee_wait, 
!overlapping_resync_write(mdev, peer_req)); + if (device->state.conn == C_SYNC_TARGET) + wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); - if (mdev->tconn->agreed_pro_version < 100) { + if (peer_device->connection->agreed_pro_version < 100) { rcu_read_lock(); - switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) { + switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) { case DRBD_PROT_C: dp_flags |= DP_SEND_WRITE_ACK; break; @@ -2245,7 +2263,7 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) if (dp_flags & DP_SEND_WRITE_ACK) { peer_req->flags |= EE_SEND_WRITE_ACK; - inc_unacked(mdev); + inc_unacked(device); /* corresponding dec_unacked() in e_end_block() * respective _drbd_clear_done_ee */ } @@ -2253,34 +2271,34 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) if (dp_flags & DP_SEND_RECEIVE_ACK) { /* I really don't like it that the receiver thread * sends on the msock, but anyways */ - drbd_send_ack(mdev, P_RECV_ACK, peer_req); + drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req); } - if (mdev->state.pdsk < D_INCONSISTENT) { + if (device->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size); + drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); peer_req->flags |= EE_CALL_AL_COMPLETE_IO; peer_req->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(mdev, &peer_req->i, true); + drbd_al_begin_io(device, &peer_req->i, true); } - err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR); + err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); if (!err) return 0; /* don't care for the reason here */ - dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->tconn->req_lock); + drbd_err(device, "submit failed, triggering re-connect\n"); + spin_lock_irq(&device->resource->req_lock); 
list_del(&peer_req->w.list); - drbd_remove_epoch_entry_interval(mdev, peer_req); - spin_unlock_irq(&mdev->tconn->req_lock); + drbd_remove_epoch_entry_interval(device, peer_req); + spin_unlock_irq(&device->resource->req_lock); if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) - drbd_al_complete_io(mdev, &peer_req->i); + drbd_al_complete_io(device, &peer_req->i); out_interrupted: - drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP); - put_ldev(mdev); - drbd_free_peer_req(mdev, peer_req); + drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); + put_ldev(device); + drbd_free_peer_req(device, peer_req); return err; } @@ -2295,9 +2313,9 @@ out_interrupted: * The current sync rate used here uses only the most recent two step marks, * to have a short time average so we can react faster. */ -int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) +int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) { - struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; + struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; unsigned long db, dt, dbdt; struct lc_element *tmp; int curr_events; @@ -2305,48 +2323,48 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) unsigned int c_min_rate; rcu_read_lock(); - c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate; + c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; rcu_read_unlock(); /* feature disabled? 
*/ if (c_min_rate == 0) return 0; - spin_lock_irq(&mdev->al_lock); - tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector)); + spin_lock_irq(&device->al_lock); + tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); if (tmp) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); if (test_bit(BME_PRIORITY, &bm_ext->flags)) { - spin_unlock_irq(&mdev->al_lock); + spin_unlock_irq(&device->al_lock); return 0; } /* Do not slow down if app IO is already waiting for this extent */ } - spin_unlock_irq(&mdev->al_lock); + spin_unlock_irq(&device->al_lock); curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + (int)part_stat_read(&disk->part0, sectors[1]) - - atomic_read(&mdev->rs_sect_ev); + atomic_read(&device->rs_sect_ev); - if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { + if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { unsigned long rs_left; int i; - mdev->rs_last_events = curr_events; + device->rs_last_events = curr_events; /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, * approx. 
*/ - i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; + i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; - if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) - rs_left = mdev->ov_left; + if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) + rs_left = device->ov_left; else - rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + rs_left = drbd_bm_total_weight(device) - device->rs_failed; - dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; + dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ; if (!dt) dt++; - db = mdev->rs_mark_left[i] - rs_left; + db = device->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); if (dbdt > c_min_rate) @@ -2356,9 +2374,10 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) } -static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; sector_t sector; sector_t capacity; struct drbd_peer_request *peer_req; @@ -2367,58 +2386,59 @@ static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) unsigned int fault_type; struct p_block_req *p = pi->data; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; - capacity = drbd_get_capacity(mdev->this_bdev); + device = peer_device->device; + capacity = drbd_get_capacity(device->this_bdev); sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { - dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, + drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); return -EINVAL; } if (sector + (size>>9) > capacity) { - dev_err(DEV, "%s:%d: 
sector: %llus, size: %u\n", __FILE__, __LINE__, + drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); return -EINVAL; } - if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { + if (!get_ldev_if_state(device, D_UP_TO_DATE)) { verb = 1; switch (pi->cmd) { case P_DATA_REQUEST: - drbd_send_ack_rp(mdev, P_NEG_DREPLY, p); + drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); break; case P_RS_DATA_REQUEST: case P_CSUM_RS_REQUEST: case P_OV_REQUEST: - drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p); + drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p); break; case P_OV_REPLY: verb = 0; - dec_rs_pending(mdev); - drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC); + dec_rs_pending(device); + drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); break; default: BUG(); } if (verb && __ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Can not satisfy peer's read request, " + drbd_err(device, "Can not satisfy peer's read request, " "no local data.\n"); /* drain possibly payload */ - return drbd_drain_block(mdev, pi->size); + return drbd_drain_block(peer_device, pi->size); } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. 
*/ - peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO); + peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO); if (!peer_req) { - put_ldev(mdev); + put_ldev(device); return -ENOMEM; } @@ -2433,7 +2453,7 @@ static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) peer_req->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; /* used in the sector offset progress display */ - mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); + device->bm_resync_fo = BM_SECT_TO_BIT(sector); break; case P_OV_REPLY: @@ -2449,19 +2469,19 @@ static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) peer_req->digest = di; peer_req->flags |= EE_HAS_DIGEST; - if (drbd_recv_all(mdev->tconn, di->digest, pi->size)) + if (drbd_recv_all(peer_device->connection, di->digest, pi->size)) goto out_free_e; if (pi->cmd == P_CSUM_RS_REQUEST) { - D_ASSERT(mdev->tconn->agreed_pro_version >= 89); + D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ - mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); + device->bm_resync_fo = BM_SECT_TO_BIT(sector); } else if (pi->cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ - atomic_add(size >> 9, &mdev->rs_sect_in); + atomic_add(size >> 9, &device->rs_sect_in); peer_req->w.cb = w_e_end_ov_reply; - dec_rs_pending(mdev); + dec_rs_pending(device); /* drbd_rs_begin_io done when we sent this request, * but accounting still needs to be done. 
*/ goto submit_for_resync; @@ -2469,19 +2489,19 @@ static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) break; case P_OV_REQUEST: - if (mdev->ov_start_sector == ~(sector_t)0 && - mdev->tconn->agreed_pro_version >= 90) { + if (device->ov_start_sector == ~(sector_t)0 && + peer_device->connection->agreed_pro_version >= 90) { unsigned long now = jiffies; int i; - mdev->ov_start_sector = sector; - mdev->ov_position = sector; - mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector); - mdev->rs_total = mdev->ov_left; + device->ov_start_sector = sector; + device->ov_position = sector; + device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector); + device->rs_total = device->ov_left; for (i = 0; i < DRBD_SYNC_MARKS; i++) { - mdev->rs_mark_left[i] = mdev->ov_left; - mdev->rs_mark_time[i] = now; + device->rs_mark_left[i] = device->ov_left; + device->rs_mark_time[i] = now; } - dev_info(DEV, "Online Verify start sector: %llu\n", + drbd_info(device, "Online Verify start sector: %llu\n", (unsigned long long)sector); } peer_req->w.cb = w_e_end_ov_req; @@ -2514,57 +2534,61 @@ static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) * we would also throttle its application reads. * In that case, throttling is done on the SyncTarget only. 
*/ - if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector)) + if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector)) schedule_timeout_uninterruptible(HZ/10); - if (drbd_rs_begin_io(mdev, sector)) + if (drbd_rs_begin_io(device, sector)) goto out_free_e; submit_for_resync: - atomic_add(size >> 9, &mdev->rs_sect_ev); + atomic_add(size >> 9, &device->rs_sect_ev); submit: - inc_unacked(mdev); - spin_lock_irq(&mdev->tconn->req_lock); - list_add_tail(&peer_req->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->tconn->req_lock); + inc_unacked(device); + spin_lock_irq(&device->resource->req_lock); + list_add_tail(&peer_req->w.list, &device->read_ee); + spin_unlock_irq(&device->resource->req_lock); - if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0) + if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0) return 0; /* don't care for the reason here */ - dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->tconn->req_lock); + drbd_err(device, "submit failed, triggering re-connect\n"); + spin_lock_irq(&device->resource->req_lock); list_del(&peer_req->w.list); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); /* no drbd_rs_complete_io(), we are dropping the connection anyways */ out_free_e: - put_ldev(mdev); - drbd_free_peer_req(mdev, peer_req); + put_ldev(device); + drbd_free_peer_req(device, peer_req); return -EIO; } -static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) +/** + * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries + */ +static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local) { + struct drbd_device *device = peer_device->device; int self, peer, rv = -100; unsigned long ch_self, ch_peer; enum drbd_after_sb_p after_sb_0p; - self = mdev->ldev->md.uuid[UI_BITMAP] & 1; - peer = mdev->p_uuid[UI_BITMAP] & 1; + self = 
device->ldev->md.uuid[UI_BITMAP] & 1; + peer = device->p_uuid[UI_BITMAP] & 1; - ch_peer = mdev->p_uuid[UI_SIZE]; - ch_self = mdev->comm_bm_set; + ch_peer = device->p_uuid[UI_SIZE]; + ch_self = device->comm_bm_set; rcu_read_lock(); - after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p; + after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p; rcu_read_unlock(); switch (after_sb_0p) { case ASB_CONSENSUS: case ASB_DISCARD_SECONDARY: case ASB_CALL_HELPER: case ASB_VIOLENTLY: - dev_err(DEV, "Configuration error.\n"); + drbd_err(device, "Configuration error.\n"); break; case ASB_DISCONNECT: break; @@ -2588,11 +2612,11 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) break; } /* Else fall through to one of the other strategies... */ - dev_warn(DEV, "Discard younger/older primary did not find a decision\n" + drbd_warn(device, "Discard younger/older primary did not find a decision\n" "Using discard-least-changes instead\n"); case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { - rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) + rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) ? -1 : 1; break; } else { @@ -2608,7 +2632,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) rv = 1; else /* ( ch_self == ch_peer ) */ /* Well, then use something else. */ - rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) + rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) ? 
-1 : 1; break; case ASB_DISCARD_LOCAL: @@ -2621,13 +2645,17 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) return rv; } -static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) +/** + * drbd_asb_recover_1p - Recover after split-brain with one remaining primary + */ +static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local) { + struct drbd_device *device = peer_device->device; int hg, rv = -100; enum drbd_after_sb_p after_sb_1p; rcu_read_lock(); - after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p; + after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p; rcu_read_unlock(); switch (after_sb_1p) { case ASB_DISCARD_YOUNGER_PRI: @@ -2636,35 +2664,35 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) case ASB_DISCARD_LOCAL: case ASB_DISCARD_REMOTE: case ASB_DISCARD_ZERO_CHG: - dev_err(DEV, "Configuration error.\n"); + drbd_err(device, "Configuration error.\n"); break; case ASB_DISCONNECT: break; case ASB_CONSENSUS: - hg = drbd_asb_recover_0p(mdev); - if (hg == -1 && mdev->state.role == R_SECONDARY) + hg = drbd_asb_recover_0p(peer_device); + if (hg == -1 && device->state.role == R_SECONDARY) rv = hg; - if (hg == 1 && mdev->state.role == R_PRIMARY) + if (hg == 1 && device->state.role == R_PRIMARY) rv = hg; break; case ASB_VIOLENTLY: - rv = drbd_asb_recover_0p(mdev); + rv = drbd_asb_recover_0p(peer_device); break; case ASB_DISCARD_SECONDARY: - return mdev->state.role == R_PRIMARY ? 1 : -1; + return device->state.role == R_PRIMARY ? 1 : -1; case ASB_CALL_HELPER: - hg = drbd_asb_recover_0p(mdev); - if (hg == -1 && mdev->state.role == R_PRIMARY) { + hg = drbd_asb_recover_0p(peer_device); + if (hg == -1 && device->state.role == R_PRIMARY) { enum drbd_state_rv rv2; /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, * we might be here in C_WF_REPORT_PARAMS which is transient. 
* we do not need to wait for the after state change work either. */ - rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); + rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); if (rv2 != SS_SUCCESS) { - drbd_khelper(mdev, "pri-lost-after-sb"); + drbd_khelper(device, "pri-lost-after-sb"); } else { - dev_warn(DEV, "Successfully gave up primary role.\n"); + drbd_warn(device, "Successfully gave up primary role.\n"); rv = hg; } } else @@ -2674,13 +2702,17 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) return rv; } -static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) +/** + * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries + */ +static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local) { + struct drbd_device *device = peer_device->device; int hg, rv = -100; enum drbd_after_sb_p after_sb_2p; rcu_read_lock(); - after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p; + after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p; rcu_read_unlock(); switch (after_sb_2p) { case ASB_DISCARD_YOUNGER_PRI: @@ -2691,26 +2723,26 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) case ASB_CONSENSUS: case ASB_DISCARD_SECONDARY: case ASB_DISCARD_ZERO_CHG: - dev_err(DEV, "Configuration error.\n"); + drbd_err(device, "Configuration error.\n"); break; case ASB_VIOLENTLY: - rv = drbd_asb_recover_0p(mdev); + rv = drbd_asb_recover_0p(peer_device); break; case ASB_DISCONNECT: break; case ASB_CALL_HELPER: - hg = drbd_asb_recover_0p(mdev); + hg = drbd_asb_recover_0p(peer_device); if (hg == -1) { enum drbd_state_rv rv2; /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, * we might be here in C_WF_REPORT_PARAMS which is transient. * we do not need to wait for the after state change work either. 
*/ - rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); + rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); if (rv2 != SS_SUCCESS) { - drbd_khelper(mdev, "pri-lost-after-sb"); + drbd_khelper(device, "pri-lost-after-sb"); } else { - dev_warn(DEV, "Successfully gave up primary role.\n"); + drbd_warn(device, "Successfully gave up primary role.\n"); rv = hg; } } else @@ -2720,14 +2752,14 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) return rv; } -static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, +static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, u64 bits, u64 flags) { if (!uuid) { - dev_info(DEV, "%s uuid info vanished while I was looking!\n", text); + drbd_info(device, "%s uuid info vanished while I was looking!\n", text); return; } - dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", + drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", text, (unsigned long long)uuid[UI_CURRENT], (unsigned long long)uuid[UI_BITMAP], @@ -2749,13 +2781,13 @@ static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, -1091 requires proto 91 -1096 requires proto 96 */ -static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) +static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local) { u64 self, peer; int i, j; - self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1); - peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); + self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1); + peer = device->p_uuid[UI_CURRENT] & ~((u64)1); *rule_nr = 10; if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED) @@ -2774,46 +2806,46 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if (self == peer) { int rct, dc; /* roles at crash time */ - if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { + if 
(device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) { - if (mdev->tconn->agreed_pro_version < 91) + if (first_peer_device(device)->connection->agreed_pro_version < 91) return -1091; - if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && - (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { - dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n"); - drbd_uuid_move_history(mdev); - mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; - mdev->ldev->md.uuid[UI_BITMAP] = 0; + if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) && + (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { + drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n"); + drbd_uuid_move_history(device); + device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP]; + device->ldev->md.uuid[UI_BITMAP] = 0; - drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, - mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); + drbd_uuid_dump(device, "self", device->ldev->md.uuid, + device->state.disk >= D_NEGOTIATING ? 
drbd_bm_total_weight(device) : 0, 0); *rule_nr = 34; } else { - dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n"); + drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n"); *rule_nr = 36; } return 1; } - if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { + if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) { - if (mdev->tconn->agreed_pro_version < 91) + if (first_peer_device(device)->connection->agreed_pro_version < 91) return -1091; - if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && - (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { - dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n"); + if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) && + (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) { + drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n"); - mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START]; - mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP]; - mdev->p_uuid[UI_BITMAP] = 0UL; + device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START]; + device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP]; + device->p_uuid[UI_BITMAP] = 0UL; - drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); + drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); *rule_nr = 35; } else { - dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n"); + drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n"); *rule_nr = 37; } @@ -2821,8 +2853,8 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l } /* Common power 
[off|failure] */ - rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) + - (mdev->p_uuid[UI_FLAGS] & 2); + rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) + + (device->p_uuid[UI_FLAGS] & 2); /* lowest bit is set when we were primary, * next bit (weight 2) is set when peer was primary */ *rule_nr = 40; @@ -2832,72 +2864,72 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ - dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags); + dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags); return dc ? -1 : 1; } } *rule_nr = 50; - peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1); + peer = device->p_uuid[UI_BITMAP] & ~((u64)1); if (self == peer) return -1; *rule_nr = 51; - peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); + peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (mdev->tconn->agreed_pro_version < 96 ? - (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == - (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : - peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) { + if (first_peer_device(device)->connection->agreed_pro_version < 96 ? + (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == + (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : + peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of the peer's UUIDs. 
*/ - if (mdev->tconn->agreed_pro_version < 91) + if (first_peer_device(device)->connection->agreed_pro_version < 91) return -1091; - mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; - mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; + device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START]; + device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1]; - dev_info(DEV, "Lost last syncUUID packet, corrected:\n"); - drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); + drbd_info(device, "Lost last syncUUID packet, corrected:\n"); + drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); return -1; } } *rule_nr = 60; - self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1); + self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1); for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { - peer = mdev->p_uuid[i] & ~((u64)1); + peer = device->p_uuid[i] & ~((u64)1); if (self == peer) return -2; } *rule_nr = 70; - self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1); - peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); + self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1); + peer = device->p_uuid[UI_CURRENT] & ~((u64)1); if (self == peer) return 1; *rule_nr = 71; - self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); + self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (mdev->tconn->agreed_pro_version < 96 ? - (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == - (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) : - self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { + if (first_peer_device(device)->connection->agreed_pro_version < 96 ? + (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == + (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) : + self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. 
Undo the last start of resync as sync source modifications of our UUIDs. */ - if (mdev->tconn->agreed_pro_version < 91) + if (first_peer_device(device)->connection->agreed_pro_version < 91) return -1091; - __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); - __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); + __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]); + __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]); - dev_info(DEV, "Last syncUUID did not get through, corrected:\n"); - drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, - mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); + drbd_info(device, "Last syncUUID did not get through, corrected:\n"); + drbd_uuid_dump(device, "self", device->ldev->md.uuid, + device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0); return 1; } @@ -2905,24 +2937,24 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l *rule_nr = 80; - peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); + peer = device->p_uuid[UI_CURRENT] & ~((u64)1); for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { - self = mdev->ldev->md.uuid[i] & ~((u64)1); + self = device->ldev->md.uuid[i] & ~((u64)1); if (self == peer) return 2; } *rule_nr = 90; - self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1); - peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1); + self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1); + peer = device->p_uuid[UI_BITMAP] & ~((u64)1); if (self == peer && self != ((u64)0)) return 100; *rule_nr = 100; for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { - self = mdev->ldev->md.uuid[i] & ~((u64)1); + self = device->ldev->md.uuid[i] & ~((u64)1); for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) { - peer = mdev->p_uuid[j] & ~((u64)1); + peer = device->p_uuid[j] & ~((u64)1); if (self == peer) return -100; } @@ -2934,36 +2966,38 @@ static int drbd_uuid_compare(struct 
drbd_conf *mdev, int *rule_nr) __must_hold(l /* drbd_sync_handshake() returns the new conn state on success, or CONN_MASK (-1) on failure. */ -static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, +static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, + enum drbd_role peer_role, enum drbd_disk_state peer_disk) __must_hold(local) { + struct drbd_device *device = peer_device->device; enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; struct net_conf *nc; int hg, rule_nr, rr_conflict, tentative; - mydisk = mdev->state.disk; + mydisk = device->state.disk; if (mydisk == D_NEGOTIATING) - mydisk = mdev->new_state_tmp.disk; + mydisk = device->new_state_tmp.disk; - dev_info(DEV, "drbd_sync_handshake:\n"); + drbd_info(device, "drbd_sync_handshake:\n"); - spin_lock_irq(&mdev->ldev->md.uuid_lock); - drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0); - drbd_uuid_dump(mdev, "peer", mdev->p_uuid, - mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); + spin_lock_irq(&device->ldev->md.uuid_lock); + drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0); + drbd_uuid_dump(device, "peer", device->p_uuid, + device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); - hg = drbd_uuid_compare(mdev, &rule_nr); - spin_unlock_irq(&mdev->ldev->md.uuid_lock); + hg = drbd_uuid_compare(device, &rule_nr); + spin_unlock_irq(&device->ldev->md.uuid_lock); - dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr); + drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); if (hg == -1000) { - dev_alert(DEV, "Unrelated data, aborting!\n"); + drbd_alert(device, "Unrelated data, aborting!\n"); return C_MASK; } if (hg < -1000) { - dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); + drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); return C_MASK; } @@ -2973,38 +3007,38 @@ static enum 
drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol hg = mydisk > D_INCONSISTENT ? 1 : -1; if (f) hg = hg*2; - dev_info(DEV, "Becoming sync %s due to disk states.\n", + drbd_info(device, "Becoming sync %s due to disk states.\n", hg > 0 ? "source" : "target"); } if (abs(hg) == 100) - drbd_khelper(mdev, "initial-split-brain"); + drbd_khelper(device, "initial-split-brain"); rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(peer_device->connection->net_conf); if (hg == 100 || (hg == -100 && nc->always_asbp)) { - int pcount = (mdev->state.role == R_PRIMARY) + int pcount = (device->state.role == R_PRIMARY) + (peer_role == R_PRIMARY); int forced = (hg == -100); switch (pcount) { case 0: - hg = drbd_asb_recover_0p(mdev); + hg = drbd_asb_recover_0p(peer_device); break; case 1: - hg = drbd_asb_recover_1p(mdev); + hg = drbd_asb_recover_1p(peer_device); break; case 2: - hg = drbd_asb_recover_2p(mdev); + hg = drbd_asb_recover_2p(peer_device); break; } if (abs(hg) < 100) { - dev_warn(DEV, "Split-Brain detected, %d primaries, " + drbd_warn(device, "Split-Brain detected, %d primaries, " "automatically solved. Sync from %s node\n", pcount, (hg < 0) ? "peer" : "this"); if (forced) { - dev_warn(DEV, "Doing a full sync, since" + drbd_warn(device, "Doing a full sync, since" " UUIDs where ambiguous.\n"); hg = hg*2; } @@ -3012,13 +3046,13 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1)) + if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1)) + if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) - dev_warn(DEV, "Split-Brain detected, manually solved. " + drbd_warn(device, "Split-Brain detected, manually solved. 
" "Sync from %s node\n", (hg < 0) ? "peer" : "this"); } @@ -3031,44 +3065,44 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol * after an attempted attach on a diskless node. * We just refuse to attach -- well, we drop the "connection" * to that disk, in a way... */ - dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n"); - drbd_khelper(mdev, "split-brain"); + drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n"); + drbd_khelper(device, "split-brain"); return C_MASK; } if (hg > 0 && mydisk <= D_INCONSISTENT) { - dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n"); + drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n"); return C_MASK; } if (hg < 0 && /* by intention we do not use mydisk here. */ - mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { + device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) { switch (rr_conflict) { case ASB_CALL_HELPER: - drbd_khelper(mdev, "pri-lost"); + drbd_khelper(device, "pri-lost"); /* fall through */ case ASB_DISCONNECT: - dev_err(DEV, "I shall become SyncTarget, but I am primary!\n"); + drbd_err(device, "I shall become SyncTarget, but I am primary!\n"); return C_MASK; case ASB_VIOLENTLY: - dev_warn(DEV, "Becoming SyncTarget, violating the stable-data" + drbd_warn(device, "Becoming SyncTarget, violating the stable-data" "assumption\n"); } } - if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { + if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) { if (hg == 0) - dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); + drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n"); else - dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.", + drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.", drbd_conn_str(hg > 0 ? 
C_SYNC_SOURCE : C_SYNC_TARGET), abs(hg) >= 2 ? "full" : "bit-map based"); return C_MASK; } if (abs(hg) >= 2) { - dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", + drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); + if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", BM_LOCKED_SET_ALLOWED)) return C_MASK; } @@ -3079,9 +3113,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol rv = C_WF_BITMAP_T; } else { rv = C_CONNECTED; - if (drbd_bm_total_weight(mdev)) { - dev_info(DEV, "No resync, but %lu bits in bitmap!\n", - drbd_bm_total_weight(mdev)); + if (drbd_bm_total_weight(device)) { + drbd_info(device, "No resync, but %lu bits in bitmap!\n", + drbd_bm_total_weight(device)); } } @@ -3102,7 +3136,7 @@ static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer) return peer; } -static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi) { struct p_protocol *p = pi->data; enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; @@ -3120,58 +3154,58 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) cf = be32_to_cpu(p->conn_flags); p_discard_my_data = cf & CF_DISCARD_MY_DATA; - if (tconn->agreed_pro_version >= 87) { + if (connection->agreed_pro_version >= 87) { int err; if (pi->size > sizeof(integrity_alg)) return -EIO; - err = drbd_recv_all(tconn, integrity_alg, pi->size); + err = drbd_recv_all(connection, integrity_alg, pi->size); if (err) return err; integrity_alg[SHARED_SECRET_MAX - 1] = 0; } if (pi->cmd != P_PROTOCOL_UPDATE) { - clear_bit(CONN_DRY_RUN, &tconn->flags); + clear_bit(CONN_DRY_RUN, &connection->flags); if (cf & CF_DRY_RUN) - set_bit(CONN_DRY_RUN, &tconn->flags); + 
set_bit(CONN_DRY_RUN, &connection->flags); rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); if (p_proto != nc->wire_protocol) { - conn_err(tconn, "incompatible %s settings\n", "protocol"); + drbd_err(connection, "incompatible %s settings\n", "protocol"); goto disconnect_rcu_unlock; } if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { - conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri"); + drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri"); goto disconnect_rcu_unlock; } if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { - conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri"); + drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri"); goto disconnect_rcu_unlock; } if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { - conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri"); + drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri"); goto disconnect_rcu_unlock; } if (p_discard_my_data && nc->discard_my_data) { - conn_err(tconn, "incompatible %s settings\n", "discard-my-data"); + drbd_err(connection, "incompatible %s settings\n", "discard-my-data"); goto disconnect_rcu_unlock; } if (p_two_primaries != nc->two_primaries) { - conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries"); + drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries"); goto disconnect_rcu_unlock; } if (strcmp(integrity_alg, nc->integrity_alg)) { - conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg"); + drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg"); goto disconnect_rcu_unlock; } @@ -3192,7 +3226,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); if (!peer_integrity_tfm) { - conn_err(tconn, "peer data-integrity-alg %s not supported\n", + drbd_err(connection, "peer data-integrity-alg %s 
not supported\n", integrity_alg); goto disconnect; } @@ -3201,20 +3235,20 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) int_dig_in = kmalloc(hash_size, GFP_KERNEL); int_dig_vv = kmalloc(hash_size, GFP_KERNEL); if (!(int_dig_in && int_dig_vv)) { - conn_err(tconn, "Allocation of buffers for data integrity checking failed\n"); + drbd_err(connection, "Allocation of buffers for data integrity checking failed\n"); goto disconnect; } } new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_net_conf) { - conn_err(tconn, "Allocation of new net_conf failed\n"); + drbd_err(connection, "Allocation of new net_conf failed\n"); goto disconnect; } - mutex_lock(&tconn->data.mutex); - mutex_lock(&tconn->conf_update); - old_net_conf = tconn->net_conf; + mutex_lock(&connection->data.mutex); + mutex_lock(&connection->resource->conf_update); + old_net_conf = connection->net_conf; *new_net_conf = *old_net_conf; new_net_conf->wire_protocol = p_proto; @@ -3223,19 +3257,19 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p); new_net_conf->two_primaries = p_two_primaries; - rcu_assign_pointer(tconn->net_conf, new_net_conf); - mutex_unlock(&tconn->conf_update); - mutex_unlock(&tconn->data.mutex); + rcu_assign_pointer(connection->net_conf, new_net_conf); + mutex_unlock(&connection->resource->conf_update); + mutex_unlock(&connection->data.mutex); - crypto_free_hash(tconn->peer_integrity_tfm); - kfree(tconn->int_dig_in); - kfree(tconn->int_dig_vv); - tconn->peer_integrity_tfm = peer_integrity_tfm; - tconn->int_dig_in = int_dig_in; - tconn->int_dig_vv = int_dig_vv; + crypto_free_hash(connection->peer_integrity_tfm); + kfree(connection->int_dig_in); + kfree(connection->int_dig_vv); + connection->peer_integrity_tfm = peer_integrity_tfm; + connection->int_dig_in = int_dig_in; + connection->int_dig_vv = int_dig_vv; if (strcmp(old_net_conf->integrity_alg, 
integrity_alg)) - conn_info(tconn, "peer data-integrity-alg: %s\n", + drbd_info(connection, "peer data-integrity-alg: %s\n", integrity_alg[0] ? integrity_alg : "(none)"); synchronize_rcu(); @@ -3248,7 +3282,7 @@ disconnect: crypto_free_hash(peer_integrity_tfm); kfree(int_dig_in); kfree(int_dig_vv); - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } @@ -3257,7 +3291,8 @@ disconnect: * return: NULL (alg name was "") * ERR_PTR(error) if something goes wrong * or the crypto hash ptr, if it worked out ok. */ -struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, +static +struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, const char *alg, const char *name) { struct crypto_hash *tfm; @@ -3267,21 +3302,21 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { - dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n", + drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n", alg, name, PTR_ERR(tfm)); return tfm; } return tfm; } -static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi) +static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi) { - void *buffer = tconn->data.rbuf; + void *buffer = connection->data.rbuf; int size = pi->size; while (size) { int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE); - s = drbd_recv(tconn, buffer, s); + s = drbd_recv(connection, buffer, s); if (s <= 0) { if (s < 0) return s; @@ -3305,30 +3340,32 @@ static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info * * (We can also end up here if drbd is misconfigured.) 
*/ -static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi) +static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi) { - conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n", + drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n", cmdname(pi->cmd), pi->vnr); - return ignore_remaining_packet(tconn, pi); + return ignore_remaining_packet(connection, pi); } -static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_rs_param_95 *p; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; struct net_conf *old_net_conf, *new_net_conf = NULL; struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; - const int apv = tconn->agreed_pro_version; + const int apv = connection->agreed_pro_version; struct fifo_buffer *old_plan = NULL, *new_plan = NULL; int fifo_size = 0; int err; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) - return config_unknown_volume(tconn, pi); + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) + return config_unknown_volume(connection, pi); + device = peer_device->device; exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? 
sizeof(struct p_rs_param) @@ -3337,7 +3374,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) : /* apv >= 95 */ sizeof(struct p_rs_param_95); if (pi->size > exp_max_sz) { - dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", + drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n", pi->size, exp_max_sz); return -EIO; } @@ -3348,33 +3385,33 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } else if (apv <= 94) { header_size = sizeof(struct p_rs_param_89); data_size = pi->size - header_size; - D_ASSERT(data_size == 0); + D_ASSERT(device, data_size == 0); } else { header_size = sizeof(struct p_rs_param_95); data_size = pi->size - header_size; - D_ASSERT(data_size == 0); + D_ASSERT(device, data_size == 0); } /* initialize verify_alg and csums_alg */ p = pi->data; memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - err = drbd_recv_all(mdev->tconn, p, header_size); + err = drbd_recv_all(peer_device->connection, p, header_size); if (err) return err; - mutex_lock(&mdev->tconn->conf_update); - old_net_conf = mdev->tconn->net_conf; - if (get_ldev(mdev)) { + mutex_lock(&connection->resource->conf_update); + old_net_conf = peer_device->connection->net_conf; + if (get_ldev(device)) { new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); if (!new_disk_conf) { - put_ldev(mdev); - mutex_unlock(&mdev->tconn->conf_update); - dev_err(DEV, "Allocation of new disk_conf failed\n"); + put_ldev(device); + mutex_unlock(&connection->resource->conf_update); + drbd_err(device, "Allocation of new disk_conf failed\n"); return -ENOMEM; } - old_disk_conf = mdev->ldev->disk_conf; + old_disk_conf = device->ldev->disk_conf; *new_disk_conf = *old_disk_conf; new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); @@ -3383,37 +3420,37 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) if (apv >= 88) { if (apv == 88) { if (data_size > 
SHARED_SECRET_MAX || data_size == 0) { - dev_err(DEV, "verify-alg of wrong size, " + drbd_err(device, "verify-alg of wrong size, " "peer wants %u, accepting only up to %u byte\n", data_size, SHARED_SECRET_MAX); err = -EIO; goto reconnect; } - err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size); + err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size); if (err) goto reconnect; /* we expect NUL terminated string */ /* but just in case someone tries to be evil */ - D_ASSERT(p->verify_alg[data_size-1] == 0); + D_ASSERT(device, p->verify_alg[data_size-1] == 0); p->verify_alg[data_size-1] = 0; } else /* apv >= 89 */ { /* we still expect NUL terminated strings */ /* but just in case someone tries to be evil */ - D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0); - D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0); + D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0); + D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0); p->verify_alg[SHARED_SECRET_MAX-1] = 0; p->csums_alg[SHARED_SECRET_MAX-1] = 0; } if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { - if (mdev->state.conn == C_WF_REPORT_PARAMS) { - dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", + if (device->state.conn == C_WF_REPORT_PARAMS) { + drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", old_net_conf->verify_alg, p->verify_alg); goto disconnect; } - verify_tfm = drbd_crypto_alloc_digest_safe(mdev, + verify_tfm = drbd_crypto_alloc_digest_safe(device, p->verify_alg, "verify-alg"); if (IS_ERR(verify_tfm)) { verify_tfm = NULL; @@ -3422,12 +3459,12 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) } if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) { - if (mdev->state.conn == C_WF_REPORT_PARAMS) { - dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", + if (device->state.conn == C_WF_REPORT_PARAMS) { + drbd_err(device, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", old_net_conf->csums_alg, p->csums_alg); goto disconnect; } - csums_tfm = drbd_crypto_alloc_digest_safe(mdev, + csums_tfm = drbd_crypto_alloc_digest_safe(device, p->csums_alg, "csums-alg"); if (IS_ERR(csums_tfm)) { csums_tfm = NULL; @@ -3442,11 +3479,11 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s->size) { + if (fifo_size != device->rs_plan_s->size) { new_plan = fifo_alloc(fifo_size); if (!new_plan) { - dev_err(DEV, "kmalloc of fifo_buffer failed"); - put_ldev(mdev); + drbd_err(device, "kmalloc of fifo_buffer failed"); + put_ldev(device); goto disconnect; } } @@ -3455,7 +3492,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) if (verify_tfm || csums_tfm) { new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_net_conf) { - dev_err(DEV, "Allocation of new net_conf failed\n"); + drbd_err(device, "Allocation of new net_conf failed\n"); goto disconnect; } @@ -3464,32 +3501,32 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) if (verify_tfm) { strcpy(new_net_conf->verify_alg, p->verify_alg); new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_hash(mdev->tconn->verify_tfm); - mdev->tconn->verify_tfm = verify_tfm; - dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); + crypto_free_hash(peer_device->connection->verify_tfm); + peer_device->connection->verify_tfm = verify_tfm; + drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { strcpy(new_net_conf->csums_alg, p->csums_alg); new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; - crypto_free_hash(mdev->tconn->csums_tfm); - mdev->tconn->csums_tfm = csums_tfm; - dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); + 
crypto_free_hash(peer_device->connection->csums_tfm); + peer_device->connection->csums_tfm = csums_tfm; + drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg); } - rcu_assign_pointer(tconn->net_conf, new_net_conf); + rcu_assign_pointer(connection->net_conf, new_net_conf); } } if (new_disk_conf) { - rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - put_ldev(mdev); + rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); + put_ldev(device); } if (new_plan) { - old_plan = mdev->rs_plan_s; - rcu_assign_pointer(mdev->rs_plan_s, new_plan); + old_plan = device->rs_plan_s; + rcu_assign_pointer(device->rs_plan_s, new_plan); } - mutex_unlock(&mdev->tconn->conf_update); + mutex_unlock(&connection->resource->conf_update); synchronize_rcu(); if (new_net_conf) kfree(old_net_conf); @@ -3500,30 +3537,30 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) reconnect: if (new_disk_conf) { - put_ldev(mdev); + put_ldev(device); kfree(new_disk_conf); } - mutex_unlock(&mdev->tconn->conf_update); + mutex_unlock(&connection->resource->conf_update); return -EIO; disconnect: kfree(new_plan); if (new_disk_conf) { - put_ldev(mdev); + put_ldev(device); kfree(new_disk_conf); } - mutex_unlock(&mdev->tconn->conf_update); + mutex_unlock(&connection->resource->conf_update); /* just for completeness: actually not needed, * as this is not reached if csums_tfm was ok. 
*/ crypto_free_hash(csums_tfm); /* but free the verify_tfm again, if csums_tfm did not work out */ crypto_free_hash(verify_tfm); - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } /* warn if the arguments differ by more than 12.5% */ -static void warn_if_differ_considerably(struct drbd_conf *mdev, +static void warn_if_differ_considerably(struct drbd_device *device, const char *s, sector_t a, sector_t b) { sector_t d; @@ -3531,54 +3568,56 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, return; d = (a > b) ? (a - b) : (b - a); if (d > (a>>3) || d > (b>>3)) - dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s, + drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_sizes *p = pi->data; enum determine_dev_size dd = DS_UNCHANGED; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) - return config_unknown_volume(tconn, pi); + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) + return config_unknown_volume(connection, pi); + device = peer_device->device; p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); /* just store the peer's disk size for now. * we still need to figure out whether we accept that. 
*/ - mdev->p_size = p_size; + device->p_size = p_size; - if (get_ldev(mdev)) { + if (get_ldev(device)) { rcu_read_lock(); - my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); - warn_if_differ_considerably(mdev, "lower level device sizes", - p_size, drbd_get_max_capacity(mdev->ldev)); - warn_if_differ_considerably(mdev, "user requested size", + warn_if_differ_considerably(device, "lower level device sizes", + p_size, drbd_get_max_capacity(device->ldev)); + warn_if_differ_considerably(device, "user requested size", p_usize, my_usize); /* if this is the first connect, or an otherwise expected * param exchange, choose the minimum */ - if (mdev->state.conn == C_WF_REPORT_PARAMS) + if (device->state.conn == C_WF_REPORT_PARAMS) p_usize = min_not_zero(my_usize, p_usize); /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. */ - if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) < - drbd_get_capacity(mdev->this_bdev) && - mdev->state.disk >= D_OUTDATED && - mdev->state.conn < C_CONNECTED) { - dev_err(DEV, "The peer's disk size is too small!\n"); - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); - put_ldev(mdev); + if (drbd_new_dev_size(device, device->ldev, p_usize, 0) < + drbd_get_capacity(device->this_bdev) && + device->state.disk >= D_OUTDATED && + device->state.conn < C_CONNECTED) { + drbd_err(device, "The peer's disk size is too small!\n"); + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); + put_ldev(device); return -EIO; } @@ -3587,145 +3626,147 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); if (!new_disk_conf) { - dev_err(DEV, "Allocation of new disk_conf failed\n"); - put_ldev(mdev); + drbd_err(device, "Allocation of new disk_conf failed\n"); + put_ldev(device); return 
-ENOMEM; } - mutex_lock(&mdev->tconn->conf_update); - old_disk_conf = mdev->ldev->disk_conf; + mutex_lock(&connection->resource->conf_update); + old_disk_conf = device->ldev->disk_conf; *new_disk_conf = *old_disk_conf; new_disk_conf->disk_size = p_usize; - rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - mutex_unlock(&mdev->tconn->conf_update); + rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); + mutex_unlock(&connection->resource->conf_update); synchronize_rcu(); kfree(old_disk_conf); - dev_info(DEV, "Peer sets u_size to %lu sectors\n", + drbd_info(device, "Peer sets u_size to %lu sectors\n", (unsigned long)my_usize); } - put_ldev(mdev); + put_ldev(device); } ddsf = be16_to_cpu(p->dds_flags); - if (get_ldev(mdev)) { - dd = drbd_determine_dev_size(mdev, ddsf, NULL); - put_ldev(mdev); + if (get_ldev(device)) { + dd = drbd_determine_dev_size(device, ddsf, NULL); + put_ldev(device); if (dd == DS_ERROR) return -EIO; - drbd_md_sync(mdev); + drbd_md_sync(device); } else { /* I am diskless, need to accept the peer's size. 
*/ - drbd_set_my_capacity(mdev, p_size); + drbd_set_my_capacity(device, p_size); } - mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size); - drbd_reconsider_max_bio_size(mdev); + device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); + drbd_reconsider_max_bio_size(device); - if (get_ldev(mdev)) { - if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { - mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); + if (get_ldev(device)) { + if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) { + device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); ldsc = 1; } - put_ldev(mdev); + put_ldev(device); } - if (mdev->state.conn > C_WF_REPORT_PARAMS) { + if (device->state.conn > C_WF_REPORT_PARAMS) { if (be64_to_cpu(p->c_size) != - drbd_get_capacity(mdev->this_bdev) || ldsc) { + drbd_get_capacity(device->this_bdev) || ldsc) { /* we have different sizes, probably peer * needs to know my new size... */ - drbd_send_sizes(mdev, 0, ddsf); + drbd_send_sizes(peer_device, 0, ddsf); } - if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || - (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) { - if (mdev->state.pdsk >= D_INCONSISTENT && - mdev->state.disk >= D_INCONSISTENT) { + if (test_and_clear_bit(RESIZE_PENDING, &device->flags) || + (dd == DS_GREW && device->state.conn == C_CONNECTED)) { + if (device->state.pdsk >= D_INCONSISTENT && + device->state.disk >= D_INCONSISTENT) { if (ddsf & DDSF_NO_RESYNC) - dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n"); + drbd_info(device, "Resync of new storage suppressed with --assume-clean\n"); else - resync_after_online_grow(mdev); + resync_after_online_grow(device); } else - set_bit(RESYNC_AFTER_NEG, &mdev->flags); + set_bit(RESYNC_AFTER_NEG, &device->flags); } } return 0; } -static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_uuids(struct drbd_connection *connection, struct packet_info 
*pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_uuids *p = pi->data; u64 *p_uuid; int i, updated_uuids = 0; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) - return config_unknown_volume(tconn, pi); + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) + return config_unknown_volume(connection, pi); + device = peer_device->device; p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); if (!p_uuid) { - dev_err(DEV, "kmalloc of p_uuid failed\n"); + drbd_err(device, "kmalloc of p_uuid failed\n"); return false; } for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) p_uuid[i] = be64_to_cpu(p->uuid[i]); - kfree(mdev->p_uuid); - mdev->p_uuid = p_uuid; + kfree(device->p_uuid); + device->p_uuid = p_uuid; - if (mdev->state.conn < C_CONNECTED && - mdev->state.disk < D_INCONSISTENT && - mdev->state.role == R_PRIMARY && - (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { - dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", - (unsigned long long)mdev->ed_uuid); - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + if (device->state.conn < C_CONNECTED && + device->state.disk < D_INCONSISTENT && + device->state.role == R_PRIMARY && + (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { + drbd_err(device, "Can only connect to data with current UUID=%016llX\n", + (unsigned long long)device->ed_uuid); + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } - if (get_ldev(mdev)) { + if (get_ldev(device)) { int skip_initial_sync = - mdev->state.conn == C_CONNECTED && - mdev->tconn->agreed_pro_version >= 90 && - mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + device->state.conn == C_CONNECTED && + peer_device->connection->agreed_pro_version >= 90 && + device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && (p_uuid[UI_FLAGS] & 8); if (skip_initial_sync) { - dev_info(DEV, "Accepted 
new current UUID, preparing to skip initial sync\n"); - drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, + drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); + drbd_bitmap_io(device, &drbd_bmio_clear_n_write, "clear_n_write from receive_uuids", BM_LOCKED_TEST_ALLOWED); - _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); - _drbd_uuid_set(mdev, UI_BITMAP, 0); - _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), + _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); + _drbd_uuid_set(device, UI_BITMAP, 0); + _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); - drbd_md_sync(mdev); + drbd_md_sync(device); updated_uuids = 1; } - put_ldev(mdev); - } else if (mdev->state.disk < D_INCONSISTENT && - mdev->state.role == R_PRIMARY) { + put_ldev(device); + } else if (device->state.disk < D_INCONSISTENT && + device->state.role == R_PRIMARY) { /* I am a diskless primary, the peer just created a new current UUID for me. */ - updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); } /* Before we test for the disk state, we should wait until an eventually ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the new disk state... 
*/ - mutex_lock(mdev->state_mutex); - mutex_unlock(mdev->state_mutex); - if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) - updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + mutex_lock(device->state_mutex); + mutex_unlock(device->state_mutex); + if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT) + updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); if (updated_uuids) - drbd_print_uuids(mdev, "receiver updated UUIDs to"); + drbd_print_uuids(device, "receiver updated UUIDs to"); return 0; } @@ -3761,38 +3802,40 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_req_state *p = pi->data; union drbd_state mask, val; enum drbd_state_rv rv; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) && - mutex_is_locked(mdev->state_mutex)) { - drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); + if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) && + mutex_is_locked(device->state_mutex)) { + drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG); return 0; } mask = convert_state(mask); val = convert_state(val); - rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); - drbd_send_sr_reply(mdev, rv); + rv = drbd_change_state(device, CS_VERBOSE, mask, val); + drbd_send_sr_reply(peer_device, rv); - drbd_md_sync(mdev); + drbd_md_sync(device); return 0; } -static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi) +static int 
receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi) { struct p_req_state *p = pi->data; union drbd_state mask, val; @@ -3801,46 +3844,48 @@ static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info * mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) && - mutex_is_locked(&tconn->cstate_mutex)) { - conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG); + if (test_bit(RESOLVE_CONFLICTS, &connection->flags) && + mutex_is_locked(&connection->cstate_mutex)) { + conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG); return 0; } mask = convert_state(mask); val = convert_state(val); - rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL); - conn_send_sr_reply(tconn, rv); + rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL); + conn_send_sr_reply(connection, rv); return 0; } -static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_state(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_state *p = pi->data; union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags cs_flags; int rv; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) - return config_unknown_volume(tconn, pi); + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) + return config_unknown_volume(connection, pi); + device = peer_device->device; peer_state.i = be32_to_cpu(p->state); real_peer_disk = peer_state.disk; if (peer_state.disk == D_NEGOTIATING) { - real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT; - dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); + real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? 
D_INCONSISTENT : D_CONSISTENT; + drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); } - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); retry: - os = ns = drbd_read_state(mdev); - spin_unlock_irq(&mdev->tconn->req_lock); + os = ns = drbd_read_state(device); + spin_unlock_irq(&device->resource->req_lock); /* If some other part of the code (asender thread, timeout) * already decided to close the connection again, @@ -3872,8 +3917,8 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) * Maybe we should finish it up, too? */ else if (os.conn >= C_SYNC_SOURCE && peer_state.conn == C_CONNECTED) { - if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) - drbd_resync_finished(mdev); + if (drbd_bm_total_weight(device) <= device->rs_failed) + drbd_resync_finished(device); return 0; } } @@ -3881,8 +3926,8 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) /* explicit verify finished notification, stop sector reached. 
*/ if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { - ov_out_of_sync_print(mdev); - drbd_resync_finished(mdev); + ov_out_of_sync_print(device); + drbd_resync_finished(device); return 0; } @@ -3901,8 +3946,8 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) if (peer_state.conn == C_AHEAD) ns.conn = C_BEHIND; - if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && - get_ldev_if_state(mdev, D_NEGOTIATING)) { + if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && + get_ldev_if_state(device, D_NEGOTIATING)) { int cr; /* consider resync */ /* if we established a new connection */ @@ -3914,7 +3959,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) os.disk == D_NEGOTIATING)); /* if we have both been inconsistent, and the peer has been * forced to be UpToDate with --overwrite-data */ - cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); + cr |= test_bit(CONSIDER_RESYNC, &device->flags); /* if we had been plain connected, and the admin requested to * start a sync by "invalidate" or "invalidate-remote" */ cr |= (os.conn == C_CONNECTED && @@ -3922,55 +3967,55 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) peer_state.conn <= C_WF_BITMAP_T)); if (cr) - ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); + ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk); - put_ldev(mdev); + put_ldev(device); if (ns.conn == C_MASK) { ns.conn = C_CONNECTED; - if (mdev->state.disk == D_NEGOTIATING) { - drbd_force_state(mdev, NS(disk, D_FAILED)); + if (device->state.disk == D_NEGOTIATING) { + drbd_force_state(device, NS(disk, D_FAILED)); } else if (peer_state.disk == D_NEGOTIATING) { - dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); + drbd_err(device, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; real_peer_disk = D_DISKLESS; } else { 
- if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags)) + if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags)) return -EIO; - D_ASSERT(os.conn == C_WF_REPORT_PARAMS); - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS); + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } } } - spin_lock_irq(&mdev->tconn->req_lock); - if (os.i != drbd_read_state(mdev).i) + spin_lock_irq(&device->resource->req_lock); + if (os.i != drbd_read_state(device).i) goto retry; - clear_bit(CONSIDER_RESYNC, &mdev->flags); + clear_bit(CONSIDER_RESYNC, &device->flags); ns.peer = peer_state.role; ns.pdsk = real_peer_disk; ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) - ns.disk = mdev->new_state_tmp.disk; + ns.disk = device->new_state_tmp.disk; cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); - if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && - test_bit(NEW_CUR_UUID, &mdev->flags)) { + if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && + test_bit(NEW_CUR_UUID, &device->flags)) { /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this for temporal network outages! 
*/ - spin_unlock_irq(&mdev->tconn->req_lock); - dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); - tl_clear(mdev->tconn); - drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); - conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); + spin_unlock_irq(&device->resource->req_lock); + drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); + tl_clear(peer_device->connection); + drbd_uuid_new_current(device); + clear_bit(NEW_CUR_UUID, &device->flags); + conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); return -EIO; } - rv = _drbd_set_state(mdev, ns, cs_flags, NULL); - ns = drbd_read_state(mdev); - spin_unlock_irq(&mdev->tconn->req_lock); + rv = _drbd_set_state(device, ns, cs_flags, NULL); + ns = drbd_read_state(device); + spin_unlock_irq(&device->resource->req_lock); if (rv < SS_SUCCESS) { - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } @@ -3980,47 +4025,49 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) /* we want resync, peer has not yet decided to sync... */ /* Nowadays only used when forcing a node into primary role and setting its disk to UpToDate with that */ - drbd_send_uuids(mdev); - drbd_send_current_state(mdev); + drbd_send_uuids(peer_device); + drbd_send_current_state(peer_device); } } - clear_bit(DISCARD_MY_DATA, &mdev->flags); + clear_bit(DISCARD_MY_DATA, &device->flags); - drbd_md_sync(mdev); /* update connected indicator, la_size_sect, ... */ + drbd_md_sync(device); /* update connected indicator, la_size_sect, ... 
*/ return 0; } -static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_rs_uuid *p = pi->data; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - wait_event(mdev->misc_wait, - mdev->state.conn == C_WF_SYNC_UUID || - mdev->state.conn == C_BEHIND || - mdev->state.conn < C_CONNECTED || - mdev->state.disk < D_NEGOTIATING); + wait_event(device->misc_wait, + device->state.conn == C_WF_SYNC_UUID || + device->state.conn == C_BEHIND || + device->state.conn < C_CONNECTED || + device->state.disk < D_NEGOTIATING); - /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */ + /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */ /* Here the _drbd_uuid_ functions are right, current should _not_ be rotated into the history */ - if (get_ldev_if_state(mdev, D_NEGOTIATING)) { - _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); - _drbd_uuid_set(mdev, UI_BITMAP, 0UL); + if (get_ldev_if_state(device, D_NEGOTIATING)) { + _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid)); + _drbd_uuid_set(device, UI_BITMAP, 0UL); - drbd_print_uuids(mdev, "updated sync uuid"); - drbd_start_resync(mdev, C_SYNC_TARGET); + drbd_print_uuids(device, "updated sync uuid"); + drbd_start_resync(device, C_SYNC_TARGET); - put_ldev(mdev); + put_ldev(device); } else - dev_err(DEV, "Ignoring SyncUUID packet!\n"); + drbd_err(device, "Ignoring SyncUUID packet!\n"); return 0; } @@ -4032,27 +4079,27 @@ static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi) * code upon failure. 
*/ static int -receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size, +receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size, unsigned long *p, struct bm_xfer_ctx *c) { unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - - drbd_header_size(mdev->tconn); + drbd_header_size(peer_device->connection); unsigned int num_words = min_t(size_t, data_size / sizeof(*p), c->bm_words - c->word_offset); unsigned int want = num_words * sizeof(*p); int err; if (want != size) { - dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size); + drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size); return -EIO; } if (want == 0) return 0; - err = drbd_recv_all(mdev->tconn, p, want); + err = drbd_recv_all(peer_device->connection, p, want); if (err) return err; - drbd_bm_merge_lel(mdev, c->word_offset, num_words, p); + drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -4084,7 +4131,7 @@ static int dcbp_get_pad_bits(struct p_compressed_bm *p) * code upon failure. 
*/ static int -recv_bm_rle_bits(struct drbd_conf *mdev, +recv_bm_rle_bits(struct drbd_peer_device *peer_device, struct p_compressed_bm *p, struct bm_xfer_ctx *c, unsigned int len) @@ -4113,14 +4160,14 @@ recv_bm_rle_bits(struct drbd_conf *mdev, if (toggle) { e = s + rl -1; if (e >= c->bm_bits) { - dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); + drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); return -EIO; } - _drbd_bm_set_bits(mdev, s, e); + _drbd_bm_set_bits(peer_device->device, s, e); } if (have < bits) { - dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", + drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", have, bits, look_ahead, (unsigned int)(bs.cur.b - p->code), (unsigned int)bs.buf_len); @@ -4153,28 +4200,28 @@ recv_bm_rle_bits(struct drbd_conf *mdev, * code upon failure. */ static int -decode_bitmap_c(struct drbd_conf *mdev, +decode_bitmap_c(struct drbd_peer_device *peer_device, struct p_compressed_bm *p, struct bm_xfer_ctx *c, unsigned int len) { if (dcbp_get_code(p) == RLE_VLI_Bits) - return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p)); + return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p)); /* other variants had been implemented for evaluation, * but have been dropped as this one turned out to be "best" * during all our tests. 
*/ - dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); - conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); + drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding); + conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); return -EIO; } -void INFO_bm_xfer_stats(struct drbd_conf *mdev, +void INFO_bm_xfer_stats(struct drbd_device *device, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned int header_size = drbd_header_size(mdev->tconn); + unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; unsigned int plain = header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + @@ -4198,7 +4245,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, r = 1000; r = 1000 - r; - dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " + drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " "total %u; compression: %u.%u%%\n", direction, c->bytes[1], c->packets[1], @@ -4214,129 +4261,133 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. 
*/ -static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct bm_xfer_ctx c; int err; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); + drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information * if you actually set bits during this phase */ c = (struct bm_xfer_ctx) { - .bm_bits = drbd_bm_bits(mdev), - .bm_words = drbd_bm_words(mdev), + .bm_bits = drbd_bm_bits(device), + .bm_words = drbd_bm_words(device), }; for(;;) { if (pi->cmd == P_BITMAP) - err = receive_bitmap_plain(mdev, pi->size, pi->data, &c); + err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c); else if (pi->cmd == P_COMPRESSED_BITMAP) { /* MAYBE: sanity check that we speak proto >= 90, * and the feature is enabled! 
*/ struct p_compressed_bm *p = pi->data; - if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) { - dev_err(DEV, "ReportCBitmap packet too large\n"); + if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) { + drbd_err(device, "ReportCBitmap packet too large\n"); err = -EIO; goto out; } if (pi->size <= sizeof(*p)) { - dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size); + drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size); err = -EIO; goto out; } - err = drbd_recv_all(mdev->tconn, p, pi->size); + err = drbd_recv_all(peer_device->connection, p, pi->size); if (err) goto out; - err = decode_bitmap_c(mdev, p, &c, pi->size); + err = decode_bitmap_c(peer_device, p, &c, pi->size); } else { - dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); + drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); err = -EIO; goto out; } c.packets[pi->cmd == P_BITMAP]++; - c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size; + c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size; if (err <= 0) { if (err < 0) goto out; break; } - err = drbd_recv_header(mdev->tconn, pi); + err = drbd_recv_header(peer_device->connection, pi); if (err) goto out; } - INFO_bm_xfer_stats(mdev, "receive", &c); + INFO_bm_xfer_stats(device, "receive", &c); - if (mdev->state.conn == C_WF_BITMAP_T) { + if (device->state.conn == C_WF_BITMAP_T) { enum drbd_state_rv rv; - err = drbd_send_bitmap(mdev); + err = drbd_send_bitmap(device); if (err) goto out; /* Omit CS_ORDERED with this state transition to avoid deadlocks. 
*/ - rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); - D_ASSERT(rv == SS_SUCCESS); - } else if (mdev->state.conn != C_WF_BITMAP_S) { + rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + D_ASSERT(device, rv == SS_SUCCESS); + } else if (device->state.conn != C_WF_BITMAP_S) { /* admin may have requested C_DISCONNECTING, * other threads may have noticed network errors */ - dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", - drbd_conn_str(mdev->state.conn)); + drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n", + drbd_conn_str(device->state.conn)); } err = 0; out: - drbd_bm_unlock(mdev); - if (!err && mdev->state.conn == C_WF_BITMAP_S) - drbd_start_resync(mdev, C_SYNC_SOURCE); + drbd_bm_unlock(device); + if (!err && device->state.conn == C_WF_BITMAP_S) + drbd_start_resync(device, C_SYNC_SOURCE); return err; } -static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_skip(struct drbd_connection *connection, struct packet_info *pi) { - conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n", + drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n", pi->cmd, pi->size); - return ignore_remaining_packet(tconn, pi); + return ignore_remaining_packet(connection, pi); } -static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi) { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ - drbd_tcp_quickack(tconn->data.socket); + drbd_tcp_quickack(connection->data.socket); return 0; } -static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi) +static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_block_desc *p = pi->data; - 
mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - switch (mdev->state.conn) { + switch (device->state.conn) { case C_WF_SYNC_UUID: case C_WF_BITMAP_T: case C_BEHIND: break; default: - dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", - drbd_conn_str(mdev->state.conn)); + drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", + drbd_conn_str(device->state.conn)); } - drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); + drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); return 0; } @@ -4344,7 +4395,7 @@ static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi) struct data_cmd { int expect_payload; size_t pkt_size; - int (*fn)(struct drbd_tconn *, struct packet_info *); + int (*fn)(struct drbd_connection *, struct packet_info *); }; static struct data_cmd drbd_cmd_handler[] = { @@ -4374,43 +4425,43 @@ static struct data_cmd drbd_cmd_handler[] = { [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, }; -static void drbdd(struct drbd_tconn *tconn) +static void drbdd(struct drbd_connection *connection) { struct packet_info pi; size_t shs; /* sub header size */ int err; - while (get_t_state(&tconn->receiver) == RUNNING) { + while (get_t_state(&connection->receiver) == RUNNING) { struct data_cmd *cmd; - drbd_thread_current_set_cpu(&tconn->receiver); - if (drbd_recv_header(tconn, &pi)) + drbd_thread_current_set_cpu(&connection->receiver); + if (drbd_recv_header(connection, &pi)) goto err_out; cmd = &drbd_cmd_handler[pi.cmd]; if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) { - conn_err(tconn, "Unexpected data packet %s (0x%04x)", + drbd_err(connection, "Unexpected data packet %s (0x%04x)", cmdname(pi.cmd), pi.cmd); goto err_out; } shs = cmd->pkt_size; if (pi.size > shs && 
!cmd->expect_payload) { - conn_err(tconn, "No payload expected %s l:%d\n", + drbd_err(connection, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } if (shs) { - err = drbd_recv_all_warn(tconn, pi.data, shs); + err = drbd_recv_all_warn(connection, pi.data, shs); if (err) goto err_out; pi.size -= shs; } - err = cmd->fn(tconn, &pi); + err = cmd->fn(connection, &pi); if (err) { - conn_err(tconn, "error receiving %s, e: %d l: %d!\n", + drbd_err(connection, "error receiving %s, e: %d l: %d!\n", cmdname(pi.cmd), err, pi.size); goto err_out; } @@ -4418,27 +4469,16 @@ static void drbdd(struct drbd_tconn *tconn) return; err_out: - conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); -} - -void conn_flush_workqueue(struct drbd_tconn *tconn) -{ - struct drbd_wq_barrier barr; - - barr.w.cb = w_prev_work_done; - barr.w.tconn = tconn; - init_completion(&barr.done); - drbd_queue_work(&tconn->sender_work, &barr.w); - wait_for_completion(&barr.done); + conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } -static void conn_disconnect(struct drbd_tconn *tconn) +static void conn_disconnect(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; enum drbd_conns oc; int vnr; - if (tconn->cstate == C_STANDALONE) + if (connection->cstate == C_STANDALONE) return; /* We are about to start the cleanup after connection loss. @@ -4446,54 +4486,56 @@ static void conn_disconnect(struct drbd_tconn *tconn) * Usually we should be in some network failure state already, * but just in case we are not, we fix it up here. */ - conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); + conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); /* asender does not clean up anything. 
it must not interfere, either */ - drbd_thread_stop(&tconn->asender); - drbd_free_sock(tconn); + drbd_thread_stop(&connection->asender); + drbd_free_sock(connection); rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - kref_get(&mdev->kref); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + kref_get(&device->kref); rcu_read_unlock(); - drbd_disconnected(mdev); - kref_put(&mdev->kref, &drbd_minor_destroy); + drbd_disconnected(peer_device); + kref_put(&device->kref, drbd_destroy_device); rcu_read_lock(); } rcu_read_unlock(); - if (!list_empty(&tconn->current_epoch->list)) - conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n"); + if (!list_empty(&connection->current_epoch->list)) + drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n"); /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ - atomic_set(&tconn->current_epoch->epoch_size, 0); - tconn->send.seen_any_write_yet = false; + atomic_set(&connection->current_epoch->epoch_size, 0); + connection->send.seen_any_write_yet = false; - conn_info(tconn, "Connection closed\n"); + drbd_info(connection, "Connection closed\n"); - if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) - conn_try_outdate_peer_async(tconn); + if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN) + conn_try_outdate_peer_async(connection); - spin_lock_irq(&tconn->req_lock); - oc = tconn->cstate; + spin_lock_irq(&connection->resource->req_lock); + oc = connection->cstate; if (oc >= C_UNCONNECTED) - _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&connection->resource->req_lock); if (oc == C_DISCONNECTING) - conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); 
+ conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); } -static int drbd_disconnected(struct drbd_conf *mdev) +static int drbd_disconnected(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; unsigned int i; /* wait for current activity to cease. */ - spin_lock_irq(&mdev->tconn->req_lock); - _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); - _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); - _drbd_wait_ee_list_empty(mdev, &mdev->read_ee); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); + _drbd_wait_ee_list_empty(device, &device->active_ee); + _drbd_wait_ee_list_empty(device, &device->sync_ee); + _drbd_wait_ee_list_empty(device, &device->read_ee); + spin_unlock_irq(&device->resource->req_lock); /* We do not have data structures that would allow us to * get the rs_pending_cnt down to 0 again. @@ -4505,42 +4547,42 @@ static int drbd_disconnected(struct drbd_conf *mdev) * resync_LRU. The resync_LRU tracks the whole operation including * the disk-IO, while the rs_pending_cnt only tracks the blocks * on the fly. */ - drbd_rs_cancel_all(mdev); - mdev->rs_total = 0; - mdev->rs_failed = 0; - atomic_set(&mdev->rs_pending_cnt, 0); - wake_up(&mdev->misc_wait); + drbd_rs_cancel_all(device); + device->rs_total = 0; + device->rs_failed = 0; + atomic_set(&device->rs_pending_cnt, 0); + wake_up(&device->misc_wait); - del_timer_sync(&mdev->resync_timer); - resync_timer_fn((unsigned long)mdev); + del_timer_sync(&device->resync_timer); + resync_timer_fn((unsigned long)device); /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, * w_make_resync_request etc. 
which may still be on the worker queue * to be "canceled" */ - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(&peer_device->connection->sender_work); - drbd_finish_peer_reqs(mdev); + drbd_finish_peer_reqs(device); /* This second workqueue flush is necessary, since drbd_finish_peer_reqs() might have issued a work again. The one before drbd_finish_peer_reqs() is necessary to reclain net_ee in drbd_finish_peer_reqs(). */ - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(&peer_device->connection->sender_work); /* need to do it again, drbd_finish_peer_reqs() may have populated it * again via drbd_try_clear_on_disk_bm(). */ - drbd_rs_cancel_all(mdev); + drbd_rs_cancel_all(device); - kfree(mdev->p_uuid); - mdev->p_uuid = NULL; + kfree(device->p_uuid); + device->p_uuid = NULL; - if (!drbd_suspended(mdev)) - tl_clear(mdev->tconn); + if (!drbd_suspended(device)) + tl_clear(peer_device->connection); - drbd_md_sync(mdev); + drbd_md_sync(device); /* serialize with bitmap writeout triggered by the state change, * if any. */ - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); /* tcp_close and release of sendpage pages can be deferred. I don't * want to use SO_LINGER, because apparently it can be deferred for @@ -4549,20 +4591,20 @@ static int drbd_disconnected(struct drbd_conf *mdev) * Actually we don't care for exactly when the network stack does its * put_page(), but release our reference on these pages right here. 
*/ - i = drbd_free_peer_reqs(mdev, &mdev->net_ee); + i = drbd_free_peer_reqs(device, &device->net_ee); if (i) - dev_info(DEV, "net_ee not empty, killed %u entries\n", i); - i = atomic_read(&mdev->pp_in_use_by_net); + drbd_info(device, "net_ee not empty, killed %u entries\n", i); + i = atomic_read(&device->pp_in_use_by_net); if (i) - dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i); - i = atomic_read(&mdev->pp_in_use); + drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); + i = atomic_read(&device->pp_in_use); if (i) - dev_info(DEV, "pp_in_use = %d, expected 0\n", i); + drbd_info(device, "pp_in_use = %d, expected 0\n", i); - D_ASSERT(list_empty(&mdev->read_ee)); - D_ASSERT(list_empty(&mdev->active_ee)); - D_ASSERT(list_empty(&mdev->sync_ee)); - D_ASSERT(list_empty(&mdev->done_ee)); + D_ASSERT(device, list_empty(&device->read_ee)); + D_ASSERT(device, list_empty(&device->active_ee)); + D_ASSERT(device, list_empty(&device->sync_ee)); + D_ASSERT(device, list_empty(&device->done_ee)); return 0; } @@ -4576,19 +4618,19 @@ static int drbd_disconnected(struct drbd_conf *mdev) * * for now, they are expected to be zero, but ignored. */ -static int drbd_send_features(struct drbd_tconn *tconn) +static int drbd_send_features(struct drbd_connection *connection) { struct drbd_socket *sock; struct p_connection_features *p; - sock = &tconn->data; - p = conn_prepare_command(tconn, sock); + sock = &connection->data; + p = conn_prepare_command(connection, sock); if (!p) return -EIO; memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); + return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); } /* @@ -4598,36 +4640,36 @@ static int drbd_send_features(struct drbd_tconn *tconn) * -1 peer talks different language, * no point in trying again, please go standalone. 
*/ -static int drbd_do_features(struct drbd_tconn *tconn) +static int drbd_do_features(struct drbd_connection *connection) { - /* ASSERT current == tconn->receiver ... */ + /* ASSERT current == connection->receiver ... */ struct p_connection_features *p; const int expect = sizeof(struct p_connection_features); struct packet_info pi; int err; - err = drbd_send_features(tconn); + err = drbd_send_features(connection); if (err) return 0; - err = drbd_recv_header(tconn, &pi); + err = drbd_recv_header(connection, &pi); if (err) return 0; if (pi.cmd != P_CONNECTION_FEATURES) { - conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", + drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); return -1; } if (pi.size != expect) { - conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n", + drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n", expect, pi.size); return -1; } p = pi.data; - err = drbd_recv_all_warn(tconn, p, expect); + err = drbd_recv_all_warn(connection, p, expect); if (err) return 0; @@ -4640,15 +4682,15 @@ static int drbd_do_features(struct drbd_tconn *tconn) PRO_VERSION_MIN > p->protocol_max) goto incompat; - tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); + connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); - conn_info(tconn, "Handshake successful: " - "Agreed network protocol version %d\n", tconn->agreed_pro_version); + drbd_info(connection, "Handshake successful: " + "Agreed network protocol version %d\n", connection->agreed_pro_version); return 1; incompat: - conn_err(tconn, "incompatible DRBD dialects: " + drbd_err(connection, "incompatible DRBD dialects: " "I support %d-%d, peer supports %d-%d\n", PRO_VERSION_MIN, PRO_VERSION_MAX, p->protocol_min, p->protocol_max); @@ -4656,10 +4698,10 @@ static int drbd_do_features(struct drbd_tconn *tconn) } #if !defined(CONFIG_CRYPTO_HMAC) && 
!defined(CONFIG_CRYPTO_HMAC_MODULE) -static int drbd_do_auth(struct drbd_tconn *tconn) +static int drbd_do_auth(struct drbd_connection *connection) { - conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); - conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); + drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); + drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); return -1; } #else @@ -4671,7 +4713,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) -1 - auth failed, don't try again. */ -static int drbd_do_auth(struct drbd_tconn *tconn) +static int drbd_do_auth(struct drbd_connection *connection) { struct drbd_socket *sock; char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ @@ -4690,69 +4732,69 @@ static int drbd_do_auth(struct drbd_tconn *tconn) /* FIXME: Put the challenge/response into the preallocated socket buffer. */ rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); key_len = strlen(nc->shared_secret); memcpy(secret, nc->shared_secret, key_len); rcu_read_unlock(); - desc.tfm = tconn->cram_hmac_tfm; + desc.tfm = connection->cram_hmac_tfm; desc.flags = 0; - rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len); + rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); if (rv) { - conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv); + drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv); rv = -1; goto fail; } get_random_bytes(my_challenge, CHALLENGE_LEN); - sock = &tconn->data; - if (!conn_prepare_command(tconn, sock)) { + sock = &connection->data; + if (!conn_prepare_command(connection, sock)) { rv = 0; goto fail; } - rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0, + rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; - err = drbd_recv_header(tconn, &pi); + err = drbd_recv_header(connection, 
&pi); if (err) { rv = 0; goto fail; } if (pi.cmd != P_AUTH_CHALLENGE) { - conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n", + drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } if (pi.size > CHALLENGE_LEN * 2) { - conn_err(tconn, "expected AuthChallenge payload too big.\n"); + drbd_err(connection, "expected AuthChallenge payload too big.\n"); rv = -1; goto fail; } peers_ch = kmalloc(pi.size, GFP_NOIO); if (peers_ch == NULL) { - conn_err(tconn, "kmalloc of peers_ch failed\n"); + drbd_err(connection, "kmalloc of peers_ch failed\n"); rv = -1; goto fail; } - err = drbd_recv_all_warn(tconn, peers_ch, pi.size); + err = drbd_recv_all_warn(connection, peers_ch, pi.size); if (err) { rv = 0; goto fail; } - resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm); + resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm); response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { - conn_err(tconn, "kmalloc of response failed\n"); + drbd_err(connection, "kmalloc of response failed\n"); rv = -1; goto fail; } @@ -4762,40 +4804,40 @@ static int drbd_do_auth(struct drbd_tconn *tconn) rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { - conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); + drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } - if (!conn_prepare_command(tconn, sock)) { + if (!conn_prepare_command(connection, sock)) { rv = 0; goto fail; } - rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0, + rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0, response, resp_size); if (!rv) goto fail; - err = drbd_recv_header(tconn, &pi); + err = drbd_recv_header(connection, &pi); if (err) { rv = 0; goto fail; } if (pi.cmd != P_AUTH_RESPONSE) { - conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n", + drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n", 
cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } if (pi.size != resp_size) { - conn_err(tconn, "expected AuthResponse payload of wrong size\n"); + drbd_err(connection, "expected AuthResponse payload of wrong size\n"); rv = 0; goto fail; } - err = drbd_recv_all_warn(tconn, response , resp_size); + err = drbd_recv_all_warn(connection, response , resp_size); if (err) { rv = 0; goto fail; @@ -4803,7 +4845,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) right_response = kmalloc(resp_size, GFP_NOIO); if (right_response == NULL) { - conn_err(tconn, "kmalloc of right_response failed\n"); + drbd_err(connection, "kmalloc of right_response failed\n"); rv = -1; goto fail; } @@ -4812,7 +4854,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) rv = crypto_hash_digest(&desc, &sg, sg.length, right_response); if (rv) { - conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); + drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } @@ -4820,7 +4862,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) rv = !memcmp(response, right_response, resp_size); if (rv) - conn_info(tconn, "Peer authenticated using %d bytes HMAC\n", + drbd_info(connection, "Peer authenticated using %d bytes HMAC\n", resp_size); else rv = -1; @@ -4834,163 +4876,169 @@ static int drbd_do_auth(struct drbd_tconn *tconn) } #endif -int drbdd_init(struct drbd_thread *thi) +int drbd_receiver(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->tconn; + struct drbd_connection *connection = thi->connection; int h; - conn_info(tconn, "receiver (re)started\n"); + drbd_info(connection, "receiver (re)started\n"); do { - h = conn_connect(tconn); + h = conn_connect(connection); if (h == 0) { - conn_disconnect(tconn); + conn_disconnect(connection); schedule_timeout_interruptible(HZ); } if (h == -1) { - conn_warn(tconn, "Discarding network configuration.\n"); - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + drbd_warn(connection, "Discarding network 
configuration.\n"); + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); } } while (h == 0); if (h > 0) - drbdd(tconn); + drbdd(connection); - conn_disconnect(tconn); + conn_disconnect(connection); - conn_info(tconn, "receiver terminated\n"); + drbd_info(connection, "receiver terminated\n"); return 0; } /* ********* acknowledge sender ******** */ -static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi) { struct p_req_state_reply *p = pi->data; int retcode = be32_to_cpu(p->retcode); if (retcode >= SS_SUCCESS) { - set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags); + set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags); } else { - set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags); - conn_err(tconn, "Requested state change failed by peer: %s (%d)\n", + set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags); + drbd_err(connection, "Requested state change failed by peer: %s (%d)\n", drbd_set_st_err_str(retcode), retcode); } - wake_up(&tconn->ping_wait); + wake_up(&connection->ping_wait); return 0; } -static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_req_state_reply *p = pi->data; int retcode = be32_to_cpu(p->retcode); - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) { - D_ASSERT(tconn->agreed_pro_version < 100); - return got_conn_RqSReply(tconn, pi); + if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) { + D_ASSERT(device, connection->agreed_pro_version < 100); + return got_conn_RqSReply(connection, pi); } if (retcode >= SS_SUCCESS) { - set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); + 
set_bit(CL_ST_CHG_SUCCESS, &device->flags); } else { - set_bit(CL_ST_CHG_FAIL, &mdev->flags); - dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", + set_bit(CL_ST_CHG_FAIL, &device->flags); + drbd_err(device, "Requested state change failed by peer: %s (%d)\n", drbd_set_st_err_str(retcode), retcode); } - wake_up(&mdev->state_wait); + wake_up(&device->state_wait); return 0; } -static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_Ping(struct drbd_connection *connection, struct packet_info *pi) { - return drbd_send_ping_ack(tconn); + return drbd_send_ping_ack(connection); } -static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi) { /* restore idle timeout */ - tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; - if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags)) - wake_up(&tconn->ping_wait); + connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ; + if (!test_and_set_bit(GOT_PING_ACK, &connection->flags)) + wake_up(&connection->ping_wait); return 0; } -static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - D_ASSERT(mdev->tconn->agreed_pro_version >= 89); + D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); - update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); - if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, sector); - 
drbd_set_in_sync(mdev, sector, blksize); + if (get_ldev(device)) { + drbd_rs_complete_io(device, sector); + drbd_set_in_sync(device, sector, blksize); /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ - mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); - put_ldev(mdev); + device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); + put_ldev(device); } - dec_rs_pending(mdev); - atomic_add(blksize >> 9, &mdev->rs_sect_in); + dec_rs_pending(device); + atomic_add(blksize >> 9, &device->rs_sect_in); return 0; } static int -validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, +validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector, struct rb_root *root, const char *func, enum drbd_req_event what, bool missing_ok) { struct drbd_request *req; struct bio_and_error m; - spin_lock_irq(&mdev->tconn->req_lock); - req = find_request(mdev, root, id, sector, missing_ok, func); + spin_lock_irq(&device->resource->req_lock); + req = find_request(device, root, id, sector, missing_ok, func); if (unlikely(!req)) { - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); return -EIO; } __req_mod(req, what, &m); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); if (m.bio) - complete_master_bio(mdev, &m); + complete_master_bio(device, &m); return 0; } -static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); enum drbd_req_event what; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - update_peer_seq(mdev, 
be32_to_cpu(p->seq_num)); + update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - drbd_set_in_sync(mdev, sector, blksize); - dec_rs_pending(mdev); + drbd_set_in_sync(device, sector, blksize); + dec_rs_pending(device); return 0; } switch (pi->cmd) { @@ -5013,33 +5061,35 @@ static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) BUG(); } - return validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->write_requests, __func__, + return validate_req_change_req_state(device, p->block_id, sector, + &device->write_requests, __func__, what, false); } -static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); int err; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - dec_rs_pending(mdev); - drbd_rs_failed_io(mdev, sector, size); + dec_rs_pending(device); + drbd_rs_failed_io(device, sector, size); return 0; } - err = validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->write_requests, __func__, + err = validate_req_change_req_state(device, p->block_id, sector, + &device->write_requests, __func__, NEG_ACKED, true); if (err) { /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. @@ -5047,80 +5097,86 @@ static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi) request is no longer in the collision hash. */ /* In Protocol B we might already have got a P_RECV_ACK but then get a P_NEG_ACK afterwards. 
*/ - drbd_set_out_of_sync(mdev, sector, size); + drbd_set_out_of_sync(device, sector, size); } return 0; } -static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; - update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); - dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n", + drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); - return validate_req_change_req_state(mdev, p->block_id, sector, - &mdev->read_requests, __func__, + return validate_req_change_req_state(device, p->block_id, sector, + &device->read_requests, __func__, NEG_ACKED, false); } -static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; sector_t sector; int size; struct p_block_ack *p = pi->data; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); - dec_rs_pending(mdev); + dec_rs_pending(device); - if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_rs_complete_io(mdev, sector); + if (get_ldev_if_state(device, D_FAILED)) { + drbd_rs_complete_io(device, sector); 
switch (pi->cmd) { case P_NEG_RS_DREPLY: - drbd_rs_failed_io(mdev, sector, size); + drbd_rs_failed_io(device, sector, size); case P_RS_CANCEL: break; default: BUG(); } - put_ldev(mdev); + put_ldev(device); } return 0; } -static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi) { struct p_barrier_ack *p = pi->data; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; - tl_release(tconn, p->barrier, be32_to_cpu(p->set_size)); + tl_release(connection, p->barrier, be32_to_cpu(p->set_size)); rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (mdev->state.conn == C_AHEAD && - atomic_read(&mdev->ap_in_flight) == 0 && - !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { - mdev->start_resync_timer.expires = jiffies + HZ; - add_timer(&mdev->start_resync_timer); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + + if (device->state.conn == C_AHEAD && + atomic_read(&device->ap_in_flight) == 0 && + !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) { + device->start_resync_timer.expires = jiffies + HZ; + add_timer(&device->start_resync_timer); } } rcu_read_unlock(); @@ -5128,90 +5184,94 @@ static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) return 0; } -static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; + struct drbd_device *device; struct p_block_ack *p = pi->data; - struct drbd_work *w; + struct drbd_device_work *dw; sector_t sector; int size; - mdev = vnr_to_mdev(tconn, pi->vnr); - if (!mdev) + peer_device = conn_peer_device(connection, pi->vnr); + if (!peer_device) return -EIO; + device = peer_device->device; sector = be64_to_cpu(p->sector); size = 
be32_to_cpu(p->blksize); - update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) - drbd_ov_out_of_sync_found(mdev, sector, size); + drbd_ov_out_of_sync_found(device, sector, size); else - ov_out_of_sync_print(mdev); + ov_out_of_sync_print(device); - if (!get_ldev(mdev)) + if (!get_ldev(device)) return 0; - drbd_rs_complete_io(mdev, sector); - dec_rs_pending(mdev); + drbd_rs_complete_io(device, sector); + dec_rs_pending(device); - --mdev->ov_left; + --device->ov_left; /* let's advance progress step marks only for every other megabyte */ - if ((mdev->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(mdev, mdev->ov_left); - - if (mdev->ov_left == 0) { - w = kmalloc(sizeof(*w), GFP_NOIO); - if (w) { - w->cb = w_ov_finished; - w->mdev = mdev; - drbd_queue_work(&mdev->tconn->sender_work, w); + if ((device->ov_left & 0x200) == 0x200) + drbd_advance_rs_marks(device, device->ov_left); + + if (device->ov_left == 0) { + dw = kmalloc(sizeof(*dw), GFP_NOIO); + if (dw) { + dw->w.cb = w_ov_finished; + dw->device = device; + drbd_queue_work(&peer_device->connection->sender_work, &dw->w); } else { - dev_err(DEV, "kmalloc(w) failed."); - ov_out_of_sync_print(mdev); - drbd_resync_finished(mdev); + drbd_err(device, "kmalloc(dw) failed."); + ov_out_of_sync_print(device); + drbd_resync_finished(device); } } - put_ldev(mdev); + put_ldev(device); return 0; } -static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi) +static int got_skip(struct drbd_connection *connection, struct packet_info *pi) { return 0; } -static int tconn_finish_peer_reqs(struct drbd_tconn *tconn) +static int connection_finish_peer_reqs(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr, not_empty = 0; do { - clear_bit(SIGNAL_ASENDER, &tconn->flags); + clear_bit(SIGNAL_ASENDER, &connection->flags); flush_signals(current); rcu_read_lock(); - 
idr_for_each_entry(&tconn->volumes, mdev, vnr) { - kref_get(&mdev->kref); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + kref_get(&device->kref); rcu_read_unlock(); - if (drbd_finish_peer_reqs(mdev)) { - kref_put(&mdev->kref, &drbd_minor_destroy); + if (drbd_finish_peer_reqs(device)) { + kref_put(&device->kref, drbd_destroy_device); return 1; } - kref_put(&mdev->kref, &drbd_minor_destroy); + kref_put(&device->kref, drbd_destroy_device); rcu_read_lock(); } - set_bit(SIGNAL_ASENDER, &tconn->flags); + set_bit(SIGNAL_ASENDER, &connection->flags); - spin_lock_irq(&tconn->req_lock); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - not_empty = !list_empty(&mdev->done_ee); + spin_lock_irq(&connection->resource->req_lock); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + not_empty = !list_empty(&device->done_ee); if (not_empty) break; } - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&connection->resource->req_lock); rcu_read_unlock(); } while (not_empty); @@ -5220,7 +5280,7 @@ static int tconn_finish_peer_reqs(struct drbd_tconn *tconn) struct asender_cmd { size_t pkt_size; - int (*fn)(struct drbd_tconn *tconn, struct packet_info *); + int (*fn)(struct drbd_connection *connection, struct packet_info *); }; static struct asender_cmd asender_tbl[] = { @@ -5245,13 +5305,13 @@ static struct asender_cmd asender_tbl[] = { int drbd_asender(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->tconn; + struct drbd_connection *connection = thi->connection; struct asender_cmd *cmd = NULL; struct packet_info pi; int rv; - void *buf = tconn->meta.rbuf; + void *buf = connection->meta.rbuf; int received = 0; - unsigned int header_size = drbd_header_size(tconn); + unsigned int header_size = drbd_header_size(connection); int expect = header_size; bool ping_timeout_active = false; struct net_conf *nc; @@ -5260,45 +5320,45 @@ 
int drbd_asender(struct drbd_thread *thi) rv = sched_setscheduler(current, SCHED_RR, &param); if (rv < 0) - conn_err(tconn, "drbd_asender: ERROR set priority, ret=%d\n", rv); + drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv); while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); ping_timeo = nc->ping_timeo; tcp_cork = nc->tcp_cork; ping_int = nc->ping_int; rcu_read_unlock(); - if (test_and_clear_bit(SEND_PING, &tconn->flags)) { - if (drbd_send_ping(tconn)) { - conn_err(tconn, "drbd_send_ping has failed\n"); + if (test_and_clear_bit(SEND_PING, &connection->flags)) { + if (drbd_send_ping(connection)) { + drbd_err(connection, "drbd_send_ping has failed\n"); goto reconnect; } - tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10; + connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10; ping_timeout_active = true; } /* TODO: conditionally cork; it may hurt latency if we cork without much to send */ if (tcp_cork) - drbd_tcp_cork(tconn->meta.socket); - if (tconn_finish_peer_reqs(tconn)) { - conn_err(tconn, "tconn_finish_peer_reqs() failed\n"); + drbd_tcp_cork(connection->meta.socket); + if (connection_finish_peer_reqs(connection)) { + drbd_err(connection, "connection_finish_peer_reqs() failed\n"); goto reconnect; } /* but unconditionally uncork unless disabled */ if (tcp_cork) - drbd_tcp_uncork(tconn->meta.socket); + drbd_tcp_uncork(connection->meta.socket); /* short circuit, recv_msg would return EINTR anyways. 
*/ if (signal_pending(current)) continue; - rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0); - clear_bit(SIGNAL_ASENDER, &tconn->flags); + rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); + clear_bit(SIGNAL_ASENDER, &connection->flags); flush_signals(current); @@ -5316,51 +5376,51 @@ int drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { - if (test_bit(DISCONNECT_SENT, &tconn->flags)) { + if (test_bit(DISCONNECT_SENT, &connection->flags)) { long t; rcu_read_lock(); - t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; + t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; rcu_read_unlock(); - t = wait_event_timeout(tconn->ping_wait, - tconn->cstate < C_WF_REPORT_PARAMS, + t = wait_event_timeout(connection->ping_wait, + connection->cstate < C_WF_REPORT_PARAMS, t); if (t) break; } - conn_err(tconn, "meta connection shut down by peer.\n"); + drbd_err(connection, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { /* If the data socket received something meanwhile, * that is good enough: peer is still alive. 
*/ - if (time_after(tconn->last_received, - jiffies - tconn->meta.socket->sk->sk_rcvtimeo)) + if (time_after(connection->last_received, + jiffies - connection->meta.socket->sk->sk_rcvtimeo)) continue; if (ping_timeout_active) { - conn_err(tconn, "PingAck did not arrive in time.\n"); + drbd_err(connection, "PingAck did not arrive in time.\n"); goto reconnect; } - set_bit(SEND_PING, &tconn->flags); + set_bit(SEND_PING, &connection->flags); continue; } else if (rv == -EINTR) { continue; } else { - conn_err(tconn, "sock_recvmsg returned %d\n", rv); + drbd_err(connection, "sock_recvmsg returned %d\n", rv); goto reconnect; } if (received == expect && cmd == NULL) { - if (decode_header(tconn, tconn->meta.rbuf, &pi)) + if (decode_header(connection, connection->meta.rbuf, &pi)) goto reconnect; cmd = &asender_tbl[pi.cmd]; if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) { - conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n", + drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); goto disconnect; } expect = header_size + cmd->pkt_size; if (pi.size != expect - header_size) { - conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n", + drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n", pi.cmd, pi.size); goto reconnect; } @@ -5368,21 +5428,21 @@ int drbd_asender(struct drbd_thread *thi) if (received == expect) { bool err; - err = cmd->fn(tconn, &pi); + err = cmd->fn(connection, &pi); if (err) { - conn_err(tconn, "%pf failed\n", cmd->fn); + drbd_err(connection, "%pf failed\n", cmd->fn); goto reconnect; } - tconn->last_received = jiffies; + connection->last_received = jiffies; if (cmd == &asender_tbl[P_PING_ACK]) { /* restore idle timeout */ - tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ; + connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ; ping_timeout_active = false; } - buf = tconn->meta.rbuf; + buf = connection->meta.rbuf; received = 0; expect = header_size; cmd = NULL; @@ -5391,16 +5451,16 @@ int 
drbd_asender(struct drbd_thread *thi) if (0) { reconnect: - conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); - conn_md_sync(tconn); + conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); + conn_md_sync(connection); } if (0) { disconnect: - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); } - clear_bit(SIGNAL_ASENDER, &tconn->flags); + clear_bit(SIGNAL_ASENDER, &connection->flags); - conn_info(tconn, "asender terminated\n"); + drbd_info(connection, "asender terminated\n"); return 0; } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 104a040f24de..3779c8d2875b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -31,37 +31,37 @@ #include "drbd_req.h" -static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size); +static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size); /* Update disk stats at start of I/O request */ -static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req) +static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req) { const int rw = bio_data_dir(req->master_bio); int cpu; cpu = part_stat_lock(); - part_round_stats(cpu, &mdev->vdisk->part0); - part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); - part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], req->i.size >> 9); + part_round_stats(cpu, &device->vdisk->part0); + part_stat_inc(cpu, &device->vdisk->part0, ios[rw]); + part_stat_add(cpu, &device->vdisk->part0, sectors[rw], req->i.size >> 9); (void) cpu; /* The macro invocations above want the cpu argument, I do not like the compiler warning about cpu only assigned but never used... 
*/ - part_inc_in_flight(&mdev->vdisk->part0, rw); + part_inc_in_flight(&device->vdisk->part0, rw); part_stat_unlock(); } /* Update disk stats when completing request upwards */ -static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) +static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) { int rw = bio_data_dir(req->master_bio); unsigned long duration = jiffies - req->start_time; int cpu; cpu = part_stat_lock(); - part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); - part_round_stats(cpu, &mdev->vdisk->part0); - part_dec_in_flight(&mdev->vdisk->part0, rw); + part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration); + part_round_stats(cpu, &device->vdisk->part0); + part_dec_in_flight(&device->vdisk->part0, rw); part_stat_unlock(); } -static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, +static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src) { struct drbd_request *req; @@ -72,7 +72,7 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, drbd_req_make_private_bio(req, bio_src); req->rq_state = bio_data_dir(bio_src) == WRITE ? 
RQ_WRITE : 0; - req->w.mdev = mdev; + req->device = device; req->master_bio = bio_src; req->epoch = 0; @@ -95,14 +95,14 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, void drbd_req_destroy(struct kref *kref) { struct drbd_request *req = container_of(kref, struct drbd_request, kref); - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; const unsigned s = req->rq_state; if ((req->master_bio && !(s & RQ_POSTPONED)) || atomic_read(&req->completion_ref) || (s & RQ_LOCAL_PENDING) || ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) { - dev_err(DEV, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n", + drbd_err(device, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n", s, atomic_read(&req->completion_ref)); return; } @@ -132,10 +132,10 @@ void drbd_req_destroy(struct kref *kref) */ if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) { if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); + drbd_set_out_of_sync(device, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - drbd_set_in_sync(mdev, req->i.sector, req->i.size); + drbd_set_in_sync(device, req->i.sector, req->i.size); } /* one might be tempted to move the drbd_al_complete_io @@ -149,11 +149,11 @@ void drbd_req_destroy(struct kref *kref) * we would forget to resync the corresponding extent. 
*/ if (s & RQ_IN_ACT_LOG) { - if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_al_complete_io(mdev, &req->i); - put_ldev(mdev); + if (get_ldev_if_state(device, D_FAILED)) { + drbd_al_complete_io(device, &req->i); + put_ldev(device); } else if (__ratelimit(&drbd_ratelimit_state)) { - dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), " + drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), " "but my Disk seems to have failed :(\n", (unsigned long long) req->i.sector, req->i.size); } @@ -163,41 +163,42 @@ void drbd_req_destroy(struct kref *kref) mempool_free(req, drbd_request_mempool); } -static void wake_all_senders(struct drbd_tconn *tconn) { - wake_up(&tconn->sender_work.q_wait); +static void wake_all_senders(struct drbd_connection *connection) +{ + wake_up(&connection->sender_work.q_wait); } /* must hold resource->req_lock */ -void start_new_tl_epoch(struct drbd_tconn *tconn) +void start_new_tl_epoch(struct drbd_connection *connection) { /* no point closing an epoch, if it is empty, anyways. */ - if (tconn->current_tle_writes == 0) + if (connection->current_tle_writes == 0) return; - tconn->current_tle_writes = 0; - atomic_inc(&tconn->current_tle_nr); - wake_all_senders(tconn); + connection->current_tle_writes = 0; + atomic_inc(&connection->current_tle_nr); + wake_all_senders(connection); } -void complete_master_bio(struct drbd_conf *mdev, +void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { bio_endio(m->bio, m->error); - dec_ap_bio(mdev); + dec_ap_bio(device); } static void drbd_remove_request_interval(struct rb_root *root, struct drbd_request *req) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct drbd_interval *i = &req->i; drbd_remove_interval(root, i); /* Wake up any processes waiting for this request to complete. */ if (i->waiting) - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); } /* Helper for __req_mod(). 
@@ -210,7 +211,7 @@ static void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) { const unsigned s = req->rq_state; - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; int rw; int error, ok; @@ -226,12 +227,12 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) || (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) || (s & RQ_COMPLETION_SUSP)) { - dev_err(DEV, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s); + drbd_err(device, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s); return; } if (!req->master_bio) { - dev_err(DEV, "drbd_req_complete: Logic BUG, master_bio == NULL!\n"); + drbd_err(device, "drbd_req_complete: Logic BUG, master_bio == NULL!\n"); return; } @@ -259,9 +260,9 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) struct rb_root *root; if (rw == WRITE) - root = &mdev->write_requests; + root = &device->write_requests; else - root = &mdev->read_requests; + root = &device->read_requests; drbd_remove_request_interval(root, req); } @@ -273,11 +274,11 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) * and reset the transfer log epoch write_cnt. 
*/ if (rw == WRITE && - req->epoch == atomic_read(&mdev->tconn->current_tle_nr)) - start_new_tl_epoch(mdev->tconn); + req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr)) + start_new_tl_epoch(first_peer_device(device)->connection); /* Update disk stats */ - _drbd_end_io_acct(mdev, req); + _drbd_end_io_acct(device, req); /* If READ failed, * have it be pushed back to the retry work queue, @@ -305,8 +306,8 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) { - struct drbd_conf *mdev = req->w.mdev; - D_ASSERT(m || (req->rq_state & RQ_POSTPONED)); + struct drbd_device *device = req->device; + D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED)); if (!atomic_sub_and_test(put, &req->completion_ref)) return 0; @@ -328,12 +329,12 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, int clear, int set) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; unsigned s = req->rq_state; int c_put = 0; int k_put = 0; - if (drbd_suspended(mdev) && !((s | clear) & RQ_COMPLETION_SUSP)) + if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP)) set |= RQ_COMPLETION_SUSP; /* apply */ @@ -351,7 +352,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, atomic_inc(&req->completion_ref); if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) { - inc_ap_pending(mdev); + inc_ap_pending(device); atomic_inc(&req->completion_ref); } @@ -362,7 +363,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, kref_get(&req->kref); /* wait for the DONE */ if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) - atomic_add(req->i.size >> 9, &mdev->ap_in_flight); + atomic_add(req->i.size >> 9, &device->ap_in_flight); if (!(s & RQ_COMPLETION_SUSP) && (set & 
RQ_COMPLETION_SUSP)) atomic_inc(&req->completion_ref); @@ -373,7 +374,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, ++c_put; if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) { - D_ASSERT(req->rq_state & RQ_LOCAL_PENDING); + D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING); /* local completion may still come in later, * we need to keep the req object around. */ kref_get(&req->kref); @@ -388,7 +389,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, } if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) { - dec_ap_pending(mdev); + dec_ap_pending(device); ++c_put; } @@ -397,7 +398,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { if (req->rq_state & RQ_NET_SENT) - atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &device->ap_in_flight); ++k_put; } @@ -409,14 +410,14 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, int at_least = k_put + !!c_put; int refcount = atomic_read(&req->kref.refcount); if (refcount < at_least) - dev_err(DEV, + drbd_err(device, "mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n", s, req->rq_state, refcount, at_least); } /* If we made progress, retry conflicting peer requests, if any. 
*/ if (req->i.waiting) - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); if (c_put) k_put += drbd_req_put_completion_ref(req, m, c_put); @@ -424,18 +425,18 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, kref_sub(&req->kref, k_put, drbd_req_destroy); } -static void drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *req) +static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) { char b[BDEVNAME_SIZE]; if (!__ratelimit(&drbd_ratelimit_state)) return; - dev_warn(DEV, "local %s IO error sector %llu+%u on %s\n", + drbd_warn(device, "local %s IO error sector %llu+%u on %s\n", (req->rq_state & RQ_WRITE) ? "WRITE" : "READ", (unsigned long long)req->i.sector, req->i.size >> 9, - bdevname(mdev->ldev->backing_bdev, b)); + bdevname(device->ldev->backing_bdev, b)); } /* obviously this could be coded as many single functions @@ -453,7 +454,7 @@ static void drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *re int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct net_conf *nc; int p, rv = 0; @@ -462,7 +463,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, switch (what) { default: - dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); + drbd_err(device, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); break; /* does not happen... 
@@ -474,9 +475,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case TO_BE_SENT: /* via network */ /* reached via __drbd_make_request * and from w_read_retry_remote */ - D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + D_ASSERT(device, !(req->rq_state & RQ_NET_MASK)); rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); p = nc->wire_protocol; rcu_read_unlock(); req->rq_state |= @@ -487,15 +488,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case TO_BE_SUBMITTED: /* locally */ /* reached via __drbd_make_request */ - D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); + D_ASSERT(device, !(req->rq_state & RQ_LOCAL_MASK)); mod_rq_state(req, m, 0, RQ_LOCAL_PENDING); break; case COMPLETED_OK: if (req->rq_state & RQ_WRITE) - mdev->writ_cnt += req->i.size >> 9; + device->writ_cnt += req->i.size >> 9; else - mdev->read_cnt += req->i.size >> 9; + device->read_cnt += req->i.size >> 9; mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); @@ -506,15 +507,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case WRITE_COMPLETED_WITH_ERROR: - drbd_report_io_error(mdev, req); - __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); + drbd_report_io_error(device, req); + __drbd_chk_io_error(device, DRBD_WRITE_ERROR); mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); break; case READ_COMPLETED_WITH_ERROR: - drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); - drbd_report_io_error(mdev, req); - __drbd_chk_io_error(mdev, DRBD_READ_ERROR); + drbd_set_out_of_sync(device, req->i.sector, req->i.size); + drbd_report_io_error(device, req); + __drbd_chk_io_error(device, DRBD_READ_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA, no __drbd_chk_io_error in that case. 
*/ @@ -532,16 +533,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* So we can verify the handle in the answer packet. * Corresponding drbd_remove_request_interval is in * drbd_req_complete() */ - D_ASSERT(drbd_interval_empty(&req->i)); - drbd_insert_interval(&mdev->read_requests, &req->i); + D_ASSERT(device, drbd_interval_empty(&req->i)); + drbd_insert_interval(&device->read_requests, &req->i); - set_bit(UNPLUG_REMOTE, &mdev->flags); + set_bit(UNPLUG_REMOTE, &device->flags); - D_ASSERT(req->rq_state & RQ_NET_PENDING); - D_ASSERT((req->rq_state & RQ_LOCAL_MASK) == 0); + D_ASSERT(device, req->rq_state & RQ_NET_PENDING); + D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0); mod_rq_state(req, m, 0, RQ_NET_QUEUED); req->w.cb = w_send_read_req; - drbd_queue_work(&mdev->tconn->sender_work, &req->w); + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &req->w); break; case QUEUE_FOR_NET_WRITE: @@ -550,8 +552,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* Corresponding drbd_remove_request_interval is in * drbd_req_complete() */ - D_ASSERT(drbd_interval_empty(&req->i)); - drbd_insert_interval(&mdev->write_requests, &req->i); + D_ASSERT(device, drbd_interval_empty(&req->i)); + drbd_insert_interval(&device->write_requests, &req->i); /* NOTE * In case the req ended up on the transfer log before being @@ -570,28 +572,30 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* otherwise we may lose an unplug, which may cause some remote * io-scheduler timeout to expire, increasing maximum latency, * hurting performance. 
*/ - set_bit(UNPLUG_REMOTE, &mdev->flags); + set_bit(UNPLUG_REMOTE, &device->flags); /* queue work item to send data */ - D_ASSERT(req->rq_state & RQ_NET_PENDING); + D_ASSERT(device, req->rq_state & RQ_NET_PENDING); mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK); req->w.cb = w_send_dblock; - drbd_queue_work(&mdev->tconn->sender_work, &req->w); + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &req->w); /* close the epoch, in case it outgrew the limit */ rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); p = nc->max_epoch_size; rcu_read_unlock(); - if (mdev->tconn->current_tle_writes >= p) - start_new_tl_epoch(mdev->tconn); + if (first_peer_device(device)->connection->current_tle_writes >= p) + start_new_tl_epoch(first_peer_device(device)->connection); break; case QUEUE_FOR_SEND_OOS: mod_rq_state(req, m, 0, RQ_NET_QUEUED); req->w.cb = w_send_out_of_sync; - drbd_queue_work(&mdev->tconn->sender_work, &req->w); + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &req->w); break; case READ_RETRY_REMOTE_CANCELED: @@ -639,15 +643,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * If this request had been marked as RQ_POSTPONED before, * it will actually not be completed, but "restarted", * resubmitted from the retry worker context. */ - D_ASSERT(req->rq_state & RQ_NET_PENDING); - D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); + D_ASSERT(device, req->rq_state & RQ_NET_PENDING); + D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK); break; case WRITE_ACKED_BY_PEER_AND_SIS: req->rq_state |= RQ_NET_SIS; case WRITE_ACKED_BY_PEER: - D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); + D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); /* protocol C; successfully written on peer. * Nothing more to do here. 
* We want to keep the tl in place for all protocols, to cater @@ -655,25 +659,25 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, goto ack_common; case RECV_ACKED_BY_PEER: - D_ASSERT(req->rq_state & RQ_EXP_RECEIVE_ACK); + D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK); /* protocol B; pretends to be successfully written on peer. * see also notes above in HANDED_OVER_TO_NETWORK about * protocol != C */ ack_common: - D_ASSERT(req->rq_state & RQ_NET_PENDING); + D_ASSERT(device, req->rq_state & RQ_NET_PENDING); mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); break; case POSTPONE_WRITE: - D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); + D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); /* If this node has already detected the write conflict, the * worker will be waiting on misc_wait. Wake it up once this * request has completed locally. */ - D_ASSERT(req->rq_state & RQ_NET_PENDING); + D_ASSERT(device, req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_POSTPONED; if (req->i.waiting) - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); /* Do not clear RQ_NET_PENDING. This request will make further * progress via restart_conflicting_writes() or * fail_postponed_requests(). Hopefully. */ @@ -701,9 +705,10 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (bio_data_dir(req->master_bio) == WRITE) rv = MR_WRITE; - get_ldev(mdev); /* always succeeds in this call path */ + get_ldev(device); /* always succeeds in this call path */ req->w.cb = w_restart_disk_io; - drbd_queue_work(&mdev->tconn->sender_work, &req->w); + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &req->w); break; case RESEND: @@ -719,12 +724,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, Throwing them out of the TL here by pretending we got a BARRIER_ACK. During connection handshake, we ensure that the peer was not rebooted. 
*/ if (!(req->rq_state & RQ_NET_OK)) { - /* FIXME could this possibly be a req->w.cb == w_send_out_of_sync? + /* FIXME could this possibly be a req->dw.cb == w_send_out_of_sync? * in that case we must not set RQ_NET_PENDING. */ mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING); if (req->w.cb) { - drbd_queue_work(&mdev->tconn->sender_work, &req->w); + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &req->w); rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; } /* else: FIXME can this happen? */ break; @@ -740,7 +746,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* barrier came in before all requests were acked. * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ - dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); + drbd_err(device, "FIXME (BARRIER_ACKED but pending)\n"); } /* Allowed to complete requests, even while suspended. * As this is called for all requests within a matching epoch, @@ -751,12 +757,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case DATA_RECEIVED: - D_ASSERT(req->rq_state & RQ_NET_PENDING); + D_ASSERT(device, req->rq_state & RQ_NET_PENDING); mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE); break; case QUEUE_AS_DRBD_BARRIER: - start_new_tl_epoch(mdev->tconn); + start_new_tl_epoch(first_peer_device(device)->connection); mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); break; }; @@ -771,27 +777,27 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * since size may be bigger than BM_BLOCK_SIZE, * we may need to check several bits. 
*/ -static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) +static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size) { unsigned long sbnr, ebnr; sector_t esector, nr_sectors; - if (mdev->state.disk == D_UP_TO_DATE) + if (device->state.disk == D_UP_TO_DATE) return true; - if (mdev->state.disk != D_INCONSISTENT) + if (device->state.disk != D_INCONSISTENT) return false; esector = sector + (size >> 9) - 1; - nr_sectors = drbd_get_capacity(mdev->this_bdev); - D_ASSERT(sector < nr_sectors); - D_ASSERT(esector < nr_sectors); + nr_sectors = drbd_get_capacity(device->this_bdev); + D_ASSERT(device, sector < nr_sectors); + D_ASSERT(device, esector < nr_sectors); sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; + return drbd_bm_count_bits(device, sbnr, ebnr) == 0; } -static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector, +static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector, enum drbd_read_balancing rbm) { struct backing_dev_info *bdi; @@ -799,11 +805,11 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector switch (rbm) { case RB_CONGESTED_REMOTE: - bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + bdi = &device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; return bdi_read_congested(bdi); case RB_LEAST_PENDING: - return atomic_read(&mdev->local_cnt) > - atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + return atomic_read(&device->local_cnt) > + atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt); case RB_32K_STRIPING: /* stripe_shift = 15 */ case RB_64K_STRIPING: case RB_128K_STRIPING: @@ -813,7 +819,7 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector stripe_shift = (rbm - RB_32K_STRIPING + 15); return (sector >> (stripe_shift - 9)) & 1; case 
RB_ROUND_ROBIN: - return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); + return test_and_change_bit(READ_BALANCE_RR, &device->flags); case RB_PREFER_REMOTE: return true; case RB_PREFER_LOCAL: @@ -834,73 +840,73 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector static void complete_conflicting_writes(struct drbd_request *req) { DEFINE_WAIT(wait); - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct drbd_interval *i; sector_t sector = req->i.sector; int size = req->i.size; - i = drbd_find_overlap(&mdev->write_requests, sector, size); + i = drbd_find_overlap(&device->write_requests, sector, size); if (!i) return; for (;;) { - prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); - i = drbd_find_overlap(&mdev->write_requests, sector, size); + prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE); + i = drbd_find_overlap(&device->write_requests, sector, size); if (!i) break; /* Indicate to wake up device->misc_wait on progress. */ i->waiting = true; - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); schedule(); - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); } - finish_wait(&mdev->misc_wait, &wait); + finish_wait(&device->misc_wait, &wait); } /* called within req_lock and rcu_read_lock() */ -static void maybe_pull_ahead(struct drbd_conf *mdev) +static void maybe_pull_ahead(struct drbd_device *device) { - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_connection *connection = first_peer_device(device)->connection; struct net_conf *nc; bool congested = false; enum drbd_on_congestion on_congestion; rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); + nc = rcu_dereference(connection->net_conf); on_congestion = nc ? 
nc->on_congestion : OC_BLOCK; rcu_read_unlock(); if (on_congestion == OC_BLOCK || - tconn->agreed_pro_version < 96) + connection->agreed_pro_version < 96) return; /* If I don't even have good local storage, we can not reasonably try * to pull ahead of the peer. We also need the local reference to make - * sure mdev->act_log is there. + * sure device->act_log is there. */ - if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) + if (!get_ldev_if_state(device, D_UP_TO_DATE)) return; if (nc->cong_fill && - atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) { - dev_info(DEV, "Congestion-fill threshold reached\n"); + atomic_read(&device->ap_in_flight) >= nc->cong_fill) { + drbd_info(device, "Congestion-fill threshold reached\n"); congested = true; } - if (mdev->act_log->used >= nc->cong_extents) { - dev_info(DEV, "Congestion-extents threshold reached\n"); + if (device->act_log->used >= nc->cong_extents) { + drbd_info(device, "Congestion-extents threshold reached\n"); congested = true; } if (congested) { /* start a new epoch for non-mirrored writes */ - start_new_tl_epoch(mdev->tconn); + start_new_tl_epoch(first_peer_device(device)->connection); if (on_congestion == OC_PULL_AHEAD) - _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); + _drbd_set_state(_NS(device, conn, C_AHEAD), 0, NULL); else /*nc->on_congestion == OC_DISCONNECT */ - _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); + _drbd_set_state(_NS(device, conn, C_DISCONNECTING), 0, NULL); } - put_ldev(mdev); + put_ldev(device); } /* If this returns false, and req->private_bio is still set, @@ -914,19 +920,19 @@ static void maybe_pull_ahead(struct drbd_conf *mdev) */ static bool do_remote_read(struct drbd_request *req) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; enum drbd_read_balancing rbm; if (req->private_bio) { - if (!drbd_may_do_local_read(mdev, + if (!drbd_may_do_local_read(device, req->i.sector, req->i.size)) { bio_put(req->private_bio); req->private_bio = NULL; - 
put_ldev(mdev); + put_ldev(device); } } - if (mdev->state.pdsk != D_UP_TO_DATE) + if (device->state.pdsk != D_UP_TO_DATE) return false; if (req->private_bio == NULL) @@ -936,17 +942,17 @@ static bool do_remote_read(struct drbd_request *req) * protocol, pending requests etc. */ rcu_read_lock(); - rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; + rbm = rcu_dereference(device->ldev->disk_conf)->read_balancing; rcu_read_unlock(); if (rbm == RB_PREFER_LOCAL && req->private_bio) return false; /* submit locally */ - if (remote_due_to_read_balancing(mdev, req->i.sector, rbm)) { + if (remote_due_to_read_balancing(device, req->i.sector, rbm)) { if (req->private_bio) { bio_put(req->private_bio); req->private_bio = NULL; - put_ldev(mdev); + put_ldev(device); } return true; } @@ -959,11 +965,11 @@ static bool do_remote_read(struct drbd_request *req) * which does NOT include those that we are L_AHEAD for. */ static int drbd_process_write_request(struct drbd_request *req) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; int remote, send_oos; - remote = drbd_should_do_remote(mdev->state); - send_oos = drbd_should_send_out_of_sync(mdev->state); + remote = drbd_should_do_remote(device->state); + send_oos = drbd_should_send_out_of_sync(device->state); /* Need to replicate writes. Unless it is an empty flush, * which is better mapped to a DRBD P_BARRIER packet, @@ -973,7 +979,7 @@ static int drbd_process_write_request(struct drbd_request *req) * replicating, in which case there is no point. */ if (unlikely(req->i.size == 0)) { /* The only size==0 bios we expect are empty flushes. 
*/ - D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH); + D_ASSERT(device, req->master_bio->bi_rw & REQ_FLUSH); if (remote) _req_mod(req, QUEUE_AS_DRBD_BARRIER); return remote; @@ -982,12 +988,12 @@ static int drbd_process_write_request(struct drbd_request *req) if (!remote && !send_oos) return 0; - D_ASSERT(!(remote && send_oos)); + D_ASSERT(device, !(remote && send_oos)); if (remote) { _req_mod(req, TO_BE_SENT); _req_mod(req, QUEUE_FOR_NET_WRITE); - } else if (drbd_set_out_of_sync(mdev, req->i.sector, req->i.size)) + } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size)) _req_mod(req, QUEUE_FOR_SEND_OOS); return remote; @@ -996,36 +1002,36 @@ static int drbd_process_write_request(struct drbd_request *req) static void drbd_submit_req_private_bio(struct drbd_request *req) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct bio *bio = req->private_bio; const int rw = bio_rw(bio); - bio->bi_bdev = mdev->ldev->backing_bdev; + bio->bi_bdev = device->ldev->backing_bdev; /* State may have changed since we grabbed our reference on the * ->ldev member. Double check, and short-circuit to endio. * In case the last activity log transaction failed to get on * stable storage, and this is a WRITE, we may not even submit * this bio. */ - if (get_ldev(mdev)) { - if (drbd_insert_fault(mdev, + if (get_ldev(device)) { + if (drbd_insert_fault(device, rw == WRITE ? DRBD_FAULT_DT_WR : rw == READ ? 
DRBD_FAULT_DT_RD : DRBD_FAULT_DT_RA)) bio_endio(bio, -EIO); else generic_make_request(bio); - put_ldev(mdev); + put_ldev(device); } else bio_endio(bio, -EIO); } -static void drbd_queue_write(struct drbd_conf *mdev, struct drbd_request *req) +static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) { - spin_lock(&mdev->submit.lock); - list_add_tail(&req->tl_requests, &mdev->submit.writes); - spin_unlock(&mdev->submit.lock); - queue_work(mdev->submit.wq, &mdev->submit.worker); + spin_lock(&device->submit.lock); + list_add_tail(&req->tl_requests, &device->submit.writes); + spin_unlock(&device->submit.lock); + queue_work(device->submit.wq, &device->submit.worker); } /* returns the new drbd_request pointer, if the caller is expected to @@ -1033,36 +1039,36 @@ static void drbd_queue_write(struct drbd_conf *mdev, struct drbd_request *req) * request on the submitter thread. * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. */ -struct drbd_request * -drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +static struct drbd_request * +drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_time) { const int rw = bio_data_dir(bio); struct drbd_request *req; /* allocate outside of all locks; */ - req = drbd_req_new(mdev, bio); + req = drbd_req_new(device, bio); if (!req) { - dec_ap_bio(mdev); + dec_ap_bio(device); /* only pass the error to the upper layers. * if user cannot handle io errors, that's not our business. 
*/ - dev_err(DEV, "could not kmalloc() req\n"); + drbd_err(device, "could not kmalloc() req\n"); bio_endio(bio, -ENOMEM); return ERR_PTR(-ENOMEM); } req->start_time = start_time; - if (!get_ldev(mdev)) { + if (!get_ldev(device)) { bio_put(req->private_bio); req->private_bio = NULL; } /* Update disk stats */ - _drbd_start_io_acct(mdev, req); + _drbd_start_io_acct(device, req); if (rw == WRITE && req->private_bio && req->i.size - && !test_bit(AL_SUSPENDED, &mdev->flags)) { - if (!drbd_al_begin_io_fastpath(mdev, &req->i)) { - drbd_queue_write(mdev, req); + && !test_bit(AL_SUSPENDED, &device->flags)) { + if (!drbd_al_begin_io_fastpath(device, &req->i)) { + drbd_queue_write(device, req); return NULL; } req->rq_state |= RQ_IN_ACT_LOG; @@ -1071,13 +1077,13 @@ drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long star return req; } -static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *req) +static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { const int rw = bio_rw(req->master_bio); struct bio_and_error m = { NULL, }; bool no_remote = false; - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); if (rw == WRITE) { /* This may temporarily give up the req_lock, * but will re-aquire it before it returns here. @@ -1087,17 +1093,17 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *re /* check for congestion, and potentially stop sending * full data updates, but start sending "dirty bits" only. 
*/ - maybe_pull_ahead(mdev); + maybe_pull_ahead(device); } - if (drbd_suspended(mdev)) { + if (drbd_suspended(device)) { /* push back and retry: */ req->rq_state |= RQ_POSTPONED; if (req->private_bio) { bio_put(req->private_bio); req->private_bio = NULL; - put_ldev(mdev); + put_ldev(device); } goto out; } @@ -1111,15 +1117,15 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *re } /* which transfer log epoch does this belong to? */ - req->epoch = atomic_read(&mdev->tconn->current_tle_nr); + req->epoch = atomic_read(&first_peer_device(device)->connection->current_tle_nr); /* no point in adding empty flushes to the transfer log, * they are mapped to drbd barriers already. */ if (likely(req->i.size!=0)) { if (rw == WRITE) - mdev->tconn->current_tle_writes++; + first_peer_device(device)->connection->current_tle_writes++; - list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log); + list_add_tail(&req->tl_requests, &first_peer_device(device)->connection->transfer_log); } if (rw == WRITE) { @@ -1139,13 +1145,13 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request *re /* needs to be marked within the same spinlock */ _req_mod(req, TO_BE_SUBMITTED); /* but we need to give up the spinlock to submit */ - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); drbd_submit_req_private_bio(req); - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); } else if (no_remote) { nodata: if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "IO ERROR: neither local nor remote data, sector %llu+%u\n", + drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n", (unsigned long long)req->i.sector, req->i.size >> 9); /* A write may have been queued for send_oos, however. 
* So we can not simply free it, we must go through drbd_req_put_completion_ref() */ @@ -1154,21 +1160,21 @@ nodata: out: if (drbd_req_put_completion_ref(req, &m, 1)) kref_put(&req->kref, drbd_req_destroy); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); if (m.bio) - complete_master_bio(mdev, &m); + complete_master_bio(device, &m); } -void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_time) { - struct drbd_request *req = drbd_request_prepare(mdev, bio, start_time); + struct drbd_request *req = drbd_request_prepare(device, bio, start_time); if (IS_ERR_OR_NULL(req)) return; - drbd_send_and_submit(mdev, req); + drbd_send_and_submit(device, req); } -static void submit_fast_path(struct drbd_conf *mdev, struct list_head *incoming) +static void submit_fast_path(struct drbd_device *device, struct list_head *incoming) { struct drbd_request *req, *tmp; list_for_each_entry_safe(req, tmp, incoming, tl_requests) { @@ -1176,19 +1182,19 @@ static void submit_fast_path(struct drbd_conf *mdev, struct list_head *incoming) if (rw == WRITE /* rw != WRITE should not even end up here! 
*/ && req->private_bio && req->i.size - && !test_bit(AL_SUSPENDED, &mdev->flags)) { - if (!drbd_al_begin_io_fastpath(mdev, &req->i)) + && !test_bit(AL_SUSPENDED, &device->flags)) { + if (!drbd_al_begin_io_fastpath(device, &req->i)) continue; req->rq_state |= RQ_IN_ACT_LOG; } list_del_init(&req->tl_requests); - drbd_send_and_submit(mdev, req); + drbd_send_and_submit(device, req); } } -static bool prepare_al_transaction_nonblock(struct drbd_conf *mdev, +static bool prepare_al_transaction_nonblock(struct drbd_device *device, struct list_head *incoming, struct list_head *pending) { @@ -1196,9 +1202,9 @@ static bool prepare_al_transaction_nonblock(struct drbd_conf *mdev, int wake = 0; int err; - spin_lock_irq(&mdev->al_lock); + spin_lock_irq(&device->al_lock); list_for_each_entry_safe(req, tmp, incoming, tl_requests) { - err = drbd_al_begin_io_nonblock(mdev, &req->i); + err = drbd_al_begin_io_nonblock(device, &req->i); if (err == -EBUSY) wake = 1; if (err) @@ -1206,30 +1212,30 @@ static bool prepare_al_transaction_nonblock(struct drbd_conf *mdev, req->rq_state |= RQ_IN_ACT_LOG; list_move_tail(&req->tl_requests, pending); } - spin_unlock_irq(&mdev->al_lock); + spin_unlock_irq(&device->al_lock); if (wake) - wake_up(&mdev->al_wait); + wake_up(&device->al_wait); return !list_empty(pending); } void do_submit(struct work_struct *ws) { - struct drbd_conf *mdev = container_of(ws, struct drbd_conf, submit.worker); + struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker); LIST_HEAD(incoming); LIST_HEAD(pending); struct drbd_request *req, *tmp; for (;;) { - spin_lock(&mdev->submit.lock); - list_splice_tail_init(&mdev->submit.writes, &incoming); - spin_unlock(&mdev->submit.lock); + spin_lock(&device->submit.lock); + list_splice_tail_init(&device->submit.writes, &incoming); + spin_unlock(&device->submit.lock); - submit_fast_path(mdev, &incoming); + submit_fast_path(device, &incoming); if (list_empty(&incoming)) break; - wait_event(mdev->al_wait, 
prepare_al_transaction_nonblock(mdev, &incoming, &pending)); + wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending)); /* Maybe more was queued, while we prepared the transaction? * Try to stuff them into this transaction as well. * Be strictly non-blocking here, no wait_event, we already @@ -1243,17 +1249,17 @@ void do_submit(struct work_struct *ws) /* It is ok to look outside the lock, * it's only an optimization anyways */ - if (list_empty(&mdev->submit.writes)) + if (list_empty(&device->submit.writes)) break; - spin_lock(&mdev->submit.lock); - list_splice_tail_init(&mdev->submit.writes, &more_incoming); - spin_unlock(&mdev->submit.lock); + spin_lock(&device->submit.lock); + list_splice_tail_init(&device->submit.writes, &more_incoming); + spin_unlock(&device->submit.lock); if (list_empty(&more_incoming)) break; - made_progress = prepare_al_transaction_nonblock(mdev, &more_incoming, &more_pending); + made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending); list_splice_tail_init(&more_pending, &pending); list_splice_tail_init(&more_incoming, &incoming); @@ -1261,18 +1267,18 @@ void do_submit(struct work_struct *ws) if (!made_progress) break; } - drbd_al_begin_io_commit(mdev, false); + drbd_al_begin_io_commit(device, false); list_for_each_entry_safe(req, tmp, &pending, tl_requests) { list_del_init(&req->tl_requests); - drbd_send_and_submit(mdev, req); + drbd_send_and_submit(device, req); } } } void drbd_make_request(struct request_queue *q, struct bio *bio) { - struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + struct drbd_device *device = (struct drbd_device *) q->queuedata; unsigned long start_time; start_time = jiffies; @@ -1280,10 +1286,10 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) /* * what we "blindly" assume: */ - D_ASSERT(IS_ALIGNED(bio->bi_iter.bi_size, 512)); + D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512)); - inc_ap_bio(mdev); - 
__drbd_make_request(mdev, bio, start_time); + inc_ap_bio(device); + __drbd_make_request(device, bio, start_time); } /* This is called by bio_add_page(). @@ -1300,32 +1306,32 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) */ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) { - struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + struct drbd_device *device = (struct drbd_device *) q->queuedata; unsigned int bio_size = bvm->bi_size; int limit = DRBD_MAX_BIO_SIZE; int backing_limit; - if (bio_size && get_ldev(mdev)) { + if (bio_size && get_ldev(device)) { unsigned int max_hw_sectors = queue_max_hw_sectors(q); struct request_queue * const b = - mdev->ldev->backing_bdev->bd_disk->queue; + device->ldev->backing_bdev->bd_disk->queue; if (b->merge_bvec_fn) { backing_limit = b->merge_bvec_fn(b, bvm, bvec); limit = min(limit, backing_limit); } - put_ldev(mdev); + put_ldev(device); if ((limit >> 9) > max_hw_sectors) limit = max_hw_sectors << 9; } return limit; } -struct drbd_request *find_oldest_request(struct drbd_tconn *tconn) +static struct drbd_request *find_oldest_request(struct drbd_connection *connection) { /* Walk the transfer log, * and find the oldest not yet completed request */ struct drbd_request *r; - list_for_each_entry(r, &tconn->transfer_log, tl_requests) { + list_for_each_entry(r, &connection->transfer_log, tl_requests) { if (atomic_read(&r->completion_ref)) return r; } @@ -1334,21 +1340,21 @@ struct drbd_request *find_oldest_request(struct drbd_tconn *tconn) void request_timer_fn(unsigned long data) { - struct drbd_conf *mdev = (struct drbd_conf *) data; - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_device *device = (struct drbd_device *) data; + struct drbd_connection *connection = first_peer_device(device)->connection; struct drbd_request *req; /* oldest request */ struct net_conf *nc; unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ 
unsigned long now; rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); - if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS) + nc = rcu_dereference(connection->net_conf); + if (nc && device->state.conn >= C_WF_REPORT_PARAMS) ent = nc->timeout * HZ/10 * nc->ko_count; - if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */ - dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; - put_ldev(mdev); + if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */ + dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10; + put_ldev(device); } rcu_read_unlock(); @@ -1359,11 +1365,11 @@ void request_timer_fn(unsigned long data) now = jiffies; - spin_lock_irq(&tconn->req_lock); - req = find_oldest_request(tconn); + spin_lock_irq(&device->resource->req_lock); + req = find_oldest_request(connection); if (!req) { - spin_unlock_irq(&tconn->req_lock); - mod_timer(&mdev->request_timer, now + et); + spin_unlock_irq(&device->resource->req_lock); + mod_timer(&device->request_timer, now + et); return; } @@ -1385,17 +1391,17 @@ void request_timer_fn(unsigned long data) */ if (ent && req->rq_state & RQ_NET_PENDING && time_after(now, req->start_time + ent) && - !time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) { - dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { + drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); + _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); } - if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev && + if (dt && req->rq_state & RQ_LOCAL_PENDING && req->device == device && time_after(now, req->start_time + dt) && - !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { - dev_warn(DEV, "Local 
backing device failed to meet the disk-timeout\n"); - __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH); + !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { + drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(device, DRBD_FORCE_DETACH); } nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; - spin_unlock_irq(&tconn->req_lock); - mod_timer(&mdev->request_timer, nt); + spin_unlock_irq(&connection->resource->req_lock); + mod_timer(&device->request_timer, nt); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 28e15d91197a..c684c963538e 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -275,17 +275,17 @@ struct bio_and_error { int error; }; -extern void start_new_tl_epoch(struct drbd_tconn *tconn); +extern void start_new_tl_epoch(struct drbd_connection *connection); extern void drbd_req_destroy(struct kref *kref); extern void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m); extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m); -extern void complete_master_bio(struct drbd_conf *mdev, +extern void complete_master_bio(struct drbd_device *device, struct bio_and_error *m); extern void request_timer_fn(unsigned long data); -extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); -extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); +extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what); +extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what); /* this is in drbd_main.c */ extern void drbd_restart_request(struct drbd_request *req); @@ -294,14 +294,14 @@ extern void drbd_restart_request(struct drbd_request *req); * outside the spinlock, e.g. when walking some list on cleanup. 
*/ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) { - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct bio_and_error m; int rv; /* __req_mod possibly frees req, do not touch req after that! */ rv = __req_mod(req, what, &m); if (m.bio) - complete_master_bio(mdev, &m); + complete_master_bio(device, &m); return rv; } @@ -314,16 +314,16 @@ static inline int req_mod(struct drbd_request *req, enum drbd_req_event what) { unsigned long flags; - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct bio_and_error m; int rv; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); + spin_lock_irqsave(&device->resource->req_lock, flags); rv = __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + spin_unlock_irqrestore(&device->resource->req_lock, flags); if (m.bio) - complete_master_bio(mdev, &m); + complete_master_bio(device, &m); return rv; } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 216d47b7e88b..1a84345a3868 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -27,13 +27,12 @@ #include <linux/drbd_limits.h> #include "drbd_int.h" +#include "drbd_protocol.h" #include "drbd_req.h" -/* in drbd_main.c */ -extern void tl_abort_disk_io(struct drbd_conf *mdev); - struct after_state_chg_work { struct drbd_work w; + struct drbd_device *device; union drbd_state os; union drbd_state ns; enum chg_state_flags flags; @@ -50,12 +49,12 @@ enum sanitize_state_warnings { }; static int w_after_state_ch(struct drbd_work *w, int unused); -static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, +static void after_state_ch(struct drbd_device *device, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union 
drbd_state, struct drbd_tconn *); +static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state); +static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, +static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns, enum sanitize_state_warnings *warn); static inline bool is_susp(union drbd_state s) @@ -63,17 +62,18 @@ static inline bool is_susp(union drbd_state s) return s.susp || s.susp_nod || s.susp_fen; } -bool conn_all_vols_unconf(struct drbd_tconn *tconn) +bool conn_all_vols_unconf(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; bool rv = true; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (mdev->state.disk != D_DISKLESS || - mdev->state.conn != C_STANDALONE || - mdev->state.role != R_SECONDARY) { + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + if (device->state.disk != D_DISKLESS || + device->state.conn != C_STANDALONE || + device->state.role != R_SECONDARY) { rv = false; break; } @@ -102,99 +102,111 @@ static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) return R_PRIMARY; } -enum drbd_role conn_highest_role(struct drbd_tconn *tconn) +enum drbd_role conn_highest_role(struct drbd_connection *connection) { enum drbd_role role = R_UNKNOWN; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - role = max_role(role, mdev->state.role); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + role = max_role(role, device->state.role); + } rcu_read_unlock(); 
return role; } -enum drbd_role conn_highest_peer(struct drbd_tconn *tconn) +enum drbd_role conn_highest_peer(struct drbd_connection *connection) { enum drbd_role peer = R_UNKNOWN; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - peer = max_role(peer, mdev->state.peer); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + peer = max_role(peer, device->state.peer); + } rcu_read_unlock(); return peer; } -enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) +enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection) { enum drbd_disk_state ds = D_DISKLESS; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - ds = max_t(enum drbd_disk_state, ds, mdev->state.disk); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + ds = max_t(enum drbd_disk_state, ds, device->state.disk); + } rcu_read_unlock(); return ds; } -enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn) +enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection) { enum drbd_disk_state ds = D_MASK; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - ds = min_t(enum drbd_disk_state, ds, mdev->state.disk); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + ds = min_t(enum drbd_disk_state, ds, device->state.disk); + } rcu_read_unlock(); return ds; } -enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) +enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection) { enum drbd_disk_state ds = D_DISKLESS; - struct drbd_conf *mdev; + struct drbd_peer_device 
*peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + ds = max_t(enum drbd_disk_state, ds, device->state.pdsk); + } rcu_read_unlock(); return ds; } -enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn) +enum drbd_conns conn_lowest_conn(struct drbd_connection *connection) { enum drbd_conns conn = C_MASK; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - conn = min_t(enum drbd_conns, conn, mdev->state.conn); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + conn = min_t(enum drbd_conns, conn, device->state.conn); + } rcu_read_unlock(); return conn; } -static bool no_peer_wf_report_params(struct drbd_tconn *tconn) +static bool no_peer_wf_report_params(struct drbd_connection *connection) { - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; bool rv = true; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) - if (mdev->state.conn == C_WF_REPORT_PARAMS) { + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) + if (peer_device->device->state.conn == C_WF_REPORT_PARAMS) { rv = false; break; } @@ -206,11 +218,11 @@ static bool no_peer_wf_report_params(struct drbd_tconn *tconn) /** * cl_wide_st_chg() - true if the state change is a cluster wide one - * @mdev: DRBD device. + * @device: DRBD device. * @os: old (current) state. * @ns: new (wanted) state. 
*/ -static int cl_wide_st_chg(struct drbd_conf *mdev, +static int cl_wide_st_chg(struct drbd_device *device, union drbd_state os, union drbd_state ns) { return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && @@ -232,72 +244,72 @@ apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val) } enum drbd_state_rv -drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, +drbd_change_state(struct drbd_device *device, enum chg_state_flags f, union drbd_state mask, union drbd_state val) { unsigned long flags; union drbd_state ns; enum drbd_state_rv rv; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - ns = apply_mask_val(drbd_read_state(mdev), mask, val); - rv = _drbd_set_state(mdev, ns, f, NULL); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + spin_lock_irqsave(&device->resource->req_lock, flags); + ns = apply_mask_val(drbd_read_state(device), mask, val); + rv = _drbd_set_state(device, ns, f, NULL); + spin_unlock_irqrestore(&device->resource->req_lock, flags); return rv; } /** * drbd_force_state() - Impose a change which happens outside our control on our state - * @mdev: DRBD device. + * @device: DRBD device. * @mask: mask of state bits to change. * @val: value of new state bits. 
*/ -void drbd_force_state(struct drbd_conf *mdev, +void drbd_force_state(struct drbd_device *device, union drbd_state mask, union drbd_state val) { - drbd_change_state(mdev, CS_HARD, mask, val); + drbd_change_state(device, CS_HARD, mask, val); } static enum drbd_state_rv -_req_st_cond(struct drbd_conf *mdev, union drbd_state mask, +_req_st_cond(struct drbd_device *device, union drbd_state mask, union drbd_state val) { union drbd_state os, ns; unsigned long flags; enum drbd_state_rv rv; - if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) + if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &device->flags)) return SS_CW_SUCCESS; - if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) + if (test_and_clear_bit(CL_ST_CHG_FAIL, &device->flags)) return SS_CW_FAILED_BY_PEER; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = drbd_read_state(mdev); - ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); + spin_lock_irqsave(&device->resource->req_lock, flags); + os = drbd_read_state(device); + ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv >= SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ - if (!cl_wide_st_chg(mdev, os, ns)) + if (!cl_wide_st_chg(device, os, ns)) rv = SS_CW_NO_NEED; if (rv == SS_UNKNOWN_ERROR) { - rv = is_valid_state(mdev, ns); + rv = is_valid_state(device, ns); if (rv >= SS_SUCCESS) { - rv = is_valid_soft_transition(os, ns, mdev->tconn); + rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); if (rv >= SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ } } - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + spin_unlock_irqrestore(&device->resource->req_lock, flags); return rv; } /** * drbd_req_state() - Perform an eventually cluster wide state change - * @mdev: DRBD device. + * @device: DRBD device. * @mask: mask of state bits to change. * @val: value of new state bits. 
* @f: flags @@ -306,7 +318,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, * _drbd_request_state(). */ static enum drbd_state_rv -drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, +drbd_req_state(struct drbd_device *device, union drbd_state mask, union drbd_state val, enum chg_state_flags f) { struct completion done; @@ -317,68 +329,68 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, init_completion(&done); if (f & CS_SERIALIZE) - mutex_lock(mdev->state_mutex); + mutex_lock(device->state_mutex); - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = drbd_read_state(mdev); - ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); + spin_lock_irqsave(&device->resource->req_lock, flags); + os = drbd_read_state(device); + ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv < SS_SUCCESS) { - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + spin_unlock_irqrestore(&device->resource->req_lock, flags); goto abort; } - if (cl_wide_st_chg(mdev, os, ns)) { - rv = is_valid_state(mdev, ns); + if (cl_wide_st_chg(device, os, ns)) { + rv = is_valid_state(device, ns); if (rv == SS_SUCCESS) - rv = is_valid_soft_transition(os, ns, mdev->tconn); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); + spin_unlock_irqrestore(&device->resource->req_lock, flags); if (rv < SS_SUCCESS) { if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); + print_st_err(device, os, ns, rv); goto abort; } - if (drbd_send_state_req(mdev, mask, val)) { + if (drbd_send_state_req(first_peer_device(device), mask, val)) { rv = SS_CW_FAILED_BY_PEER; if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); + print_st_err(device, os, ns, rv); goto abort; } - wait_event(mdev->state_wait, - (rv = _req_st_cond(mdev, mask, val))); + wait_event(device->state_wait, + (rv = _req_st_cond(device, mask, val))); if (rv < 
SS_SUCCESS) { if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); + print_st_err(device, os, ns, rv); goto abort; } - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - ns = apply_mask_val(drbd_read_state(mdev), mask, val); - rv = _drbd_set_state(mdev, ns, f, &done); + spin_lock_irqsave(&device->resource->req_lock, flags); + ns = apply_mask_val(drbd_read_state(device), mask, val); + rv = _drbd_set_state(device, ns, f, &done); } else { - rv = _drbd_set_state(mdev, ns, f, &done); + rv = _drbd_set_state(device, ns, f, &done); } - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + spin_unlock_irqrestore(&device->resource->req_lock, flags); if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->tconn->worker.task); + D_ASSERT(device, current != first_peer_device(device)->connection->worker.task); wait_for_completion(&done); } abort: if (f & CS_SERIALIZE) - mutex_unlock(mdev->state_mutex); + mutex_unlock(device->state_mutex); return rv; } /** * _drbd_request_state() - Request a state change (with flags) - * @mdev: DRBD device. + * @device: DRBD device. * @mask: mask of state bits to change. * @val: value of new state bits. * @f: flags @@ -387,20 +399,20 @@ abort: * flag, or when logging of failed state change requests is not desired. 
*/ enum drbd_state_rv -_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, +_drbd_request_state(struct drbd_device *device, union drbd_state mask, union drbd_state val, enum chg_state_flags f) { enum drbd_state_rv rv; - wait_event(mdev->state_wait, - (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); + wait_event(device->state_wait, + (rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE); return rv; } -static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) +static void print_st(struct drbd_device *device, char *name, union drbd_state ns) { - dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", + drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", name, drbd_conn_str(ns.conn), drbd_role_str(ns.role), @@ -416,14 +428,14 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) ); } -void print_st_err(struct drbd_conf *mdev, union drbd_state os, +void print_st_err(struct drbd_device *device, union drbd_state os, union drbd_state ns, enum drbd_state_rv err) { if (err == SS_IN_TRANSIENT_STATE) return; - dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); - print_st(mdev, " state", os); - print_st(mdev, "wanted", ns); + drbd_err(device, "State change failed: %s\n", drbd_set_st_err_str(err)); + print_st(device, " state", os); + print_st(device, "wanted", ns); } static long print_state_change(char *pb, union drbd_state os, union drbd_state ns, @@ -457,7 +469,7 @@ static long print_state_change(char *pb, union drbd_state os, union drbd_state n return pbp - pb; } -static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, +static void drbd_pr_state_change(struct drbd_device *device, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { char pb[300]; @@ -479,10 +491,10 @@ static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, un ns.user_isp); if (pbp != pb) 
- dev_info(DEV, "%s\n", pb); + drbd_info(device, "%s\n", pb); } -static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, +static void conn_pr_state_change(struct drbd_connection *connection, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { char pb[300]; @@ -496,17 +508,17 @@ static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, is_susp(ns)); if (pbp != pb) - conn_info(tconn, "%s\n", pb); + drbd_info(connection, "%s\n", pb); } /** * is_valid_state() - Returns an SS_ error code if ns is not valid - * @mdev: DRBD device. + * @device: DRBD device. * @ns: State to consider. */ static enum drbd_state_rv -is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +is_valid_state(struct drbd_device *device, union drbd_state ns) { /* See drbd_state_sw_errors in drbd_strings.c */ @@ -516,24 +528,24 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rcu_read_lock(); fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; - put_ldev(mdev); + if (get_ldev(device)) { + fp = rcu_dereference(device->ldev->disk_conf)->fencing; + put_ldev(device); } - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); if (nc) { if (!nc->two_primaries && ns.role == R_PRIMARY) { if (ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; - else if (conn_highest_peer(mdev->tconn) == R_PRIMARY) + else if (conn_highest_peer(first_peer_device(device)->connection) == R_PRIMARY) rv = SS_O_VOL_PEER_PRI; } } if (rv <= 0) /* already found a reason to abort */; - else if (ns.role == R_SECONDARY && mdev->open_cnt) + else if (ns.role == R_SECONDARY && device->open_cnt) rv = SS_DEVICE_IN_USE; else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) @@ -567,7 +579,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || 
ns.conn == C_VERIFY_T) && - mdev->tconn->agreed_pro_version < 88) + first_peer_device(device)->connection->agreed_pro_version < 88) rv = SS_NOT_SUPPORTED; else if (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) @@ -589,12 +601,12 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible * This function limits state transitions that may be declined by DRBD. I.e. * user requests (aka soft transitions). - * @mdev: DRBD device. + * @device: DRBD device. * @ns: new state. * @os: old state. */ static enum drbd_state_rv -is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_tconn *tconn) +is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_connection *connection) { enum drbd_state_rv rv = SS_SUCCESS; @@ -622,7 +634,7 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_t /* While establishing a connection only allow cstate to change. Delay/refuse role changes, detach attach etc... 
*/ - if (test_bit(STATE_SENT, &tconn->flags) && + if (test_bit(STATE_SENT, &connection->flags) && !(os.conn == C_WF_REPORT_PARAMS || (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) rv = SS_IN_TRANSIENT_STATE; @@ -703,7 +715,7 @@ is_valid_transition(union drbd_state os, union drbd_state ns) return rv; } -static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_state_warnings warn) { static const char *msg_table[] = { [NO_WARNING] = "", @@ -715,12 +727,12 @@ static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_ }; if (warn != NO_WARNING) - dev_warn(DEV, "%s\n", msg_table[warn]); + drbd_warn(device, "%s\n", msg_table[warn]); } /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition - * @mdev: DRBD device. + * @device: DRBD device. * @os: old state. * @ns: new state. * @warn_sync_abort: @@ -728,7 +740,7 @@ static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_ * When we loose connection, we have to set the state of the peers disk (pdsk) * to D_UNKNOWN. This rule and many more along those lines are in this function. 
*/ -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, +static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns, enum sanitize_state_warnings *warn) { enum drbd_fencing_p fp; @@ -738,11 +750,11 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state *warn = NO_WARNING; fp = FP_DONT_CARE; - if (get_ldev(mdev)) { + if (get_ldev(device)) { rcu_read_lock(); - fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; + fp = rcu_dereference(device->ldev->disk_conf)->fencing; rcu_read_unlock(); - put_ldev(mdev); + put_ldev(device); } /* Implications from connection to peer and peer_isp */ @@ -768,17 +780,17 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* Connection breaks down before we finished "Negotiating" */ if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && - get_ldev_if_state(mdev, D_NEGOTIATING)) { - if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { - ns.disk = mdev->new_state_tmp.disk; - ns.pdsk = mdev->new_state_tmp.pdsk; + get_ldev_if_state(device, D_NEGOTIATING)) { + if (device->ed_uuid == device->ldev->md.uuid[UI_CURRENT]) { + ns.disk = device->new_state_tmp.disk; + ns.pdsk = device->new_state_tmp.pdsk; } else { if (warn) *warn = CONNECTION_LOST_NEGOTIATING; ns.disk = D_DISKLESS; ns.pdsk = D_UNKNOWN; } - put_ldev(mdev); + put_ldev(device); } /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ @@ -873,7 +885,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO && + if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data 
available) */ @@ -892,42 +904,42 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state return ns; } -void drbd_resume_al(struct drbd_conf *mdev) +void drbd_resume_al(struct drbd_device *device) { - if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) - dev_info(DEV, "Resumed AL updates\n"); + if (test_and_clear_bit(AL_SUSPENDED, &device->flags)) + drbd_info(device, "Resumed AL updates\n"); } /* helper for __drbd_set_state */ -static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) +static void set_ov_position(struct drbd_device *device, enum drbd_conns cs) { - if (mdev->tconn->agreed_pro_version < 90) - mdev->ov_start_sector = 0; - mdev->rs_total = drbd_bm_bits(mdev); - mdev->ov_position = 0; + if (first_peer_device(device)->connection->agreed_pro_version < 90) + device->ov_start_sector = 0; + device->rs_total = drbd_bm_bits(device); + device->ov_position = 0; if (cs == C_VERIFY_T) { /* starting online verify from an arbitrary position * does not fit well into the existing protocol. * on C_VERIFY_T, we initialize ov_left and friends * implicitly in receive_DataRequest once the * first P_OV_REQUEST is received */ - mdev->ov_start_sector = ~(sector_t)0; + device->ov_start_sector = ~(sector_t)0; } else { - unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); - if (bit >= mdev->rs_total) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(mdev->rs_total - 1); - mdev->rs_total = 1; + unsigned long bit = BM_SECT_TO_BIT(device->ov_start_sector); + if (bit >= device->rs_total) { + device->ov_start_sector = + BM_BIT_TO_SECT(device->rs_total - 1); + device->rs_total = 1; } else - mdev->rs_total -= bit; - mdev->ov_position = mdev->ov_start_sector; + device->rs_total -= bit; + device->ov_position = device->ov_start_sector; } - mdev->ov_left = mdev->rs_total; + device->ov_left = device->rs_total; } /** * __drbd_set_state() - Set a new DRBD state - * @mdev: DRBD device. + * @device: DRBD device. * @ns: new state. 
* @flags: Flags * @done: Optional completion, that will get completed after the after_state_ch() finished @@ -935,7 +947,7 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) * Caller needs to hold req_lock, and global_state_lock. Do not call directly. */ enum drbd_state_rv -__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, +__drbd_set_state(struct drbd_device *device, union drbd_state ns, enum chg_state_flags flags, struct completion *done) { union drbd_state os; @@ -944,9 +956,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, struct after_state_chg_work *ascw; bool did_remote, should_do_remote; - os = drbd_read_state(mdev); + os = drbd_read_state(device); - ns = sanitize_state(mdev, ns, &ssw); + ns = sanitize_state(device, ns, &ssw); if (ns.i == os.i) return SS_NOTHING_TO_DO; @@ -958,32 +970,33 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, /* pre-state-change checks ; only look at ns */ /* See drbd_state_sw_errors in drbd_strings.c */ - rv = is_valid_state(mdev, ns); + rv = is_valid_state(device, ns); if (rv < SS_SUCCESS) { /* If the old state was illegal as well, then let this happen...*/ - if (is_valid_state(mdev, os) == rv) - rv = is_valid_soft_transition(os, ns, mdev->tconn); + if (is_valid_state(device, os) == rv) + rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); } else - rv = is_valid_soft_transition(os, ns, mdev->tconn); + rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); } if (rv < SS_SUCCESS) { if (flags & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); + print_st_err(device, os, ns, rv); return rv; } - print_sanitize_warnings(mdev, ssw); + print_sanitize_warnings(device, ssw); - drbd_pr_state_change(mdev, os, ns, flags); + drbd_pr_state_change(device, os, ns, flags); /* Display changes to the susp* flags that where caused by the call to sanitize_state(). 
Only display it here if we where not called from _conn_request_state() */ if (!(flags & CS_DC_SUSP)) - conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP); + conn_pr_state_change(first_peer_device(device)->connection, os, ns, + (flags & ~CS_DC_MASK) | CS_DC_SUSP); /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference * on the ldev here, to be sure the transition -> D_DISKLESS resp. @@ -991,55 +1004,55 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, * after_state_ch works run, where we put_ldev again. */ if ((os.disk != D_FAILED && ns.disk == D_FAILED) || (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) - atomic_inc(&mdev->local_cnt); + atomic_inc(&device->local_cnt); - did_remote = drbd_should_do_remote(mdev->state); - mdev->state.i = ns.i; - should_do_remote = drbd_should_do_remote(mdev->state); - mdev->tconn->susp = ns.susp; - mdev->tconn->susp_nod = ns.susp_nod; - mdev->tconn->susp_fen = ns.susp_fen; + did_remote = drbd_should_do_remote(device->state); + device->state.i = ns.i; + should_do_remote = drbd_should_do_remote(device->state); + device->resource->susp = ns.susp; + device->resource->susp_nod = ns.susp_nod; + device->resource->susp_fen = ns.susp_fen; /* put replicated vs not-replicated requests in seperate epochs */ if (did_remote != should_do_remote) - start_new_tl_epoch(mdev->tconn); + start_new_tl_epoch(first_peer_device(device)->connection); if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) - drbd_print_uuids(mdev, "attached to UUIDs"); + drbd_print_uuids(device, "attached to UUIDs"); /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && - no_peer_wf_report_params(mdev->tconn)) - clear_bit(STATE_SENT, &mdev->tconn->flags); + no_peer_wf_report_params(first_peer_device(device)->connection)) + clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags); - wake_up(&mdev->misc_wait); - 
wake_up(&mdev->state_wait); - wake_up(&mdev->tconn->ping_wait); + wake_up(&device->misc_wait); + wake_up(&device->state_wait); + wake_up(&first_peer_device(device)->connection->ping_wait); /* Aborted verify run, or we reached the stop sector. * Log the last position, unless end-of-device. */ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && ns.conn <= C_CONNECTED) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - if (mdev->ov_left) - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); + device->ov_start_sector = + BM_BIT_TO_SECT(drbd_bm_bits(device) - device->ov_left); + if (device->ov_left) + drbd_info(device, "Online Verify reached sector %llu\n", + (unsigned long long)device->ov_start_sector); } if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { - dev_info(DEV, "Syncer continues.\n"); - mdev->rs_paused += (long)jiffies - -(long)mdev->rs_mark_time[mdev->rs_last_mark]; + drbd_info(device, "Syncer continues.\n"); + device->rs_paused += (long)jiffies + -(long)device->rs_mark_time[device->rs_last_mark]; if (ns.conn == C_SYNC_TARGET) - mod_timer(&mdev->resync_timer, jiffies); + mod_timer(&device->resync_timer, jiffies); } if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { - dev_info(DEV, "Resync suspended\n"); - mdev->rs_mark_time[mdev->rs_last_mark] = jiffies; + drbd_info(device, "Resync suspended\n"); + device->rs_mark_time[device->rs_last_mark] = jiffies; } if (os.conn == C_CONNECTED && @@ -1047,77 +1060,77 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, unsigned long now = jiffies; int i; - set_ov_position(mdev, ns.conn); - mdev->rs_start = now; - mdev->rs_last_events = 0; - mdev->rs_last_sect_ev = 0; - mdev->ov_last_oos_size = 0; - mdev->ov_last_oos_start = 0; + set_ov_position(device, ns.conn); + device->rs_start = 
now; + device->rs_last_events = 0; + device->rs_last_sect_ev = 0; + device->ov_last_oos_size = 0; + device->ov_last_oos_start = 0; for (i = 0; i < DRBD_SYNC_MARKS; i++) { - mdev->rs_mark_left[i] = mdev->ov_left; - mdev->rs_mark_time[i] = now; + device->rs_mark_left[i] = device->ov_left; + device->rs_mark_time[i] = now; } - drbd_rs_controller_reset(mdev); + drbd_rs_controller_reset(device); if (ns.conn == C_VERIFY_S) { - dev_info(DEV, "Starting Online Verify from sector %llu\n", - (unsigned long long)mdev->ov_position); - mod_timer(&mdev->resync_timer, jiffies); + drbd_info(device, "Starting Online Verify from sector %llu\n", + (unsigned long long)device->ov_position); + mod_timer(&device->resync_timer, jiffies); } } - if (get_ldev(mdev)) { - u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| + if (get_ldev(device)) { + u32 mdf = device->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); mdf &= ~MDF_AL_CLEAN; - if (test_bit(CRASHED_PRIMARY, &mdev->flags)) + if (test_bit(CRASHED_PRIMARY, &device->flags)) mdf |= MDF_CRASHED_PRIMARY; - if (mdev->state.role == R_PRIMARY || - (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) + if (device->state.role == R_PRIMARY || + (device->state.pdsk < D_INCONSISTENT && device->state.peer == R_PRIMARY)) mdf |= MDF_PRIMARY_IND; - if (mdev->state.conn > C_WF_REPORT_PARAMS) + if (device->state.conn > C_WF_REPORT_PARAMS) mdf |= MDF_CONNECTED_IND; - if (mdev->state.disk > D_INCONSISTENT) + if (device->state.disk > D_INCONSISTENT) mdf |= MDF_CONSISTENT; - if (mdev->state.disk > D_OUTDATED) + if (device->state.disk > D_OUTDATED) mdf |= MDF_WAS_UP_TO_DATE; - if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) + if (device->state.pdsk <= D_OUTDATED && device->state.pdsk >= D_INCONSISTENT) mdf |= MDF_PEER_OUT_DATED; - if (mdf != mdev->ldev->md.flags) { - mdev->ldev->md.flags = mdf; - drbd_md_mark_dirty(mdev); 
+ if (mdf != device->ldev->md.flags) { + device->ldev->md.flags = mdf; + drbd_md_mark_dirty(device); } if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) - drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); - put_ldev(mdev); + drbd_set_ed_uuid(device, device->ldev->md.uuid[UI_CURRENT]); + put_ldev(device); } /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && os.peer == R_SECONDARY && ns.peer == R_PRIMARY) - set_bit(CONSIDER_RESYNC, &mdev->flags); + set_bit(CONSIDER_RESYNC, &device->flags); /* Receiver should clean up itself */ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&mdev->tconn->receiver); + drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); /* Now the receiver finished cleaning up itself, it should die */ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&mdev->tconn->receiver); + drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); /* Upon network failure, we need to restart the receiver. 
*/ if (os.conn > C_WF_CONNECTION && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&mdev->tconn->receiver); + drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver); /* Resume AL writing if we get a connection */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { - drbd_resume_al(mdev); - mdev->tconn->connect_cnt++; + drbd_resume_al(device); + first_peer_device(device)->connection->connect_cnt++; } /* remember last attach time so request_timer_fn() won't @@ -1125,7 +1138,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, * previously frozen IO */ if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && ns.disk > D_NEGOTIATING) - mdev->last_reattach_jif = jiffies; + device->last_reattach_jif = jiffies; ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); if (ascw) { @@ -1133,11 +1146,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ascw->ns = ns; ascw->flags = flags; ascw->w.cb = w_after_state_ch; - ascw->w.mdev = mdev; + ascw->device = device; ascw->done = done; - drbd_queue_work(&mdev->tconn->sender_work, &ascw->w); + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &ascw->w); } else { - dev_err(DEV, "Could not kmalloc an ascw\n"); + drbd_err(device, "Could not kmalloc an ascw\n"); } return rv; @@ -1147,66 +1161,65 @@ static int w_after_state_ch(struct drbd_work *w, int unused) { struct after_state_chg_work *ascw = container_of(w, struct after_state_chg_work, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = ascw->device; - after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); - if (ascw->flags & CS_WAIT_COMPLETE) { - D_ASSERT(ascw->done != NULL); + after_state_ch(device, ascw->os, ascw->ns, ascw->flags); + if (ascw->flags & CS_WAIT_COMPLETE) complete(ascw->done); - } kfree(ascw); return 0; } -static void abw_start_sync(struct drbd_conf *mdev, int rv) +static void abw_start_sync(struct drbd_device *device, int rv) { if (rv) { - 
dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); - _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); + drbd_err(device, "Writing the bitmap failed not starting resync.\n"); + _drbd_request_state(device, NS(conn, C_CONNECTED), CS_VERBOSE); return; } - switch (mdev->state.conn) { + switch (device->state.conn) { case C_STARTING_SYNC_T: - _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); break; case C_STARTING_SYNC_S: - drbd_start_resync(mdev, C_SYNC_SOURCE); + drbd_start_resync(device, C_SYNC_SOURCE); break; } } -int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, - int (*io_fn)(struct drbd_conf *), +int drbd_bitmap_io_from_worker(struct drbd_device *device, + int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags) { int rv; - D_ASSERT(current == mdev->tconn->worker.task); + D_ASSERT(device, current == first_peer_device(device)->connection->worker.task); - /* open coded non-blocking drbd_suspend_io(mdev); */ - set_bit(SUSPEND_IO, &mdev->flags); + /* open coded non-blocking drbd_suspend_io(device); */ + set_bit(SUSPEND_IO, &device->flags); - drbd_bm_lock(mdev, why, flags); - rv = io_fn(mdev); - drbd_bm_unlock(mdev); + drbd_bm_lock(device, why, flags); + rv = io_fn(device); + drbd_bm_unlock(device); - drbd_resume_io(mdev); + drbd_resume_io(device); return rv; } /** * after_state_ch() - Perform after state change actions that may sleep - * @mdev: DRBD device. + * @device: DRBD device. * @os: old state. * @ns: new state. 
* @flags: Flags */ -static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, +static void after_state_ch(struct drbd_device *device, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { + struct drbd_resource *resource = device->resource; struct sib_info sib; sib.sib_reason = SIB_STATE_CHANGE; @@ -1214,63 +1227,63 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, sib.ns = ns; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { - clear_bit(CRASHED_PRIMARY, &mdev->flags); - if (mdev->p_uuid) - mdev->p_uuid[UI_FLAGS] &= ~((u64)2); + clear_bit(CRASHED_PRIMARY, &device->flags); + if (device->p_uuid) + device->p_uuid[UI_FLAGS] &= ~((u64)2); } /* Inform userspace about the change... */ - drbd_bcast_event(mdev, &sib); + drbd_bcast_event(device, &sib); if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) - drbd_khelper(mdev, "pri-on-incon-degr"); + drbd_khelper(device, "pri-on-incon-degr"); /* Here we have the actions that are performed after a state change. 
This function might sleep */ if (ns.susp_nod) { - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_connection *connection = first_peer_device(device)->connection; enum drbd_req_event what = NOTHING; - spin_lock_irq(&tconn->req_lock); - if (os.conn < C_CONNECTED && conn_lowest_conn(tconn) >= C_CONNECTED) + spin_lock_irq(&device->resource->req_lock); + if (os.conn < C_CONNECTED && conn_lowest_conn(connection) >= C_CONNECTED) what = RESEND; if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && - conn_lowest_disk(tconn) > D_NEGOTIATING) + conn_lowest_disk(connection) > D_NEGOTIATING) what = RESTART_FROZEN_DISK_IO; - if (tconn->susp_nod && what != NOTHING) { - _tl_restart(tconn, what); - _conn_request_state(tconn, + if (resource->susp_nod && what != NOTHING) { + _tl_restart(connection, what); + _conn_request_state(connection, (union drbd_state) { { .susp_nod = 1 } }, (union drbd_state) { { .susp_nod = 0 } }, CS_VERBOSE); } - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); } if (ns.susp_fen) { - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_connection *connection = first_peer_device(device)->connection; - spin_lock_irq(&tconn->req_lock); - if (tconn->susp_fen && conn_lowest_conn(tconn) >= C_CONNECTED) { + spin_lock_irq(&device->resource->req_lock); + if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) { /* case2: The connection was established again: */ - struct drbd_conf *odev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, odev, vnr) - clear_bit(NEW_CUR_UUID, &odev->flags); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) + clear_bit(NEW_CUR_UUID, &peer_device->device->flags); rcu_read_unlock(); - _tl_restart(tconn, RESEND); - _conn_request_state(tconn, + _tl_restart(connection, RESEND); + _conn_request_state(connection, (union drbd_state) { { .susp_fen = 1 } }, (union drbd_state) { { .susp_fen = 0 } }, CS_VERBOSE); } - 
spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); } /* Became sync source. With protocol >= 96, we still need to send out @@ -1279,9 +1292,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * which is unexpected. */ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && - mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { - drbd_gen_and_send_sync_uuid(mdev); - put_ldev(mdev); + first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) { + drbd_gen_and_send_sync_uuid(first_peer_device(device)); + put_ldev(device); } /* Do not change the order of the if above and the two below... */ @@ -1289,20 +1302,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */ /* we probably will start a resync soon. * make sure those things are properly reset. */ - mdev->rs_total = 0; - mdev->rs_failed = 0; - atomic_set(&mdev->rs_pending_cnt, 0); - drbd_rs_cancel_all(mdev); + device->rs_total = 0; + device->rs_failed = 0; + atomic_set(&device->rs_pending_cnt, 0); + drbd_rs_cancel_all(device); - drbd_send_uuids(mdev); - drbd_send_state(mdev, ns); + drbd_send_uuids(first_peer_device(device)); + drbd_send_state(first_peer_device(device), ns); } /* No point in queuing send_bitmap if we don't have a connection * anymore, so check also the _current_ state, not only the new state * at the time this work was queued. 
*/ if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && - mdev->state.conn == C_WF_BITMAP_S) - drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, + device->state.conn == C_WF_BITMAP_S) + drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)", BM_LOCKED_TEST_ALLOWED); @@ -1313,80 +1326,80 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, && (ns.pdsk < D_INCONSISTENT || ns.pdsk == D_UNKNOWN || ns.pdsk == D_OUTDATED)) { - if (get_ldev(mdev)) { + if (get_ldev(device)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - if (drbd_suspended(mdev)) { - set_bit(NEW_CUR_UUID, &mdev->flags); + device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + if (drbd_suspended(device)) { + set_bit(NEW_CUR_UUID, &device->flags); } else { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); + drbd_uuid_new_current(device); + drbd_send_uuids(first_peer_device(device)); } } - put_ldev(mdev); + put_ldev(device); } } - if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { + if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) { if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); + device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + drbd_uuid_new_current(device); + drbd_send_uuids(first_peer_device(device)); } /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) /* We may still be Primary ourselves. * No harm done if the bitmap still changes, * redirtied pages will follow later. */ - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + drbd_bitmap_io_from_worker(device, &drbd_bm_write, "demote diskless peer", BM_LOCKED_SET_ALLOWED); - put_ldev(mdev); + put_ldev(device); } /* Write out all changed bits on demote. 
* Though, no need to da that just yet * if there is a resync going on still */ if (os.role == R_PRIMARY && ns.role == R_SECONDARY && - mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { + device->state.conn <= C_CONNECTED && get_ldev(device)) { /* No changes to the bitmap expected this time, so assert that, * even though no harm was done if it did change. */ - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + drbd_bitmap_io_from_worker(device, &drbd_bm_write, "demote", BM_LOCKED_TEST_ALLOWED); - put_ldev(mdev); + put_ldev(device); } /* Last part of the attaching process ... */ if (ns.conn >= C_CONNECTED && os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - drbd_send_sizes(mdev, 0, 0); /* to start sync... */ - drbd_send_uuids(mdev); - drbd_send_state(mdev, ns); + drbd_send_sizes(first_peer_device(device), 0, 0); /* to start sync... */ + drbd_send_uuids(first_peer_device(device)); + drbd_send_state(first_peer_device(device), ns); } /* We want to pause/continue resync, tell peer. */ if (ns.conn >= C_CONNECTED && ((os.aftr_isp != ns.aftr_isp) || (os.user_isp != ns.user_isp))) - drbd_send_state(mdev, ns); + drbd_send_state(first_peer_device(device), ns); /* In case one of the isp bits got set, suspend other devices. */ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && (ns.aftr_isp || ns.peer_isp || ns.user_isp)) - suspend_other_sg(mdev); + suspend_other_sg(device); /* Make sure the peer gets informed about eventual state changes (ISP bits) while we were in WFReportParams. */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(mdev, ns); + drbd_send_state(first_peer_device(device), ns); if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(mdev, ns); + drbd_send_state(first_peer_device(device), ns); /* We are in the progress to start a full sync... 
*/ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) /* no other bitmap changes expected during this phase */ - drbd_queue_bitmap_io(mdev, + drbd_queue_bitmap_io(device, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); @@ -1399,15 +1412,15 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * our cleanup here with the transition to D_DISKLESS. * But is is still not save to dreference ldev here, since * we might come from an failed Attach before ldev was set. */ - if (mdev->ldev) { + if (device->ldev) { rcu_read_lock(); - eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + eh = rcu_dereference(device->ldev->disk_conf)->on_io_error; rcu_read_unlock(); - was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags); if (was_io_error && eh == EP_CALL_HELPER) - drbd_khelper(mdev, "local-io-error"); + drbd_khelper(device, "local-io-error"); /* Immediately allow completion of all application IO, * that waits for completion from the local disk, @@ -1422,76 +1435,76 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * So aborting local requests may cause crashes, * or even worse, silent data corruption. */ - if (test_and_clear_bit(FORCE_DETACH, &mdev->flags)) - tl_abort_disk_io(mdev); + if (test_and_clear_bit(FORCE_DETACH, &device->flags)) + tl_abort_disk_io(device); /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, * and that may only happen after our put_ldev below. 
*/ - if (mdev->state.disk != D_FAILED) - dev_err(DEV, + if (device->state.disk != D_FAILED) + drbd_err(device, "ASSERT FAILED: disk is %s during detach\n", - drbd_disk_str(mdev->state.disk)); + drbd_disk_str(device->state.disk)); if (ns.conn >= C_CONNECTED) - drbd_send_state(mdev, ns); + drbd_send_state(first_peer_device(device), ns); - drbd_rs_cancel_all(mdev); + drbd_rs_cancel_all(device); /* In case we want to get something to stable storage still, * this may be the last chance. * Following put_ldev may transition to D_DISKLESS. */ - drbd_md_sync(mdev); + drbd_md_sync(device); } - put_ldev(mdev); + put_ldev(device); } - /* second half of local IO error, failure to attach, - * or administrative detach, - * after local_cnt references have reached zero again */ - if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { - /* We must still be diskless, - * re-attach has to be serialized with this! */ - if (mdev->state.disk != D_DISKLESS) - dev_err(DEV, - "ASSERT FAILED: disk is %s while going diskless\n", - drbd_disk_str(mdev->state.disk)); + /* second half of local IO error, failure to attach, + * or administrative detach, + * after local_cnt references have reached zero again */ + if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { + /* We must still be diskless, + * re-attach has to be serialized with this! */ + if (device->state.disk != D_DISKLESS) + drbd_err(device, + "ASSERT FAILED: disk is %s while going diskless\n", + drbd_disk_str(device->state.disk)); if (ns.conn >= C_CONNECTED) - drbd_send_state(mdev, ns); + drbd_send_state(first_peer_device(device), ns); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. */ - put_ldev(mdev); + put_ldev(device); } /* Notify peer that I had a local IO error, and did not detached.. 
*/ if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) - drbd_send_state(mdev, ns); + drbd_send_state(first_peer_device(device), ns); /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && - test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { + test_and_clear_bit(RESYNC_AFTER_NEG, &device->flags)) { if (ns.conn == C_CONNECTED) - resync_after_online_grow(mdev); + resync_after_online_grow(device); } /* A resync finished or aborted, wake paused devices... */ if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || (os.peer_isp && !ns.peer_isp) || (os.user_isp && !ns.user_isp)) - resume_next_sg(mdev); + resume_next_sg(device); /* sync target done with resync. Explicitly notify peer, even though * it should (at least for non-empty resyncs) already know itself. */ if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(mdev, ns); + drbd_send_state(first_peer_device(device), ns); /* Verify finished, or reached stop sector. Peer did not know about * the stop sector, and we may even have changed the stop sector during * verify to interrupt/stop early. Send the new state. */ if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED - && verify_can_do_stop_sector(mdev)) - drbd_send_state(mdev, ns); + && verify_can_do_stop_sector(device)) + drbd_send_state(first_peer_device(device), ns); /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk @@ -1500,56 +1513,57 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * any bitmap writeout anymore. * No harm done if some bits change during this phase. 
*/ - if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { - drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL, + if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(device)) { + drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL, "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); - put_ldev(mdev); + put_ldev(device); } if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { if (os.aftr_isp != ns.aftr_isp) - resume_next_sg(mdev); + resume_next_sg(device); } - drbd_md_sync(mdev); + drbd_md_sync(device); } struct after_conn_state_chg_work { struct drbd_work w; enum drbd_conns oc; union drbd_state ns_min; - union drbd_state ns_max; /* new, max state, over all mdevs */ + union drbd_state ns_max; /* new, max state, over all devices */ enum chg_state_flags flags; + struct drbd_connection *connection; }; static int w_after_conn_state_ch(struct drbd_work *w, int unused) { struct after_conn_state_chg_work *acscw = container_of(w, struct after_conn_state_chg_work, w); - struct drbd_tconn *tconn = w->tconn; + struct drbd_connection *connection = acscw->connection; enum drbd_conns oc = acscw->oc; union drbd_state ns_max = acscw->ns_max; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; kfree(acscw); /* Upon network configuration, we need to start the receiver */ if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED) - drbd_thread_start(&tconn->receiver); + drbd_thread_start(&connection->receiver); if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) { struct net_conf *old_conf; - mutex_lock(&tconn->conf_update); - old_conf = tconn->net_conf; - tconn->my_addr_len = 0; - tconn->peer_addr_len = 0; - rcu_assign_pointer(tconn->net_conf, NULL); - conn_free_crypto(tconn); - mutex_unlock(&tconn->conf_update); + mutex_lock(&connection->resource->conf_update); + old_conf = connection->net_conf; + connection->my_addr_len = 0; + connection->peer_addr_len = 0; + 
rcu_assign_pointer(connection->net_conf, NULL); + conn_free_crypto(connection); + mutex_unlock(&connection->resource->conf_update); synchronize_rcu(); kfree(old_conf); @@ -1559,45 +1573,47 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) /* case1: The outdate peer handler is successful: */ if (ns_max.pdsk <= D_OUTDATED) { rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (test_bit(NEW_CUR_UUID, &mdev->flags)) { - drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + if (test_bit(NEW_CUR_UUID, &device->flags)) { + drbd_uuid_new_current(device); + clear_bit(NEW_CUR_UUID, &device->flags); } } rcu_read_unlock(); - spin_lock_irq(&tconn->req_lock); - _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); - _conn_request_state(tconn, + spin_lock_irq(&connection->resource->req_lock); + _tl_restart(connection, CONNECTION_LOST_WHILE_PENDING); + _conn_request_state(connection, (union drbd_state) { { .susp_fen = 1 } }, (union drbd_state) { { .susp_fen = 0 } }, CS_VERBOSE); - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&connection->resource->req_lock); } } - kref_put(&tconn->kref, &conn_destroy); + kref_put(&connection->kref, drbd_destroy_connection); - conn_md_sync(tconn); + conn_md_sync(connection); return 0; } -void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf) +void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) { enum chg_state_flags flags = ~0; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr, first_vol = 1; union drbd_dev_state os, cs = { { .role = R_SECONDARY, .peer = R_UNKNOWN, - .conn = tconn->cstate, + .conn = connection->cstate, .disk = D_DISKLESS, .pdsk = D_UNKNOWN, } }; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - os = 
mdev->state; + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + os = device->state; if (first_vol) { cs = os; @@ -1628,18 +1644,19 @@ void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum } static enum drbd_state_rv -conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, +conn_is_valid_transition(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, enum chg_state_flags flags) { enum drbd_state_rv rv = SS_SUCCESS; union drbd_state ns, os; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; int vnr; rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - os = drbd_read_state(mdev); - ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + os = drbd_read_state(device); + ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL); if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) ns.disk = os.disk; @@ -1648,30 +1665,29 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union continue; rv = is_valid_transition(os, ns); - if (rv < SS_SUCCESS) - break; - if (!(flags & CS_HARD)) { - rv = is_valid_state(mdev, ns); + if (rv >= SS_SUCCESS && !(flags & CS_HARD)) { + rv = is_valid_state(device, ns); if (rv < SS_SUCCESS) { - if (is_valid_state(mdev, os) == rv) - rv = is_valid_soft_transition(os, ns, tconn); + if (is_valid_state(device, os) == rv) + rv = is_valid_soft_transition(os, ns, connection); } else - rv = is_valid_soft_transition(os, ns, tconn); + rv = is_valid_soft_transition(os, ns, connection); } - if (rv < SS_SUCCESS) + + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) + print_st_err(device, os, ns, rv); break; + } } rcu_read_unlock(); - if (rv < SS_SUCCESS && flags & CS_VERBOSE) - 
print_st_err(mdev, os, ns, rv); - return rv; } void -conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, +conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) { union drbd_state ns, os, ns_max = { }; @@ -1682,7 +1698,7 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state .disk = D_MASK, .pdsk = D_MASK } }; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; enum drbd_state_rv rv; int vnr, number_of_volumes = 0; @@ -1690,27 +1706,28 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state /* remember last connect time so request_timer_fn() won't * kill newly established sessions while we are still trying to thaw * previously frozen IO */ - if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS) - tconn->last_reconnect_jif = jiffies; + if (connection->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS) + connection->last_reconnect_jif = jiffies; - tconn->cstate = val.conn; + connection->cstate = val.conn; } rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; number_of_volumes++; - os = drbd_read_state(mdev); + os = drbd_read_state(device); ns = apply_mask_val(os, mask, val); - ns = sanitize_state(mdev, ns, NULL); + ns = sanitize_state(device, ns, NULL); if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) ns.disk = os.disk; - rv = __drbd_set_state(mdev, ns, flags, NULL); + rv = __drbd_set_state(device, ns, flags, NULL); if (rv < SS_SUCCESS) BUG(); - ns.i = mdev->state.i; + ns.i = device->state.i; ns_max.role = max_role(ns.role, ns_max.role); ns_max.peer = max_role(ns.peer, ns_max.peer); ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn); @@ 
-1735,39 +1752,39 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state } }; } - ns_min.susp = ns_max.susp = tconn->susp; - ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod; - ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen; + ns_min.susp = ns_max.susp = connection->resource->susp; + ns_min.susp_nod = ns_max.susp_nod = connection->resource->susp_nod; + ns_min.susp_fen = ns_max.susp_fen = connection->resource->susp_fen; *pns_min = ns_min; *pns_max = ns_max; } static enum drbd_state_rv -_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) +_conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val) { enum drbd_state_rv rv; - if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags)) + if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags)) return SS_CW_SUCCESS; - if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) + if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags)) return SS_CW_FAILED_BY_PEER; - rv = conn_is_valid_transition(tconn, mask, val, 0); - if (rv == SS_SUCCESS && tconn->cstate == C_WF_REPORT_PARAMS) + rv = conn_is_valid_transition(connection, mask, val, 0); + if (rv == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS) rv = SS_UNKNOWN_ERROR; /* continue waiting */ return rv; } enum drbd_state_rv -_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, +_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, enum chg_state_flags flags) { enum drbd_state_rv rv = SS_SUCCESS; struct after_conn_state_chg_work *acscw; - enum drbd_conns oc = tconn->cstate; + enum drbd_conns oc = connection->cstate; union drbd_state ns_max, ns_min, os; bool have_mutex = false; @@ -1777,7 +1794,7 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ goto abort; } - rv = conn_is_valid_transition(tconn, mask, val, flags); + rv = 
conn_is_valid_transition(connection, mask, val, flags); if (rv < SS_SUCCESS) goto abort; @@ -1787,38 +1804,38 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ /* This will be a cluster-wide state change. * Need to give up the spinlock, grab the mutex, * then send the state change request, ... */ - spin_unlock_irq(&tconn->req_lock); - mutex_lock(&tconn->cstate_mutex); + spin_unlock_irq(&connection->resource->req_lock); + mutex_lock(&connection->cstate_mutex); have_mutex = true; - set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); - if (conn_send_state_req(tconn, mask, val)) { + set_bit(CONN_WD_ST_CHG_REQ, &connection->flags); + if (conn_send_state_req(connection, mask, val)) { /* sending failed. */ - clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); + clear_bit(CONN_WD_ST_CHG_REQ, &connection->flags); rv = SS_CW_FAILED_BY_PEER; /* need to re-aquire the spin lock, though */ goto abort_unlocked; } if (val.conn == C_DISCONNECTING) - set_bit(DISCONNECT_SENT, &tconn->flags); + set_bit(DISCONNECT_SENT, &connection->flags); /* ... and re-aquire the spinlock. * If _conn_rq_cond() returned >= SS_SUCCESS, we must call * conn_set_state() within the same spinlock. 
*/ - spin_lock_irq(&tconn->req_lock); - wait_event_lock_irq(tconn->ping_wait, - (rv = _conn_rq_cond(tconn, mask, val)), - tconn->req_lock); - clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); + spin_lock_irq(&connection->resource->req_lock); + wait_event_lock_irq(connection->ping_wait, + (rv = _conn_rq_cond(connection, mask, val)), + connection->resource->req_lock); + clear_bit(CONN_WD_ST_CHG_REQ, &connection->flags); if (rv < SS_SUCCESS) goto abort; } - conn_old_common_state(tconn, &os, &flags); + conn_old_common_state(connection, &os, &flags); flags |= CS_DC_SUSP; - conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags); - conn_pr_state_change(tconn, os, ns_max, flags); + conn_set_state(connection, mask, val, &ns_min, &ns_max, flags); + conn_pr_state_change(connection, os, ns_max, flags); acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); if (acscw) { @@ -1827,39 +1844,39 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ acscw->ns_max = ns_max; acscw->flags = flags; acscw->w.cb = w_after_conn_state_ch; - kref_get(&tconn->kref); - acscw->w.tconn = tconn; - drbd_queue_work(&tconn->sender_work, &acscw->w); + kref_get(&connection->kref); + acscw->connection = connection; + drbd_queue_work(&connection->sender_work, &acscw->w); } else { - conn_err(tconn, "Could not kmalloc an acscw\n"); + drbd_err(connection, "Could not kmalloc an acscw\n"); } abort: if (have_mutex) { /* mutex_unlock() "... 
must not be used in interrupt context.", * so give up the spinlock, then re-aquire it */ - spin_unlock_irq(&tconn->req_lock); + spin_unlock_irq(&connection->resource->req_lock); abort_unlocked: - mutex_unlock(&tconn->cstate_mutex); - spin_lock_irq(&tconn->req_lock); + mutex_unlock(&connection->cstate_mutex); + spin_lock_irq(&connection->resource->req_lock); } if (rv < SS_SUCCESS && flags & CS_VERBOSE) { - conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv)); - conn_err(tconn, " mask = 0x%x val = 0x%x\n", mask.i, val.i); - conn_err(tconn, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn)); + drbd_err(connection, "State change failed: %s\n", drbd_set_st_err_str(rv)); + drbd_err(connection, " mask = 0x%x val = 0x%x\n", mask.i, val.i); + drbd_err(connection, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn)); } return rv; } enum drbd_state_rv -conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, +conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, enum chg_state_flags flags) { enum drbd_state_rv rv; - spin_lock_irq(&tconn->req_lock); - rv = _conn_request_state(tconn, mask, val, flags); - spin_unlock_irq(&tconn->req_lock); + spin_lock_irq(&connection->resource->req_lock); + rv = _conn_request_state(connection, mask, val, flags); + spin_unlock_irq(&connection->resource->req_lock); return rv; } diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index a3c361bbc4b6..cc41605ba21c 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -1,8 +1,8 @@ #ifndef DRBD_STATE_H #define DRBD_STATE_H -struct drbd_conf; -struct drbd_tconn; +struct drbd_device; +struct drbd_connection; /** * DOC: DRBD State macros @@ -107,36 +107,36 @@ union drbd_dev_state { unsigned int i; }; -extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, +extern enum drbd_state_rv 
drbd_change_state(struct drbd_device *device, enum chg_state_flags f, union drbd_state mask, union drbd_state val); -extern void drbd_force_state(struct drbd_conf *, union drbd_state, +extern void drbd_force_state(struct drbd_device *, union drbd_state, union drbd_state); -extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, +extern enum drbd_state_rv _drbd_request_state(struct drbd_device *, union drbd_state, union drbd_state, enum chg_state_flags); -extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, +extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state, enum chg_state_flags, struct completion *done); -extern void print_st_err(struct drbd_conf *, union drbd_state, +extern void print_st_err(struct drbd_device *, union drbd_state, union drbd_state, int); enum drbd_state_rv -_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, +_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, enum chg_state_flags flags); enum drbd_state_rv -conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, +conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, enum chg_state_flags flags); -extern void drbd_resume_al(struct drbd_conf *mdev); -extern bool conn_all_vols_unconf(struct drbd_tconn *tconn); +extern void drbd_resume_al(struct drbd_device *device); +extern bool conn_all_vols_unconf(struct drbd_connection *connection); /** * drbd_request_state() - Reqest a state change - * @mdev: DRBD device. + * @device: DRBD device. * @mask: mask of state bits to change. * @val: value of new state bits. * @@ -144,18 +144,18 @@ extern bool conn_all_vols_unconf(struct drbd_tconn *tconn); * quite verbose in case the state change is not possible, and all those * state changes are globally serialized. 
*/ -static inline int drbd_request_state(struct drbd_conf *mdev, +static inline int drbd_request_state(struct drbd_device *device, union drbd_state mask, union drbd_state val) { - return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); + return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED); } -enum drbd_role conn_highest_role(struct drbd_tconn *tconn); -enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); -enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); -enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn); -enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); -enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn); +enum drbd_role conn_highest_role(struct drbd_connection *connection); +enum drbd_role conn_highest_peer(struct drbd_connection *connection); +enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection); +enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection); +enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection); +enum drbd_conns conn_lowest_conn(struct drbd_connection *connection); #endif diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 58e08ff2b2ce..80b0f63c7075 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -24,6 +24,7 @@ */ #include <linux/drbd.h> +#include "drbd_strings.h" static const char *drbd_conn_s_names[] = { [C_STANDALONE] = "StandAlone", diff --git a/drivers/block/drbd/drbd_strings.h b/drivers/block/drbd/drbd_strings.h new file mode 100644 index 000000000000..f9923cc88afb --- /dev/null +++ b/drivers/block/drbd/drbd_strings.h @@ -0,0 +1,9 @@ +#ifndef __DRBD_STRINGS_H +#define __DRBD_STRINGS_H + +extern const char *drbd_conn_str(enum drbd_conns); +extern const char *drbd_role_str(enum drbd_role); +extern const char *drbd_disk_str(enum drbd_disk_state); +extern const char *drbd_set_st_err_str(enum drbd_state_rv); + +#endif 
/* __DRBD_STRINGS_H */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 84d3175d493a..2c4ce42c3657 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -21,7 +21,7 @@ along with drbd; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ +*/ #include <linux/module.h> #include <linux/drbd.h> @@ -36,10 +36,11 @@ #include <linux/scatterlist.h> #include "drbd_int.h" +#include "drbd_protocol.h" #include "drbd_req.h" -static int w_make_ov_request(struct drbd_work *w, int cancel); - +static int make_ov_request(struct drbd_device *, int); +static int make_resync_request(struct drbd_device *, int); /* endio handlers: * drbd_md_io_complete (defined here) @@ -67,10 +68,10 @@ rwlock_t global_state_lock; void drbd_md_io_complete(struct bio *bio, int error) { struct drbd_md_io *md_io; - struct drbd_conf *mdev; + struct drbd_device *device; md_io = (struct drbd_md_io *)bio->bi_private; - mdev = container_of(md_io, struct drbd_conf, md_io); + device = container_of(md_io, struct drbd_device, md_io); md_io->error = error; @@ -83,35 +84,36 @@ void drbd_md_io_complete(struct bio *bio, int error) * Make sure we first drop the reference, and only then signal * completion, or we may (in drbd_al_read_log()) cycle so fast into the * next drbd_md_sync_page_io(), that we trigger the - * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there. + * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. 
*/ - drbd_md_put_buffer(mdev); + drbd_md_put_buffer(device); md_io->done = 1; - wake_up(&mdev->misc_wait); + wake_up(&device->misc_wait); bio_put(bio); - if (mdev->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ - put_ldev(mdev); + if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ + put_ldev(device); } /* reads on behalf of the partner, * "submitted" by the receiver */ -void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) +static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = peer_req->w.mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - mdev->read_cnt += peer_req->i.size >> 9; + spin_lock_irqsave(&device->resource->req_lock, flags); + device->read_cnt += peer_req->i.size >> 9; list_del(&peer_req->w.list); - if (list_empty(&mdev->read_ee)) - wake_up(&mdev->ee_wait); + if (list_empty(&device->read_ee)) + wake_up(&device->ee_wait); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, DRBD_READ_ERROR); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + __drbd_chk_io_error(device, DRBD_READ_ERROR); + spin_unlock_irqrestore(&device->resource->req_lock, flags); - drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); - put_ldev(mdev); + drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w); + put_ldev(device); } /* writes on behalf of the partner, or resync writes, @@ -119,7 +121,8 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = peer_req->w.mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = 
peer_device->device; struct drbd_interval i; int do_wake; u64 block_id; @@ -133,35 +136,35 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; block_id = peer_req->block_id; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - mdev->writ_cnt += peer_req->i.size >> 9; - list_move_tail(&peer_req->w.list, &mdev->done_ee); + spin_lock_irqsave(&device->resource->req_lock, flags); + device->writ_cnt += peer_req->i.size >> 9; + list_move_tail(&peer_req->w.list, &device->done_ee); /* * Do not remove from the write_requests tree here: we did not send the * Ack yet and did not wake possibly waiting conflicting requests. * Removed from the tree from "drbd_process_done_ee" within the - * appropriate w.cb (e_end_block/e_end_resync_block) or from + * appropriate dw.cb (e_end_block/e_end_resync_block) or from * _drbd_clear_done_ee. */ - do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); + do_wake = list_empty(block_id == ID_SYNCER ? 
&device->sync_ee : &device->active_ee); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + __drbd_chk_io_error(device, DRBD_WRITE_ERROR); + spin_unlock_irqrestore(&device->resource->req_lock, flags); if (block_id == ID_SYNCER) - drbd_rs_complete_io(mdev, i.sector); + drbd_rs_complete_io(device, i.sector); if (do_wake) - wake_up(&mdev->ee_wait); + wake_up(&device->ee_wait); if (do_al_complete_io) - drbd_al_complete_io(mdev, &i); + drbd_al_complete_io(device, &i); - wake_asender(mdev->tconn); - put_ldev(mdev); + wake_asender(peer_device->connection); + put_ldev(device); } /* writes on behalf of the partner, or resync writes, @@ -170,17 +173,17 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel void drbd_peer_request_endio(struct bio *bio, int error) { struct drbd_peer_request *peer_req = bio->bi_private; - struct drbd_conf *mdev = peer_req->w.mdev; + struct drbd_device *device = peer_req->peer_device->device; int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; if (error && __ratelimit(&drbd_ratelimit_state)) - dev_warn(DEV, "%s: error=%d s=%llus\n", + drbd_warn(device, "%s: error=%d s=%llus\n", is_write ? "write" : "read", error, (unsigned long long)peer_req->i.sector); if (!error && !uptodate) { if (__ratelimit(&drbd_ratelimit_state)) - dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", + drbd_warn(device, "%s: setting error to -EIO s=%llus\n", is_write ? "write" : "read", (unsigned long long)peer_req->i.sector); /* strange behavior of some lower level drivers... 
@@ -207,13 +210,13 @@ void drbd_request_endio(struct bio *bio, int error) { unsigned long flags; struct drbd_request *req = bio->bi_private; - struct drbd_conf *mdev = req->w.mdev; + struct drbd_device *device = req->device; struct bio_and_error m; enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); if (!error && !uptodate) { - dev_warn(DEV, "p %s: setting error to -EIO\n", + drbd_warn(device, "p %s: setting error to -EIO\n", bio_data_dir(bio) == WRITE ? "write" : "read"); /* strange behavior of some lower level drivers... * fail the request by clearing the uptodate flag, @@ -252,7 +255,7 @@ void drbd_request_endio(struct bio *bio, int error) */ if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { if (__ratelimit(&drbd_ratelimit_state)) - dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); + drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); if (!error) panic("possible random memory corruption caused by delayed completion of aborted local request\n"); @@ -272,17 +275,16 @@ void drbd_request_endio(struct bio *bio, int error) req->private_bio = ERR_PTR(error); /* not req_mod(), we need irqsave here! 
*/ - spin_lock_irqsave(&mdev->tconn->req_lock, flags); + spin_lock_irqsave(&device->resource->req_lock, flags); __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - put_ldev(mdev); + spin_unlock_irqrestore(&device->resource->req_lock, flags); + put_ldev(device); if (m.bio) - complete_master_bio(mdev, &m); + complete_master_bio(device, &m); } -void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, - struct drbd_peer_request *peer_req, void *digest) +void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest) { struct hash_desc desc; struct scatterlist sg; @@ -309,7 +311,7 @@ void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, crypto_hash_final(&desc, digest); } -void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) +void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest) { struct hash_desc desc; struct scatterlist sg; @@ -333,7 +335,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * static int w_e_send_csum(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; int digest_size; void *digest; int err = 0; @@ -344,89 +347,92 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; - digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); + digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; - drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); + drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); /* Free peer_req and pages 
before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_alloc_pages due to pp_in_use > max_buffers. */ - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(mdev); - err = drbd_send_drequest_csum(mdev, sector, size, + inc_rs_pending(device); + err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_CSUM_RS_REQUEST); kfree(digest); } else { - dev_err(DEV, "kmalloc() of digest failed.\n"); + drbd_err(device, "kmalloc() of digest failed.\n"); err = -ENOMEM; } out: if (peer_req) - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); if (unlikely(err)) - dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); + drbd_err(device, "drbd_send_drequest(..., csum) failed\n"); return err; } #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) -static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) +static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size) { + struct drbd_device *device = peer_device->device; struct drbd_peer_request *peer_req; - if (!get_ldev(mdev)) + if (!get_ldev(device)) return -EIO; - if (drbd_rs_should_slow_down(mdev, sector)) + if (drbd_rs_should_slow_down(device, sector)) goto defer; /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. 
*/ - peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector, + peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, size, GFP_TRY); if (!peer_req) goto defer; peer_req->w.cb = w_e_send_csum; - spin_lock_irq(&mdev->tconn->req_lock); - list_add(&peer_req->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); + list_add(&peer_req->w.list, &device->read_ee); + spin_unlock_irq(&device->resource->req_lock); - atomic_add(size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) + atomic_add(size >> 9, &device->rs_sect_ev); + if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0) return 0; /* If it failed because of ENOMEM, retry should help. If it failed * because bio_add_page failed (probably broken lower level driver), * retry may or may not help. * If it does not, you may need to force disconnect. */ - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&device->resource->req_lock); list_del(&peer_req->w.list); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&device->resource->req_lock); - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); defer: - put_ldev(mdev); + put_ldev(device); return -EAGAIN; } int w_resync_timer(struct drbd_work *w, int cancel) { - struct drbd_conf *mdev = w->mdev; - switch (mdev->state.conn) { + struct drbd_device *device = + container_of(w, struct drbd_device, resync_work); + + switch (device->state.conn) { case C_VERIFY_S: - w_make_ov_request(w, cancel); + make_ov_request(device, cancel); break; case C_SYNC_TARGET: - w_make_resync_request(w, cancel); + make_resync_request(device, cancel); break; } @@ -435,10 +441,11 @@ int w_resync_timer(struct drbd_work *w, int cancel) void resync_timer_fn(unsigned long data) { - struct drbd_conf *mdev = (struct drbd_conf *) data; + struct drbd_device *device = (struct drbd_device *) data; - if 
(list_empty(&mdev->resync_work.list)) - drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work); + if (list_empty(&device->resync_work.list)) + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &device->resync_work); } static void fifo_set(struct fifo_buffer *fb, int value) @@ -485,7 +492,7 @@ struct fifo_buffer *fifo_alloc(int fifo_size) return fb; } -static int drbd_rs_controller(struct drbd_conf *mdev) +static int drbd_rs_controller(struct drbd_device *device) { struct disk_conf *dc; unsigned int sect_in; /* Number of sectors that came in since the last turn */ @@ -498,22 +505,22 @@ static int drbd_rs_controller(struct drbd_conf *mdev) int max_sect; struct fifo_buffer *plan; - sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */ - mdev->rs_in_flight -= sect_in; + sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */ + device->rs_in_flight -= sect_in; - dc = rcu_dereference(mdev->ldev->disk_conf); - plan = rcu_dereference(mdev->rs_plan_s); + dc = rcu_dereference(device->ldev->disk_conf); + plan = rcu_dereference(device->rs_plan_s); steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ - if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ + if (device->rs_in_flight + sect_in == 0) { /* At start of resync */ want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; } else { /* normal path */ want = dc->c_fill_target ? 
dc->c_fill_target : sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); } - correction = want - mdev->rs_in_flight - plan->total; + correction = want - device->rs_in_flight - plan->total; /* Plan ahead */ cps = correction / steps; @@ -533,25 +540,25 @@ static int drbd_rs_controller(struct drbd_conf *mdev) req_sect = max_sect; /* - dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", - sect_in, mdev->rs_in_flight, want, correction, - steps, cps, mdev->rs_planed, curr_corr, req_sect); + drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", + sect_in, device->rs_in_flight, want, correction, + steps, cps, device->rs_planed, curr_corr, req_sect); */ return req_sect; } -static int drbd_rs_number_requests(struct drbd_conf *mdev) +static int drbd_rs_number_requests(struct drbd_device *device) { int number; rcu_read_lock(); - if (rcu_dereference(mdev->rs_plan_s)->size) { - number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); - mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; + if (rcu_dereference(device->rs_plan_s)->size) { + number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9); + device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate; - number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); + device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; + number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } rcu_read_unlock(); @@ -560,12 +567,11 @@ static int drbd_rs_number_requests(struct drbd_conf *mdev) return number; } -int w_make_resync_request(struct drbd_work *w, int cancel) +static int make_resync_request(struct drbd_device *device, int cancel) { - struct drbd_conf *mdev = w->mdev; unsigned long bit; sector_t sector; - const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + const sector_t capacity = 
drbd_get_capacity(device->this_bdev); int max_bio_size; int number, rollback_i, size; int align, queued, sndbuf; @@ -574,61 +580,61 @@ int w_make_resync_request(struct drbd_work *w, int cancel) if (unlikely(cancel)) return 0; - if (mdev->rs_total == 0) { + if (device->rs_total == 0) { /* empty resync? */ - drbd_resync_finished(mdev); + drbd_resync_finished(device); return 0; } - if (!get_ldev(mdev)) { - /* Since we only need to access mdev->rsync a - get_ldev_if_state(mdev,D_FAILED) would be sufficient, but + if (!get_ldev(device)) { + /* Since we only need to access device->rsync a + get_ldev_if_state(device,D_FAILED) would be sufficient, but to continue resync with a broken disk makes no sense at all */ - dev_err(DEV, "Disk broke down during resync!\n"); + drbd_err(device, "Disk broke down during resync!\n"); return 0; } - max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; - number = drbd_rs_number_requests(mdev); + max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; + number = drbd_rs_number_requests(device); if (number == 0) goto requeue; for (i = 0; i < number; i++) { /* Stop generating RS requests, when half of the send buffer is filled */ - mutex_lock(&mdev->tconn->data.mutex); - if (mdev->tconn->data.socket) { - queued = mdev->tconn->data.socket->sk->sk_wmem_queued; - sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf; + mutex_lock(&first_peer_device(device)->connection->data.mutex); + if (first_peer_device(device)->connection->data.socket) { + queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued; + sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf; } else { queued = 1; sndbuf = 0; } - mutex_unlock(&mdev->tconn->data.mutex); + mutex_unlock(&first_peer_device(device)->connection->data.mutex); if (queued > sndbuf / 2) goto requeue; next_sector: size = BM_BLOCK_SIZE; - bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); + bit = drbd_bm_find_next(device, device->bm_resync_fo); if (bit == 
DRBD_END_OF_BITMAP) { - mdev->bm_resync_fo = drbd_bm_bits(mdev); - put_ldev(mdev); + device->bm_resync_fo = drbd_bm_bits(device); + put_ldev(device); return 0; } sector = BM_BIT_TO_SECT(bit); - if (drbd_rs_should_slow_down(mdev, sector) || - drbd_try_rs_begin_io(mdev, sector)) { - mdev->bm_resync_fo = bit; + if (drbd_rs_should_slow_down(device, sector) || + drbd_try_rs_begin_io(device, sector)) { + device->bm_resync_fo = bit; goto requeue; } - mdev->bm_resync_fo = bit + 1; + device->bm_resync_fo = bit + 1; - if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) { - drbd_rs_complete_io(mdev, sector); + if (unlikely(drbd_bm_test_bit(device, bit) == 0)) { + drbd_rs_complete_io(device, sector); goto next_sector; } @@ -657,7 +663,7 @@ next_sector: * obscure reason; ( b == 0 ) would get the out-of-band * only accidentally right because of the "oddly sized" * adjustment below */ - if (drbd_bm_test_bit(mdev, bit+1) != 1) + if (drbd_bm_test_bit(device, bit+1) != 1) break; bit++; size += BM_BLOCK_SIZE; @@ -668,20 +674,21 @@ next_sector: /* if we merged some, * reset the offset to start the next drbd_bm_find_next from */ if (size > BM_BLOCK_SIZE) - mdev->bm_resync_fo = bit + 1; + device->bm_resync_fo = bit + 1; #endif /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) { - switch (read_for_csum(mdev, sector, size)) { + if (first_peer_device(device)->connection->agreed_pro_version >= 89 && + first_peer_device(device)->connection->csums_tfm) { + switch (read_for_csum(first_peer_device(device), sector, size)) { case -EIO: /* Disk failure */ - put_ldev(mdev); + put_ldev(device); return -EIO; case -EAGAIN: /* allocation failed, or ldev busy */ - drbd_rs_complete_io(mdev, sector); - mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); + drbd_rs_complete_io(device, sector); + device->bm_resync_fo = BM_SECT_TO_BIT(sector); i = rollback_i; goto requeue; case 
0: @@ -693,50 +700,49 @@ next_sector: } else { int err; - inc_rs_pending(mdev); - err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST, + inc_rs_pending(device); + err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST, sector, size, ID_SYNCER); if (err) { - dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); - dec_rs_pending(mdev); - put_ldev(mdev); + drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); + dec_rs_pending(device); + put_ldev(device); return err; } } } - if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) { + if (device->bm_resync_fo >= drbd_bm_bits(device)) { /* last syncer _request_ was sent, * but the P_RS_DATA_REPLY not yet received. sync will end (and * next sync group will resume), as soon as we receive the last * resync data block, and the last bit is cleared. * until then resync "work" is "inactive" ... */ - put_ldev(mdev); + put_ldev(device); return 0; } requeue: - mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); - mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); - put_ldev(mdev); + device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); + mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); + put_ldev(device); return 0; } -static int w_make_ov_request(struct drbd_work *w, int cancel) +static int make_ov_request(struct drbd_device *device, int cancel) { - struct drbd_conf *mdev = w->mdev; int number, i, size; sector_t sector; - const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + const sector_t capacity = drbd_get_capacity(device->this_bdev); bool stop_sector_reached = false; if (unlikely(cancel)) return 1; - number = drbd_rs_number_requests(mdev); + number = drbd_rs_number_requests(device); - sector = mdev->ov_position; + sector = device->ov_position; for (i = 0; i < number; i++) { if (sector >= capacity) return 1; @@ -745,116 +751,121 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) * w_e_end_ov_reply(). * We need to send at least one request out. 
*/ stop_sector_reached = i > 0 - && verify_can_do_stop_sector(mdev) - && sector >= mdev->ov_stop_sector; + && verify_can_do_stop_sector(device) + && sector >= device->ov_stop_sector; if (stop_sector_reached) break; size = BM_BLOCK_SIZE; - if (drbd_rs_should_slow_down(mdev, sector) || - drbd_try_rs_begin_io(mdev, sector)) { - mdev->ov_position = sector; + if (drbd_rs_should_slow_down(device, sector) || + drbd_try_rs_begin_io(device, sector)) { + device->ov_position = sector; goto requeue; } if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - inc_rs_pending(mdev); - if (drbd_send_ov_request(mdev, sector, size)) { - dec_rs_pending(mdev); + inc_rs_pending(device); + if (drbd_send_ov_request(first_peer_device(device), sector, size)) { + dec_rs_pending(device); return 0; } sector += BM_SECT_PER_BIT; } - mdev->ov_position = sector; + device->ov_position = sector; requeue: - mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); + device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); if (i == 0 || !stop_sector_reached) - mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + mod_timer(&device->resync_timer, jiffies + SLEEP_TIME); return 1; } int w_ov_finished(struct drbd_work *w, int cancel) { - struct drbd_conf *mdev = w->mdev; - kfree(w); - ov_out_of_sync_print(mdev); - drbd_resync_finished(mdev); + struct drbd_device_work *dw = + container_of(w, struct drbd_device_work, w); + struct drbd_device *device = dw->device; + kfree(dw); + ov_out_of_sync_print(device); + drbd_resync_finished(device); return 0; } static int w_resync_finished(struct drbd_work *w, int cancel) { - struct drbd_conf *mdev = w->mdev; - kfree(w); + struct drbd_device_work *dw = + container_of(w, struct drbd_device_work, w); + struct drbd_device *device = dw->device; + kfree(dw); - drbd_resync_finished(mdev); + drbd_resync_finished(device); return 0; } -static void ping_peer(struct drbd_conf *mdev) +static void ping_peer(struct drbd_device *device) { - struct drbd_tconn *tconn = mdev->tconn; + 
struct drbd_connection *connection = first_peer_device(device)->connection; - clear_bit(GOT_PING_ACK, &tconn->flags); - request_ping(tconn); - wait_event(tconn->ping_wait, - test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED); + clear_bit(GOT_PING_ACK, &connection->flags); + request_ping(connection); + wait_event(connection->ping_wait, + test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED); } -int drbd_resync_finished(struct drbd_conf *mdev) +int drbd_resync_finished(struct drbd_device *device) { unsigned long db, dt, dbdt; unsigned long n_oos; union drbd_state os, ns; - struct drbd_work *w; + struct drbd_device_work *dw; char *khelper_cmd = NULL; int verify_done = 0; /* Remove all elements from the resync LRU. Since future actions * might set bits in the (main) bitmap, then the entries in the * resync LRU would be wrong. */ - if (drbd_rs_del_all(mdev)) { + if (drbd_rs_del_all(device)) { /* In case this is not possible now, most probably because * there are P_RS_DATA_REPLY Packets lingering on the worker's * queue (or even the read operations for those packets * is not finished by now). Retry in 100ms. 
*/ schedule_timeout_interruptible(HZ / 10); - w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); - if (w) { - w->cb = w_resync_finished; - w->mdev = mdev; - drbd_queue_work(&mdev->tconn->sender_work, w); + dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC); + if (dw) { + dw->w.cb = w_resync_finished; + dw->device = device; + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &dw->w); return 1; } - dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); + drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n"); } - dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; + dt = (jiffies - device->rs_start - device->rs_paused) / HZ; if (dt <= 0) dt = 1; - - db = mdev->rs_total; + + db = device->rs_total; /* adjust for verify start and stop sectors, respective reached position */ - if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) - db -= mdev->ov_left; + if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) + db -= device->ov_left; dbdt = Bit2KB(db/dt); - mdev->rs_paused /= HZ; + device->rs_paused /= HZ; - if (!get_ldev(mdev)) + if (!get_ldev(device)) goto out; - ping_peer(mdev); + ping_peer(device); - spin_lock_irq(&mdev->tconn->req_lock); - os = drbd_read_state(mdev); + spin_lock_irq(&device->resource->req_lock); + os = drbd_read_state(device); verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); @@ -866,41 +877,41 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns = os; ns.conn = C_CONNECTED; - dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", + drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", verify_done ? 
"Online verify" : "Resync", - dt + mdev->rs_paused, mdev->rs_paused, dbdt); + dt + device->rs_paused, device->rs_paused, dbdt); - n_oos = drbd_bm_total_weight(mdev); + n_oos = drbd_bm_total_weight(device); if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) { if (n_oos) { - dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n", + drbd_alert(device, "Online verify found %lu %dk block out of sync!\n", n_oos, Bit2KB(1)); khelper_cmd = "out-of-sync"; } } else { - D_ASSERT((n_oos - mdev->rs_failed) == 0); + D_ASSERT(device, (n_oos - device->rs_failed) == 0); if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (mdev->tconn->csums_tfm && mdev->rs_total) { - const unsigned long s = mdev->rs_same_csum; - const unsigned long t = mdev->rs_total; + if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { + const unsigned long s = device->rs_same_csum; + const unsigned long t = device->rs_total; const int ratio = (t == 0) ? 0 : (t < 100000) ? 
((s*100)/t) : (s/(t/100)); - dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " + drbd_info(device, "%u %% had equal checksums, eliminated: %luK; " "transferred %luK total %luK\n", ratio, - Bit2KB(mdev->rs_same_csum), - Bit2KB(mdev->rs_total - mdev->rs_same_csum), - Bit2KB(mdev->rs_total)); + Bit2KB(device->rs_same_csum), + Bit2KB(device->rs_total - device->rs_same_csum), + Bit2KB(device->rs_total)); } } - if (mdev->rs_failed) { - dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed); + if (device->rs_failed) { + drbd_info(device, " %lu failed blocks\n", device->rs_failed); if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { ns.disk = D_INCONSISTENT; @@ -914,179 +925,181 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns.pdsk = D_UP_TO_DATE; if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { - if (mdev->p_uuid) { + if (device->p_uuid) { int i; for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++) - _drbd_uuid_set(mdev, i, mdev->p_uuid[i]); - drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]); - _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]); + _drbd_uuid_set(device, i, device->p_uuid[i]); + drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]); + _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]); } else { - dev_err(DEV, "mdev->p_uuid is NULL! BUG\n"); + drbd_err(device, "device->p_uuid is NULL! BUG\n"); } } if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { /* for verify runs, we don't update uuids here, * so there would be nothing to report. */ - drbd_uuid_set_bm(mdev, 0UL); - drbd_print_uuids(mdev, "updated UUIDs"); - if (mdev->p_uuid) { + drbd_uuid_set_bm(device, 0UL); + drbd_print_uuids(device, "updated UUIDs"); + if (device->p_uuid) { /* Now the two UUID sets are equal, update what we * know of the peer. 
*/ int i; for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) - mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; + device->p_uuid[i] = device->ldev->md.uuid[i]; } } } - _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); + _drbd_set_state(device, ns, CS_VERBOSE, NULL); out_unlock: - spin_unlock_irq(&mdev->tconn->req_lock); - put_ldev(mdev); + spin_unlock_irq(&device->resource->req_lock); + put_ldev(device); out: - mdev->rs_total = 0; - mdev->rs_failed = 0; - mdev->rs_paused = 0; + device->rs_total = 0; + device->rs_failed = 0; + device->rs_paused = 0; /* reset start sector, if we reached end of device */ - if (verify_done && mdev->ov_left == 0) - mdev->ov_start_sector = 0; + if (verify_done && device->ov_left == 0) + device->ov_start_sector = 0; - drbd_md_sync(mdev); + drbd_md_sync(device); if (khelper_cmd) - drbd_khelper(mdev, khelper_cmd); + drbd_khelper(device, khelper_cmd); return 1; } /* helper */ -static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) +static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) { if (drbd_peer_req_has_active_page(peer_req)) { /* This might happen if sendpage() has not finished */ int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; - atomic_add(i, &mdev->pp_in_use_by_net); - atomic_sub(i, &mdev->pp_in_use); - spin_lock_irq(&mdev->tconn->req_lock); - list_add_tail(&peer_req->w.list, &mdev->net_ee); - spin_unlock_irq(&mdev->tconn->req_lock); + atomic_add(i, &device->pp_in_use_by_net); + atomic_sub(i, &device->pp_in_use); + spin_lock_irq(&device->resource->req_lock); + list_add_tail(&peer_req->w.list, &device->net_ee); + spin_unlock_irq(&device->resource->req_lock); wake_up(&drbd_pp_wait); } else - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); } /** * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST - * @mdev: DRBD device. + * @device: DRBD device. * @w: work object. 
* @cancel: The connection will be closed anyways */ int w_e_end_data_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; int err; if (unlikely(cancel)) { - drbd_free_peer_req(mdev, peer_req); - dec_unacked(mdev); + drbd_free_peer_req(device, peer_req); + dec_unacked(device); return 0; } if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - err = drbd_send_block(mdev, P_DATA_REPLY, peer_req); + err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Sending NegDReply. sector=%llus.\n", + drbd_err(device, "Sending NegDReply. sector=%llus.\n", (unsigned long long)peer_req->i.sector); - err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); + err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); } - dec_unacked(mdev); + dec_unacked(device); - move_to_net_ee_or_free(mdev, peer_req); + move_to_net_ee_or_free(device, peer_req); if (unlikely(err)) - dev_err(DEV, "drbd_send_block() failed\n"); + drbd_err(device, "drbd_send_block() failed\n"); return err; } /** * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST - * @mdev: DRBD device. * @w: work object. 
* @cancel: The connection will be closed anyways */ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; int err; if (unlikely(cancel)) { - drbd_free_peer_req(mdev, peer_req); - dec_unacked(mdev); + drbd_free_peer_req(device, peer_req); + dec_unacked(device); return 0; } - if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_rs_complete_io(mdev, peer_req->i.sector); - put_ldev(mdev); + if (get_ldev_if_state(device, D_FAILED)) { + drbd_rs_complete_io(device, peer_req->i.sector); + put_ldev(device); } - if (mdev->state.conn == C_AHEAD) { - err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req); + if (device->state.conn == C_AHEAD) { + err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req); } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { - inc_rs_pending(mdev); - err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); + if (likely(device->state.pdsk >= D_INCONSISTENT)) { + inc_rs_pending(device); + err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Not sending RSDataReply, " + drbd_err(device, "Not sending RSDataReply, " "partner DISKLESS!\n"); err = 0; } } else { if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Sending NegRSDReply. sector %llus.\n", + drbd_err(device, "Sending NegRSDReply. 
sector %llus.\n", (unsigned long long)peer_req->i.sector); - err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); + err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ - drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size); + drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size); } - dec_unacked(mdev); + dec_unacked(device); - move_to_net_ee_or_free(mdev, peer_req); + move_to_net_ee_or_free(device, peer_req); if (unlikely(err)) - dev_err(DEV, "drbd_send_block() failed\n"); + drbd_err(device, "drbd_send_block() failed\n"); return err; } int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; struct digest_info *di; int digest_size; void *digest = NULL; int err, eq = 0; if (unlikely(cancel)) { - drbd_free_peer_req(mdev, peer_req); - dec_unacked(mdev); + drbd_free_peer_req(device, peer_req); + dec_unacked(device); return 0; } - if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, peer_req->i.sector); - put_ldev(mdev); + if (get_ldev(device)) { + drbd_rs_complete_io(device, peer_req->i.sector); + put_ldev(device); } di = peer_req->digest; @@ -1095,47 +1108,48 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) /* quick hack to try to avoid a race against reconfiguration. 
* a real fix would be much more involved, * introducing more locking mechanisms */ - if (mdev->tconn->csums_tfm) { - digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); - D_ASSERT(digest_size == di->digest_size); + if (peer_device->connection->csums_tfm) { + digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm); + D_ASSERT(device, digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); + drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } if (eq) { - drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size); + drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ - mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; - err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); + device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; + err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); } else { - inc_rs_pending(mdev); + inc_rs_pending(device); peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); - err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); + err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); } } else { - err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); + err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); + drbd_err(device, "Sending NegDReply. 
I guess it gets messy.\n"); } - dec_unacked(mdev); - move_to_net_ee_or_free(mdev, peer_req); + dec_unacked(device); + move_to_net_ee_or_free(device, peer_req); if (unlikely(err)) - dev_err(DEV, "drbd_send_block/ack() failed\n"); + drbd_err(device, "drbd_send_block/ack() failed\n"); return err; } int w_e_end_ov_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; int digest_size; @@ -1145,7 +1159,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); + digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { err = 1; /* terminate the connection in case the allocation failed */ @@ -1153,7 +1167,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) } if (likely(!(peer_req->flags & EE_WAS_ERROR))) - drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); + drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); else memset(digest, 0, digest_size); @@ -1162,36 +1176,37 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_alloc_pages due to pp_in_use > max_buffers. 
*/ - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(mdev); - err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY); + inc_rs_pending(device); + err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); if (err) - dec_rs_pending(mdev); + dec_rs_pending(device); kfree(digest); out: if (peer_req) - drbd_free_peer_req(mdev, peer_req); - dec_unacked(mdev); + drbd_free_peer_req(device, peer_req); + dec_unacked(device); return err; } -void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size) +void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) { - if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) { - mdev->ov_last_oos_size += size>>9; + if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { + device->ov_last_oos_size += size>>9; } else { - mdev->ov_last_oos_start = sector; - mdev->ov_last_oos_size = size>>9; + device->ov_last_oos_start = sector; + device->ov_last_oos_size = size>>9; } - drbd_set_out_of_sync(mdev, sector, size); + drbd_set_out_of_sync(device, sector, size); } int w_e_end_ov_reply(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_peer_device *peer_device = peer_req->peer_device; + struct drbd_device *device = peer_device->device; struct digest_info *di; void *digest; sector_t sector = peer_req->i.sector; @@ -1201,27 +1216,27 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) bool stop_sector_reached = false; if (unlikely(cancel)) { - drbd_free_peer_req(mdev, peer_req); - dec_unacked(mdev); + drbd_free_peer_req(device, peer_req); + dec_unacked(device); return 0; } /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all * the resync lru has been cleaned up already */ - if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, 
peer_req->i.sector); - put_ldev(mdev); + if (get_ldev(device)) { + drbd_rs_complete_io(device, peer_req->i.sector); + put_ldev(device); } di = peer_req->digest; if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); + digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); + drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); - D_ASSERT(digest_size == di->digest_size); + D_ASSERT(device, digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } @@ -1232,102 +1247,95 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_alloc_pages due to pp_in_use > max_buffers. */ - drbd_free_peer_req(mdev, peer_req); + drbd_free_peer_req(device, peer_req); if (!eq) - drbd_ov_out_of_sync_found(mdev, sector, size); + drbd_ov_out_of_sync_found(device, sector, size); else - ov_out_of_sync_print(mdev); + ov_out_of_sync_print(device); - err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, + err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, eq ? 
ID_IN_SYNC : ID_OUT_OF_SYNC); - dec_unacked(mdev); + dec_unacked(device); - --mdev->ov_left; + --device->ov_left; /* let's advance progress step marks only for every other megabyte */ - if ((mdev->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(mdev, mdev->ov_left); + if ((device->ov_left & 0x200) == 0x200) + drbd_advance_rs_marks(device, device->ov_left); - stop_sector_reached = verify_can_do_stop_sector(mdev) && - (sector + (size>>9)) >= mdev->ov_stop_sector; + stop_sector_reached = verify_can_do_stop_sector(device) && + (sector + (size>>9)) >= device->ov_stop_sector; - if (mdev->ov_left == 0 || stop_sector_reached) { - ov_out_of_sync_print(mdev); - drbd_resync_finished(mdev); + if (device->ov_left == 0 || stop_sector_reached) { + ov_out_of_sync_print(device); + drbd_resync_finished(device); } return err; } -int w_prev_work_done(struct drbd_work *w, int cancel) -{ - struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); - - complete(&b->done); - return 0; -} - /* FIXME * We need to track the number of pending barrier acks, * and to be able to wait for them. * See also comment in drbd_adm_attach before drbd_suspend_io. 
*/ -int drbd_send_barrier(struct drbd_tconn *tconn) +static int drbd_send_barrier(struct drbd_connection *connection) { struct p_barrier *p; struct drbd_socket *sock; - sock = &tconn->data; - p = conn_prepare_command(tconn, sock); + sock = &connection->data; + p = conn_prepare_command(connection, sock); if (!p) return -EIO; - p->barrier = tconn->send.current_epoch_nr; + p->barrier = connection->send.current_epoch_nr; p->pad = 0; - tconn->send.current_epoch_writes = 0; + connection->send.current_epoch_writes = 0; - return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0); + return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); } int w_send_write_hint(struct drbd_work *w, int cancel) { - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = + container_of(w, struct drbd_device, unplug_work); struct drbd_socket *sock; if (cancel) return 0; - sock = &mdev->tconn->data; - if (!drbd_prepare_command(mdev, sock)) + sock = &first_peer_device(device)->connection->data; + if (!drbd_prepare_command(first_peer_device(device), sock)) return -EIO; - return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0); + return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0); } -static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch) +static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch) { - if (!tconn->send.seen_any_write_yet) { - tconn->send.seen_any_write_yet = true; - tconn->send.current_epoch_nr = epoch; - tconn->send.current_epoch_writes = 0; + if (!connection->send.seen_any_write_yet) { + connection->send.seen_any_write_yet = true; + connection->send.current_epoch_nr = epoch; + connection->send.current_epoch_writes = 0; } } -static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch) +static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch) { /* re-init if first write on this connection */ - 
if (!tconn->send.seen_any_write_yet) + if (!connection->send.seen_any_write_yet) return; - if (tconn->send.current_epoch_nr != epoch) { - if (tconn->send.current_epoch_writes) - drbd_send_barrier(tconn); - tconn->send.current_epoch_nr = epoch; + if (connection->send.current_epoch_nr != epoch) { + if (connection->send.current_epoch_writes) + drbd_send_barrier(connection); + connection->send.current_epoch_nr = epoch; } } int w_send_out_of_sync(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); - struct drbd_conf *mdev = w->mdev; - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_device *device = req->device; + struct drbd_connection *connection = first_peer_device(device)->connection; int err; if (unlikely(cancel)) { @@ -1335,13 +1343,13 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) return 0; } - /* this time, no tconn->send.current_epoch_writes++; + /* this time, no connection->send.current_epoch_writes++; * If it was sent, it was the closing barrier for the last * replicated epoch, before we went into AHEAD mode. * No more barriers will be sent, until we leave AHEAD mode again. */ - maybe_send_barrier(tconn, req->epoch); + maybe_send_barrier(connection, req->epoch); - err = drbd_send_out_of_sync(mdev, req); + err = drbd_send_out_of_sync(first_peer_device(device), req); req_mod(req, OOS_HANDED_TO_NETWORK); return err; @@ -1349,15 +1357,14 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) /** * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request - * @mdev: DRBD device. * @w: work object. 
* @cancel: The connection will be closed anyways */ int w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); - struct drbd_conf *mdev = w->mdev; - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_device *device = req->device; + struct drbd_connection *connection = first_peer_device(device)->connection; int err; if (unlikely(cancel)) { @@ -1365,11 +1372,11 @@ int w_send_dblock(struct drbd_work *w, int cancel) return 0; } - re_init_if_first_write(tconn, req->epoch); - maybe_send_barrier(tconn, req->epoch); - tconn->send.current_epoch_writes++; + re_init_if_first_write(connection, req->epoch); + maybe_send_barrier(connection, req->epoch); + connection->send.current_epoch_writes++; - err = drbd_send_dblock(mdev, req); + err = drbd_send_dblock(first_peer_device(device), req); req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); return err; @@ -1377,15 +1384,14 @@ int w_send_dblock(struct drbd_work *w, int cancel) /** * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet - * @mdev: DRBD device. * @w: work object. * @cancel: The connection will be closed anyways */ int w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); - struct drbd_conf *mdev = w->mdev; - struct drbd_tconn *tconn = mdev->tconn; + struct drbd_device *device = req->device; + struct drbd_connection *connection = first_peer_device(device)->connection; int err; if (unlikely(cancel)) { @@ -1395,9 +1401,9 @@ int w_send_read_req(struct drbd_work *w, int cancel) /* Even read requests may close a write epoch, * if there was any yet. */ - maybe_send_barrier(tconn, req->epoch); + maybe_send_barrier(connection, req->epoch); - err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, + err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); req_mod(req, err ? 
SEND_FAILED : HANDED_OVER_TO_NETWORK); @@ -1408,21 +1414,21 @@ int w_send_read_req(struct drbd_work *w, int cancel) int w_restart_disk_io(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = req->device; if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) - drbd_al_begin_io(mdev, &req->i, false); + drbd_al_begin_io(device, &req->i, false); drbd_req_make_private_bio(req, req->master_bio); - req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + req->private_bio->bi_bdev = device->ldev->backing_bdev; generic_make_request(req->private_bio); return 0; } -static int _drbd_may_sync_now(struct drbd_conf *mdev) +static int _drbd_may_sync_now(struct drbd_device *device) { - struct drbd_conf *odev = mdev; + struct drbd_device *odev = device; int resync_after; while (1) { @@ -1433,7 +1439,7 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) rcu_read_unlock(); if (resync_after == -1) return 1; - odev = minor_to_mdev(resync_after); + odev = minor_to_device(resync_after); if (!odev) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1446,17 +1452,17 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) /** * _drbd_pause_after() - Pause resync on all devices that may not resync now - * @mdev: DRBD device. + * @device: DRBD device. * * Called from process context only (admin command and after_state_ch). 
*/ -static int _drbd_pause_after(struct drbd_conf *mdev) +static int _drbd_pause_after(struct drbd_device *device) { - struct drbd_conf *odev; + struct drbd_device *odev; int i, rv = 0; rcu_read_lock(); - idr_for_each_entry(&minors, odev, i) { + idr_for_each_entry(&drbd_devices, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (!_drbd_may_sync_now(odev)) @@ -1470,17 +1476,17 @@ static int _drbd_pause_after(struct drbd_conf *mdev) /** * _drbd_resume_next() - Resume resync on all devices that may resync now - * @mdev: DRBD device. + * @device: DRBD device. * * Called from process context only (admin command and worker). */ -static int _drbd_resume_next(struct drbd_conf *mdev) +static int _drbd_resume_next(struct drbd_device *device) { - struct drbd_conf *odev; + struct drbd_device *odev; int i, rv = 0; rcu_read_lock(); - idr_for_each_entry(&minors, odev, i) { + idr_for_each_entry(&drbd_devices, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (odev->state.aftr_isp) { @@ -1494,24 +1500,24 @@ static int _drbd_resume_next(struct drbd_conf *mdev) return rv; } -void resume_next_sg(struct drbd_conf *mdev) +void resume_next_sg(struct drbd_device *device) { write_lock_irq(&global_state_lock); - _drbd_resume_next(mdev); + _drbd_resume_next(device); write_unlock_irq(&global_state_lock); } -void suspend_other_sg(struct drbd_conf *mdev) +void suspend_other_sg(struct drbd_device *device) { write_lock_irq(&global_state_lock); - _drbd_pause_after(mdev); + _drbd_pause_after(device); write_unlock_irq(&global_state_lock); } /* caller must hold global_state_lock */ -enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) +enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) { - struct drbd_conf *odev; + struct drbd_device *odev; int resync_after; if (o_minor == -1) @@ -1520,9 +1526,9 @@ enum drbd_ret_code drbd_resync_after_valid(struct 
drbd_conf *mdev, int o_minor) return ERR_RESYNC_AFTER; /* check for loops */ - odev = minor_to_mdev(o_minor); + odev = minor_to_device(o_minor); while (1) { - if (odev == mdev) + if (odev == device) return ERR_RESYNC_AFTER_CYCLE; /* You are free to depend on diskless, non-existing, @@ -1542,35 +1548,35 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(resync_after); + odev = minor_to_device(resync_after); } } /* caller must hold global_state_lock */ -void drbd_resync_after_changed(struct drbd_conf *mdev) +void drbd_resync_after_changed(struct drbd_device *device) { int changes; do { - changes = _drbd_pause_after(mdev); - changes |= _drbd_resume_next(mdev); + changes = _drbd_pause_after(device); + changes |= _drbd_resume_next(device); } while (changes); } -void drbd_rs_controller_reset(struct drbd_conf *mdev) +void drbd_rs_controller_reset(struct drbd_device *device) { struct fifo_buffer *plan; - atomic_set(&mdev->rs_sect_in, 0); - atomic_set(&mdev->rs_sect_ev, 0); - mdev->rs_in_flight = 0; + atomic_set(&device->rs_sect_in, 0); + atomic_set(&device->rs_sect_ev, 0); + device->rs_in_flight = 0; /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. 
It is valid since all other updates also lead to an completely empty fifo */ rcu_read_lock(); - plan = rcu_dereference(mdev->rs_plan_s); + plan = rcu_dereference(device->rs_plan_s); plan->total = 0; fifo_set(plan, 0); rcu_read_unlock(); @@ -1578,101 +1584,104 @@ void drbd_rs_controller_reset(struct drbd_conf *mdev) void start_resync_timer_fn(unsigned long data) { - struct drbd_conf *mdev = (struct drbd_conf *) data; + struct drbd_device *device = (struct drbd_device *) data; - drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work); + drbd_queue_work(&first_peer_device(device)->connection->sender_work, + &device->start_resync_work); } int w_start_resync(struct drbd_work *w, int cancel) { - struct drbd_conf *mdev = w->mdev; + struct drbd_device *device = + container_of(w, struct drbd_device, start_resync_work); - if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { - dev_warn(DEV, "w_start_resync later...\n"); - mdev->start_resync_timer.expires = jiffies + HZ/10; - add_timer(&mdev->start_resync_timer); + if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { + drbd_warn(device, "w_start_resync later...\n"); + device->start_resync_timer.expires = jiffies + HZ/10; + add_timer(&device->start_resync_timer); return 0; } - drbd_start_resync(mdev, C_SYNC_SOURCE); - clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags); + drbd_start_resync(device, C_SYNC_SOURCE); + clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); return 0; } /** * drbd_start_resync() - Start the resync process - * @mdev: DRBD device. + * @device: DRBD device. * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET * * This function might bring you directly into one of the * C_PAUSED_SYNC_* states. 
*/ -void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) +void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) { union drbd_state ns; int r; - if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) { - dev_err(DEV, "Resync already running!\n"); + if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) { + drbd_err(device, "Resync already running!\n"); return; } - if (!test_bit(B_RS_H_DONE, &mdev->flags)) { + if (!test_bit(B_RS_H_DONE, &device->flags)) { if (side == C_SYNC_TARGET) { /* Since application IO was locked out during C_WF_BITMAP_T and C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET we check that we might make the data inconsistent. */ - r = drbd_khelper(mdev, "before-resync-target"); + r = drbd_khelper(device, "before-resync-target"); r = (r >> 8) & 0xff; if (r > 0) { - dev_info(DEV, "before-resync-target handler returned %d, " + drbd_info(device, "before-resync-target handler returned %d, " "dropping connection.\n", r); - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD); return; } } else /* C_SYNC_SOURCE */ { - r = drbd_khelper(mdev, "before-resync-source"); + r = drbd_khelper(device, "before-resync-source"); r = (r >> 8) & 0xff; if (r > 0) { if (r == 3) { - dev_info(DEV, "before-resync-source handler returned %d, " + drbd_info(device, "before-resync-source handler returned %d, " "ignoring. 
Old userland tools?", r); } else { - dev_info(DEV, "before-resync-source handler returned %d, " + drbd_info(device, "before-resync-source handler returned %d, " "dropping connection.\n", r); - conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(first_peer_device(device)->connection, + NS(conn, C_DISCONNECTING), CS_HARD); return; } } } } - if (current == mdev->tconn->worker.task) { + if (current == first_peer_device(device)->connection->worker.task) { /* The worker should not sleep waiting for state_mutex, that can take long */ - if (!mutex_trylock(mdev->state_mutex)) { - set_bit(B_RS_H_DONE, &mdev->flags); - mdev->start_resync_timer.expires = jiffies + HZ/5; - add_timer(&mdev->start_resync_timer); + if (!mutex_trylock(device->state_mutex)) { + set_bit(B_RS_H_DONE, &device->flags); + device->start_resync_timer.expires = jiffies + HZ/5; + add_timer(&device->start_resync_timer); return; } } else { - mutex_lock(mdev->state_mutex); + mutex_lock(device->state_mutex); } - clear_bit(B_RS_H_DONE, &mdev->flags); + clear_bit(B_RS_H_DONE, &device->flags); write_lock_irq(&global_state_lock); /* Did some connection breakage or IO error race with us? 
*/ - if (mdev->state.conn < C_CONNECTED - || !get_ldev_if_state(mdev, D_NEGOTIATING)) { + if (device->state.conn < C_CONNECTED + || !get_ldev_if_state(device, D_NEGOTIATING)) { write_unlock_irq(&global_state_lock); - mutex_unlock(mdev->state_mutex); + mutex_unlock(device->state_mutex); return; } - ns = drbd_read_state(mdev); + ns = drbd_read_state(device); - ns.aftr_isp = !_drbd_may_sync_now(mdev); + ns.aftr_isp = !_drbd_may_sync_now(device); ns.conn = side; @@ -1681,43 +1690,43 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) else /* side == C_SYNC_SOURCE */ ns.pdsk = D_INCONSISTENT; - r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - ns = drbd_read_state(mdev); + r = __drbd_set_state(device, ns, CS_VERBOSE, NULL); + ns = drbd_read_state(device); if (ns.conn < C_CONNECTED) r = SS_UNKNOWN_ERROR; if (r == SS_SUCCESS) { - unsigned long tw = drbd_bm_total_weight(mdev); + unsigned long tw = drbd_bm_total_weight(device); unsigned long now = jiffies; int i; - mdev->rs_failed = 0; - mdev->rs_paused = 0; - mdev->rs_same_csum = 0; - mdev->rs_last_events = 0; - mdev->rs_last_sect_ev = 0; - mdev->rs_total = tw; - mdev->rs_start = now; + device->rs_failed = 0; + device->rs_paused = 0; + device->rs_same_csum = 0; + device->rs_last_events = 0; + device->rs_last_sect_ev = 0; + device->rs_total = tw; + device->rs_start = now; for (i = 0; i < DRBD_SYNC_MARKS; i++) { - mdev->rs_mark_left[i] = tw; - mdev->rs_mark_time[i] = now; + device->rs_mark_left[i] = tw; + device->rs_mark_time[i] = now; } - _drbd_pause_after(mdev); + _drbd_pause_after(device); } write_unlock_irq(&global_state_lock); if (r == SS_SUCCESS) { /* reset rs_last_bcast when a resync or verify is started, * to deal with potential jiffies wrap. 
*/ - mdev->rs_last_bcast = jiffies - HZ; + device->rs_last_bcast = jiffies - HZ; - dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", + drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", drbd_conn_str(ns.conn), - (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), - (unsigned long) mdev->rs_total); + (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), + (unsigned long) device->rs_total); if (side == C_SYNC_TARGET) - mdev->bm_resync_fo = 0; + device->bm_resync_fo = 0; /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid * with w_send_oos, or the sync target will get confused as to @@ -1726,10 +1735,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) * drbd_resync_finished from here in that case. * We drbd_gen_and_send_sync_uuid here for protocol < 96, * and from after_state_ch otherwise. */ - if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96) - drbd_gen_and_send_sync_uuid(mdev); + if (side == C_SYNC_SOURCE && + first_peer_device(device)->connection->agreed_pro_version < 96) + drbd_gen_and_send_sync_uuid(first_peer_device(device)); - if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) { + if (first_peer_device(device)->connection->agreed_pro_version < 95 && + device->rs_total == 0) { /* This still has a race (about when exactly the peers * detect connection loss) that can lead to a full sync * on next handshake. 
In 8.3.9 we fixed this with explicit @@ -1745,33 +1756,33 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) int timeo; rcu_read_lock(); - nc = rcu_dereference(mdev->tconn->net_conf); + nc = rcu_dereference(first_peer_device(device)->connection->net_conf); timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; rcu_read_unlock(); schedule_timeout_interruptible(timeo); } - drbd_resync_finished(mdev); + drbd_resync_finished(device); } - drbd_rs_controller_reset(mdev); - /* ns.conn may already be != mdev->state.conn, + drbd_rs_controller_reset(device); + /* ns.conn may already be != device->state.conn, * we may have been paused in between, or become paused until * the timer triggers. * No matter, that is handled in resync_timer_fn() */ if (ns.conn == C_SYNC_TARGET) - mod_timer(&mdev->resync_timer, jiffies); + mod_timer(&device->resync_timer, jiffies); - drbd_md_sync(mdev); + drbd_md_sync(device); } - put_ldev(mdev); - mutex_unlock(mdev->state_mutex); + put_ldev(device); + mutex_unlock(device->state_mutex); } /* If the resource already closed the current epoch, but we did not * (because we have not yet seen new requests), we should send the * corresponding barrier now. Must be checked within the same spinlock * that is used to check for new requests. 
*/ -bool need_to_send_barrier(struct drbd_tconn *connection) +static bool need_to_send_barrier(struct drbd_connection *connection) { if (!connection->send.seen_any_write_yet) return false; @@ -1795,7 +1806,7 @@ bool need_to_send_barrier(struct drbd_tconn *connection) return true; } -bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) +static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) { spin_lock_irq(&queue->q_lock); list_splice_init(&queue->q, work_list); @@ -1803,7 +1814,7 @@ bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_li return !list_empty(work_list); } -bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list) +static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list) { spin_lock_irq(&queue->q_lock); if (!list_empty(&queue->q)) @@ -1812,7 +1823,7 @@ bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_lis return !list_empty(work_list); } -void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list) +static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list) { DEFINE_WAIT(wait); struct net_conf *nc; @@ -1842,7 +1853,7 @@ void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list) for (;;) { int send_barrier; prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE); - spin_lock_irq(&connection->req_lock); + spin_lock_irq(&connection->resource->req_lock); spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ /* dequeue single item only, * we still use drbd_queue_work_front() in some places */ @@ -1850,11 +1861,11 @@ void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list) list_move(connection->sender_work.q.next, work_list); spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? 
*/ if (!list_empty(work_list) || signal_pending(current)) { - spin_unlock_irq(&connection->req_lock); + spin_unlock_irq(&connection->resource->req_lock); break; } send_barrier = need_to_send_barrier(connection); - spin_unlock_irq(&connection->req_lock); + spin_unlock_irq(&connection->resource->req_lock); if (send_barrier) { drbd_send_barrier(connection); connection->send.current_epoch_nr++; @@ -1883,9 +1894,9 @@ void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list) int drbd_worker(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->tconn; + struct drbd_connection *connection = thi->connection; struct drbd_work *w = NULL; - struct drbd_conf *mdev; + struct drbd_peer_device *peer_device; LIST_HEAD(work_list); int vnr; @@ -1895,12 +1906,12 @@ int drbd_worker(struct drbd_thread *thi) /* as long as we use drbd_queue_work_front(), * we may only dequeue single work items here, not batches. */ if (list_empty(&work_list)) - wait_for_work(tconn, &work_list); + wait_for_work(connection, &work_list); if (signal_pending(current)) { flush_signals(current); if (get_t_state(thi) == RUNNING) { - conn_warn(tconn, "Worker got an unexpected signal\n"); + drbd_warn(connection, "Worker got an unexpected signal\n"); continue; } break; @@ -1912,10 +1923,10 @@ int drbd_worker(struct drbd_thread *thi) while (!list_empty(&work_list)) { w = list_first_entry(&work_list, struct drbd_work, list); list_del_init(&w->list); - if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0) + if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) continue; - if (tconn->cstate >= C_WF_REPORT_PARAMS) - conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); + if (connection->cstate >= C_WF_REPORT_PARAMS) + conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); } } @@ -1925,16 +1936,17 @@ int drbd_worker(struct drbd_thread *thi) list_del_init(&w->list); w->cb(w, 1); } - dequeue_work_batch(&tconn->sender_work, &work_list); + 
dequeue_work_batch(&connection->sender_work, &work_list); } while (!list_empty(&work_list)); rcu_read_lock(); - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); - kref_get(&mdev->kref); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE); + kref_get(&device->kref); rcu_read_unlock(); - drbd_mdev_cleanup(mdev); - kref_put(&mdev->kref, &drbd_minor_destroy); + drbd_device_cleanup(device); + kref_put(&device->kref, drbd_destroy_device); rcu_read_lock(); } rcu_read_unlock(); diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 328f18e4b4ee..3db9ebaf64f6 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -9,12 +9,12 @@ extern char *drbd_sec_holder; /* sets the number of 512 byte sectors of our virtual device */ -static inline void drbd_set_my_capacity(struct drbd_conf *mdev, +static inline void drbd_set_my_capacity(struct drbd_device *device, sector_t size) { - /* set_capacity(mdev->this_bdev->bd_disk, size); */ - set_capacity(mdev->vdisk, size); - mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9; + /* set_capacity(device->this_bdev->bd_disk, size); */ + set_capacity(device->vdisk, size); + device->this_bdev->bd_inode->i_size = (loff_t)size << 9; } #define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) @@ -27,20 +27,20 @@ extern void drbd_request_endio(struct bio *bio, int error); /* * used to submit our private bio */ -static inline void drbd_generic_make_request(struct drbd_conf *mdev, +static inline void drbd_generic_make_request(struct drbd_device *device, int fault_type, struct bio *bio) { __release(local); if (!bio->bi_bdev) { printk(KERN_ERR "drbd%d: drbd_generic_make_request: " "bio->bi_bdev == NULL\n", - mdev_to_minor(mdev)); + 
device_to_minor(device)); dump_stack(); bio_endio(bio, -ENODEV); return; } - if (drbd_insert_fault(mdev, fault_type)) + if (drbd_insert_fault(device, fault_type)) bio_endio(bio, -EIO); else generic_make_request(bio); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2023043ce7c0..fa9bb742df6e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -961,17 +961,31 @@ static void empty(void) { } -static DECLARE_WORK(floppy_work, NULL); +static void (*floppy_work_fn)(void); + +static void floppy_work_workfn(struct work_struct *work) +{ + floppy_work_fn(); +} + +static DECLARE_WORK(floppy_work, floppy_work_workfn); static void schedule_bh(void (*handler)(void)) { WARN_ON(work_pending(&floppy_work)); - PREPARE_WORK(&floppy_work, (work_func_t)handler); + floppy_work_fn = handler; queue_work(floppy_wq, &floppy_work); } -static DECLARE_DELAYED_WORK(fd_timer, NULL); +static void (*fd_timer_fn)(void) = NULL; + +static void fd_timer_workfn(struct work_struct *work) +{ + fd_timer_fn(); +} + +static DECLARE_DELAYED_WORK(fd_timer, fd_timer_workfn); static void cancel_activity(void) { @@ -982,7 +996,7 @@ static void cancel_activity(void) /* this function makes sure that the disk stays in the drive during the * transfer */ -static void fd_watchdog(struct work_struct *arg) +static void fd_watchdog(void) { debug_dcl(DP->flags, "calling disk change from watchdog\n"); @@ -993,7 +1007,7 @@ static void fd_watchdog(struct work_struct *arg) reset_fdc(); } else { cancel_delayed_work(&fd_timer); - PREPARE_DELAYED_WORK(&fd_timer, fd_watchdog); + fd_timer_fn = fd_watchdog; queue_delayed_work(floppy_wq, &fd_timer, HZ / 10); } } @@ -1005,7 +1019,8 @@ static void main_command_interrupt(void) } /* waits for a delay (spinup or select) to pass */ -static int fd_wait_for_completion(unsigned long expires, work_func_t function) +static int fd_wait_for_completion(unsigned long expires, + void (*function)(void)) { if (FDCS->reset) { reset_fdc(); /* do the reset during sleep 
to win time @@ -1016,7 +1031,7 @@ static int fd_wait_for_completion(unsigned long expires, work_func_t function) if (time_before(jiffies, expires)) { cancel_delayed_work(&fd_timer); - PREPARE_DELAYED_WORK(&fd_timer, function); + fd_timer_fn = function; queue_delayed_work(floppy_wq, &fd_timer, expires - jiffies); return 1; } @@ -1334,8 +1349,7 @@ static int fdc_dtr(void) * Pause 5 msec to avoid trouble. (Needs to be 2 jiffies) */ FDCS->dtr = raw_cmd->rate & 3; - return fd_wait_for_completion(jiffies + 2UL * HZ / 100, - (work_func_t)floppy_ready); + return fd_wait_for_completion(jiffies + 2UL * HZ / 100, floppy_ready); } /* fdc_dtr */ static void tell_sector(void) @@ -1440,7 +1454,7 @@ static void setup_rw_floppy(void) int flags; int dflags; unsigned long ready_date; - work_func_t function; + void (*function)(void); flags = raw_cmd->flags; if (flags & (FD_RAW_READ | FD_RAW_WRITE)) @@ -1454,9 +1468,9 @@ static void setup_rw_floppy(void) */ if (time_after(ready_date, jiffies + DP->select_delay)) { ready_date -= DP->select_delay; - function = (work_func_t)floppy_start; + function = floppy_start; } else - function = (work_func_t)setup_rw_floppy; + function = setup_rw_floppy; /* wait until the floppy is spinning fast enough */ if (fd_wait_for_completion(ready_date, function)) @@ -1486,7 +1500,7 @@ static void setup_rw_floppy(void) inr = result(); cont->interrupt(); } else if (flags & FD_RAW_NEED_DISK) - fd_watchdog(NULL); + fd_watchdog(); } static int blind_seek; @@ -1863,7 +1877,7 @@ static int start_motor(void (*function)(void)) /* wait_for_completion also schedules reset if needed. 
*/ return fd_wait_for_completion(DRS->select_date + DP->select_delay, - (work_func_t)function); + function); } static void floppy_ready(void) @@ -3053,7 +3067,10 @@ static int raw_cmd_copyout(int cmd, void __user *param, int ret; while (ptr) { - ret = copy_to_user(param, ptr, sizeof(*ptr)); + struct floppy_raw_cmd cmd = *ptr; + cmd.next = NULL; + cmd.kernel_data = NULL; + ret = copy_to_user(param, &cmd, sizeof(cmd)); if (ret) return -EFAULT; param += sizeof(struct floppy_raw_cmd); @@ -3107,10 +3124,11 @@ loop: return -ENOMEM; *rcmd = ptr; ret = copy_from_user(ptr, param, sizeof(*ptr)); - if (ret) - return -EFAULT; ptr->next = NULL; ptr->buffer_length = 0; + ptr->kernel_data = NULL; + if (ret) + return -EFAULT; param += sizeof(struct floppy_raw_cmd); if (ptr->cmd_count > 33) /* the command may now also take up the space @@ -3126,7 +3144,6 @@ loop: for (i = 0; i < 16; i++) ptr->reply[i] = 0; ptr->resultcode = 0; - ptr->kernel_data = NULL; if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) { if (ptr->length <= 0) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 66e8c3b94ef3..f70a230a2945 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -237,7 +237,7 @@ static int __do_lo_send_write(struct file *file, file_end_write(file); if (likely(bw == len)) return 0; - printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", + printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", (unsigned long long)pos, len); if (bw >= 0) bw = -EIO; @@ -277,7 +277,7 @@ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, return __do_lo_send_write(lo->lo_backing_file, page_address(page), bvec->bv_len, pos); - printk(KERN_ERR "loop: Transfer error at byte offset %llu, " + printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, " "length %i.\n", (unsigned long long)pos, bvec->bv_len); if (ret > 0) ret = -EIO; @@ -316,7 +316,7 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t 
pos) out: return ret; fail: - printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); + printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); ret = -ENOMEM; goto out; } @@ -345,7 +345,7 @@ lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, size = p->bsize; if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { - printk(KERN_ERR "loop: transfer error block %ld\n", + printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n", page->index); size = -EINVAL; } diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 516026954be6..59c5abe32f06 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -252,38 +252,45 @@ static void mtip_async_complete(struct mtip_port *port, void *data, int status) { - struct mtip_cmd *command; + struct mtip_cmd *cmd; struct driver_data *dd = data; - int cb_status = status ? -EIO : 0; + int unaligned, cb_status = status ? 
-EIO : 0; + void (*func)(void *, int); if (unlikely(!dd) || unlikely(!port)) return; - command = &port->commands[tag]; + cmd = &port->commands[tag]; if (unlikely(status == PORT_IRQ_TF_ERR)) { dev_warn(&port->dd->pdev->dev, "Command tag %d failed due to TFE\n", tag); } + /* Clear the active flag */ + atomic_set(&port->commands[tag].active, 0); + /* Upper layer callback */ - if (likely(command->async_callback)) - command->async_callback(command->async_data, cb_status); + func = cmd->async_callback; + if (likely(func && cmpxchg(&cmd->async_callback, func, 0) == func)) { - command->async_callback = NULL; - command->comp_func = NULL; + /* Unmap the DMA scatter list entries */ + dma_unmap_sg(&dd->pdev->dev, + cmd->sg, + cmd->scatter_ents, + cmd->direction); - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&dd->pdev->dev, - command->sg, - command->scatter_ents, - command->direction); + func(cmd->async_data, cb_status); + unaligned = cmd->unaligned; - /* Clear the allocated and active bits for the command */ - atomic_set(&port->commands[tag].active, 0); - release_slot(port, tag); + /* Clear the allocated bit for the command */ + release_slot(port, tag); - up(&port->cmd_slot); + if (unlikely(unaligned)) + up(&port->cmd_slot_unal); + else + up(&port->cmd_slot); + } } /* @@ -660,11 +667,12 @@ static void mtip_timeout_function(unsigned long int data) { struct mtip_port *port = (struct mtip_port *) data; struct host_to_dev_fis *fis; - struct mtip_cmd *command; - int tag, cmdto_cnt = 0; + struct mtip_cmd *cmd; + int unaligned, tag, cmdto_cnt = 0; unsigned int bit, group; unsigned int num_command_slots; unsigned long to, tagaccum[SLOTBITS_IN_LONGS]; + void (*func)(void *, int); if (unlikely(!port)) return; @@ -694,8 +702,8 @@ static void mtip_timeout_function(unsigned long int data) group = tag >> 5; bit = tag & 0x1F; - command = &port->commands[tag]; - fis = (struct host_to_dev_fis *) command->command; + cmd = &port->commands[tag]; + fis = (struct host_to_dev_fis *) 
cmd->command; set_bit(tag, tagaccum); cmdto_cnt++; @@ -709,27 +717,30 @@ static void mtip_timeout_function(unsigned long int data) */ writel(1 << bit, port->completed[group]); - /* Call the async completion callback. */ - if (likely(command->async_callback)) - command->async_callback(command->async_data, - -EIO); - command->async_callback = NULL; - command->comp_func = NULL; + /* Clear the active flag for the command */ + atomic_set(&port->commands[tag].active, 0); - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&port->dd->pdev->dev, - command->sg, - command->scatter_ents, - command->direction); + func = cmd->async_callback; + if (func && + cmpxchg(&cmd->async_callback, func, 0) == func) { - /* - * Clear the allocated bit and active tag for the - * command. - */ - atomic_set(&port->commands[tag].active, 0); - release_slot(port, tag); + /* Unmap the DMA scatter list entries */ + dma_unmap_sg(&port->dd->pdev->dev, + cmd->sg, + cmd->scatter_ents, + cmd->direction); - up(&port->cmd_slot); + func(cmd->async_data, -EIO); + unaligned = cmd->unaligned; + + /* Clear the allocated bit for the command. */ + release_slot(port, tag); + + if (unaligned) + up(&port->cmd_slot_unal); + else + up(&port->cmd_slot); + } } } @@ -4213,6 +4224,7 @@ skip_create_disk: blk_queue_max_hw_sectors(dd->queue, 0xffff); blk_queue_max_segment_size(dd->queue, 0x400000); blk_queue_io_min(dd->queue, 4096); + blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask); /* * write back cache is not supported in the device. 
FUA depends on @@ -4498,7 +4510,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, } dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), - cpu_to_node(smp_processor_id()), smp_processor_id()); + cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id()); dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { @@ -4615,7 +4627,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, if (rv) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); - goto block_initialize_err; + goto msi_initialize_err; } /* Initialize the block layer. */ @@ -4645,6 +4657,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, block_initialize_err: pci_disable_msi(pdev); + +msi_initialize_err: if (dd->isr_workq) { flush_workqueue(dd->isr_workq); destroy_workqueue(dd->isr_workq); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b52e9a6d6aad..ffb955e7ccb9 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -53,7 +53,7 @@ #define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000 /* unaligned IO handling */ -#define MTIP_MAX_UNALIGNED_SLOTS 8 +#define MTIP_MAX_UNALIGNED_SLOTS 2 /* Macro to extract the tag bit number from a tag value. */ #define MTIP_TAG_BIT(tag) (tag & 0x1F) @@ -92,7 +92,7 @@ /* Driver name and version strings */ #define MTIP_DRV_NAME "mtip32xx" -#define MTIP_DRV_VERSION "1.3.0" +#define MTIP_DRV_VERSION "1.3.1" /* Maximum number of minor device numbers per device. 
*/ #define MTIP_MAX_MINORS 16 diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 55298db36b2d..3a70ea2f7cd6 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -630,37 +630,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, } case NBD_CLEAR_SOCK: { - struct file *file; - + struct socket *sock = nbd->sock; nbd->sock = NULL; - file = nbd->file; - nbd->file = NULL; nbd_clear_que(nbd); BUG_ON(!list_empty(&nbd->queue_head)); BUG_ON(!list_empty(&nbd->waiting_queue)); kill_bdev(bdev); - if (file) - fput(file); + if (sock) + sockfd_put(sock); return 0; } case NBD_SET_SOCK: { - struct file *file; - if (nbd->file) + struct socket *sock; + int err; + if (nbd->sock) return -EBUSY; - file = fget(arg); - if (file) { - struct inode *inode = file_inode(file); - if (S_ISSOCK(inode->i_mode)) { - nbd->file = file; - nbd->sock = SOCKET_I(inode); - if (max_part > 0) - bdev->bd_invalidated = 1; - nbd->disconnect = 0; /* we're connected now */ - return 0; - } else { - fput(file); - } + sock = sockfd_lookup(arg, &err); + if (sock) { + nbd->sock = sock; + if (max_part > 0) + bdev->bd_invalidated = 1; + nbd->disconnect = 0; /* we're connected now */ + return 0; } return -EINVAL; } @@ -697,12 +689,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_DO_IT: { struct task_struct *thread; - struct file *file; + struct socket *sock; int error; if (nbd->pid) return -EBUSY; - if (!nbd->file) + if (!nbd->sock) return -EINVAL; mutex_unlock(&nbd->tx_lock); @@ -731,15 +723,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (error) return error; sock_shutdown(nbd, 0); - file = nbd->file; - nbd->file = NULL; + sock = nbd->sock; + nbd->sock = NULL; nbd_clear_que(nbd); dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); kill_bdev(bdev); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); set_device_ro(bdev, false); - if (file) - fput(file); + if (sock) + sockfd_put(sock); 
nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; @@ -875,9 +867,7 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].file = NULL; nbd_dev[i].magic = NBD_MAGIC; - nbd_dev[i].flags = 0; INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 51824d1f23ea..7c64fa756cce 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1,6 +1,6 @@ /* * NVM Express device driver - * Copyright (c) 2011, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -20,10 +20,12 @@ #include <linux/bio.h> #include <linux/bitops.h> #include <linux/blkdev.h> +#include <linux/cpu.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/genhd.h> +#include <linux/hdreg.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/interrupt.h> @@ -35,6 +37,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/pci.h> +#include <linux/percpu.h> #include <linux/poison.h> #include <linux/ptrace.h> #include <linux/sched.h> @@ -47,6 +50,11 @@ #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) #define ADMIN_TIMEOUT (60 * HZ) +#define IOD_TIMEOUT (4 * NVME_IO_TIMEOUT) + +unsigned char io_timeout = 30; +module_param(io_timeout, byte, 0644); +MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); static int nvme_major; module_param(nvme_major, int, 0); @@ -58,6 +66,7 @@ static DEFINE_SPINLOCK(dev_list_lock); static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; +static wait_queue_head_t nvme_kthread_wait; static 
void nvme_reset_failed_dev(struct work_struct *ws); @@ -74,6 +83,7 @@ struct async_cmd_info { * commands and one for I/O commands). */ struct nvme_queue { + struct rcu_head r_head; struct device *q_dmadev; struct nvme_dev *dev; char irqname[24]; /* nvme4294967295-65535\0 */ @@ -85,6 +95,7 @@ struct nvme_queue { wait_queue_head_t sq_full; wait_queue_t sq_cong_wait; struct bio_list sq_cong; + struct list_head iod_bio; u32 __iomem *q_db; u16 q_depth; u16 cq_vector; @@ -95,6 +106,7 @@ struct nvme_queue { u8 cq_phase; u8 cqe_seen; u8 q_suspended; + cpumask_var_t cpu_mask; struct async_cmd_info cmdinfo; unsigned long cmdid_data[]; }; @@ -118,7 +130,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); } -typedef void (*nvme_completion_fn)(struct nvme_dev *, void *, +typedef void (*nvme_completion_fn)(struct nvme_queue *, void *, struct nvme_completion *); struct nvme_cmd_info { @@ -190,7 +202,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) #define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE) -static void special_completion(struct nvme_dev *dev, void *ctx, +static void special_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { if (ctx == CMD_CTX_CANCELLED) @@ -198,26 +210,26 @@ static void special_completion(struct nvme_dev *dev, void *ctx, if (ctx == CMD_CTX_FLUSH) return; if (ctx == CMD_CTX_ABORT) { - ++dev->abort_limit; + ++nvmeq->dev->abort_limit; return; } if (ctx == CMD_CTX_COMPLETED) { - dev_warn(&dev->pci_dev->dev, + dev_warn(nvmeq->q_dmadev, "completed id %d twice on queue %d\n", cqe->command_id, le16_to_cpup(&cqe->sq_id)); return; } if (ctx == CMD_CTX_INVALID) { - dev_warn(&dev->pci_dev->dev, + dev_warn(nvmeq->q_dmadev, "invalid id %d completed on queue %d\n", cqe->command_id, le16_to_cpup(&cqe->sq_id)); return; } - dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx); + dev_warn(nvmeq->q_dmadev, "Unknown special 
completion %p\n", ctx); } -static void async_completion(struct nvme_dev *dev, void *ctx, +static void async_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { struct async_cmd_info *cmdinfo = ctx; @@ -262,14 +274,34 @@ static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid, return ctx; } -struct nvme_queue *get_nvmeq(struct nvme_dev *dev) +static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid) +{ + return rcu_dereference_raw(dev->queues[qid]); +} + +static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) { - return dev->queues[get_cpu() + 1]; + unsigned queue_id = get_cpu_var(*dev->io_queue); + rcu_read_lock(); + return rcu_dereference(dev->queues[queue_id]); } -void put_nvmeq(struct nvme_queue *nvmeq) +static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) { - put_cpu(); + rcu_read_unlock(); + put_cpu_var(nvmeq->dev->io_queue); +} + +static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx) + __acquires(RCU) +{ + rcu_read_lock(); + return rcu_dereference(dev->queues[q_idx]); +} + +static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) +{ + rcu_read_unlock(); } /** @@ -284,6 +316,10 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) unsigned long flags; u16 tail; spin_lock_irqsave(&nvmeq->q_lock, flags); + if (nvmeq->q_suspended) { + spin_unlock_irqrestore(&nvmeq->q_lock, flags); + return -EBUSY; + } tail = nvmeq->sq_tail; memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); if (++tail == nvmeq->q_depth) @@ -323,6 +359,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) iod->npages = -1; iod->length = nbytes; iod->nents = 0; + iod->first_dma = 0ULL; iod->start_time = jiffies; } @@ -371,19 +408,31 @@ static void nvme_end_io_acct(struct bio *bio, unsigned long start_time) part_stat_unlock(); } -static void bio_completion(struct nvme_dev *dev, void *ctx, +static void bio_completion(struct nvme_queue *nvmeq, void *ctx, struct 
nvme_completion *cqe) { struct nvme_iod *iod = ctx; struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; + if (unlikely(status)) { + if (!(status & NVME_SC_DNR || + bio->bi_rw & REQ_FAILFAST_MASK) && + (jiffies - iod->start_time) < IOD_TIMEOUT) { + if (!waitqueue_active(&nvmeq->sq_full)) + add_wait_queue(&nvmeq->sq_full, + &nvmeq->sq_cong_wait); + list_add_tail(&iod->node, &nvmeq->iod_bio); + wake_up(&nvmeq->sq_full); + return; + } + } if (iod->nents) { - dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, + dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents, bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); nvme_end_io_acct(bio, iod->start_time); } - nvme_free_iod(dev, iod); + nvme_free_iod(nvmeq->dev, iod); if (status) bio_endio(bio, -EIO); else @@ -391,8 +440,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, } /* length is in bytes. gfp flags indicates whether we may sleep. */ -int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, - struct nvme_iod *iod, int total_len, gfp_t gfp) +int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, + gfp_t gfp) { struct dma_pool *pool; int length = total_len; @@ -405,7 +454,6 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, dma_addr_t prp_dma; int nprps, i; - cmd->prp1 = cpu_to_le64(dma_addr); length -= (PAGE_SIZE - offset); if (length <= 0) return total_len; @@ -420,7 +468,7 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, } if (length <= PAGE_SIZE) { - cmd->prp2 = cpu_to_le64(dma_addr); + iod->first_dma = dma_addr; return total_len; } @@ -435,13 +483,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, prp_list = dma_pool_alloc(pool, gfp, &prp_dma); if (!prp_list) { - cmd->prp2 = cpu_to_le64(dma_addr); + iod->first_dma = dma_addr; iod->npages = -1; return (total_len - length) + PAGE_SIZE; } list[0] = prp_list; iod->first_dma = prp_dma; - cmd->prp2 = 
cpu_to_le64(prp_dma); i = 0; for (;;) { if (i == PAGE_SIZE / 8) { @@ -480,10 +527,11 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, bio_chain(split, bio); - if (bio_list_empty(&nvmeq->sq_cong)) + if (!waitqueue_active(&nvmeq->sq_full)) add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); bio_list_add(&nvmeq->sq_cong, split); bio_list_add(&nvmeq->sq_cong, bio); + wake_up(&nvmeq->sq_full); return 0; } @@ -536,25 +584,13 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, return length; } -/* - * We reuse the small pool to allocate the 16-byte range here as it is not - * worth having a special pool for these or additional cases to handle freeing - * the iod. - */ static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, struct bio *bio, struct nvme_iod *iod, int cmdid) { - struct nvme_dsm_range *range; + struct nvme_dsm_range *range = + (struct nvme_dsm_range *)iod_list(iod)[0]; struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; - range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC, - &iod->first_dma); - if (!range) - return -ENOMEM; - - iod_list(iod)[0] = (__le64 *)range; - iod->npages = 0; - range->cattr = cpu_to_le32(0); range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift); range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector)); @@ -601,44 +637,22 @@ int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) return nvme_submit_flush(nvmeq, ns, cmdid); } -/* - * Called with local interrupts disabled and the q_lock held. May not sleep. 
- */ -static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, - struct bio *bio) +static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) { + struct bio *bio = iod->private; + struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data; struct nvme_command *cmnd; - struct nvme_iod *iod; - enum dma_data_direction dma_dir; - int cmdid, length, result; + int cmdid; u16 control; u32 dsmgmt; - int psegs = bio_phys_segments(ns->queue, bio); - if ((bio->bi_rw & REQ_FLUSH) && psegs) { - result = nvme_submit_flush_data(nvmeq, ns); - if (result) - return result; - } - - result = -ENOMEM; - iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC); - if (!iod) - goto nomem; - iod->private = bio; - - result = -EBUSY; cmdid = alloc_cmdid(nvmeq, iod, bio_completion, NVME_IO_TIMEOUT); if (unlikely(cmdid < 0)) - goto free_iod; + return cmdid; - if (bio->bi_rw & REQ_DISCARD) { - result = nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); - if (result) - goto free_cmdid; - return result; - } - if ((bio->bi_rw & REQ_FLUSH) && !psegs) + if (bio->bi_rw & REQ_DISCARD) + return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); + if ((bio->bi_rw & REQ_FLUSH) && !iod->nents) return nvme_submit_flush(nvmeq, ns, cmdid); control = 0; @@ -652,42 +666,85 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; - memset(cmnd, 0, sizeof(*cmnd)); - if (bio_data_dir(bio)) { - cmnd->rw.opcode = nvme_cmd_write; - dma_dir = DMA_TO_DEVICE; - } else { - cmnd->rw.opcode = nvme_cmd_read; - dma_dir = DMA_FROM_DEVICE; - } - - result = nvme_map_bio(nvmeq, iod, bio, dma_dir, psegs); - if (result <= 0) - goto free_cmdid; - length = result; + cmnd->rw.opcode = bio_data_dir(bio) ? 
nvme_cmd_write : nvme_cmd_read; cmnd->rw.command_id = cmdid; cmnd->rw.nsid = cpu_to_le32(ns->ns_id); - length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length, - GFP_ATOMIC); + cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector)); - cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1); + cmnd->rw.length = + cpu_to_le16((bio->bi_iter.bi_size >> ns->lba_shift) - 1); cmnd->rw.control = cpu_to_le16(control); cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); - nvme_start_io_acct(bio); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; writel(nvmeq->sq_tail, nvmeq->q_db); return 0; +} + +/* + * Called with local interrupts disabled and the q_lock held. May not sleep. + */ +static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, + struct bio *bio) +{ + struct nvme_iod *iod; + int psegs = bio_phys_segments(ns->queue, bio); + int result; + + if ((bio->bi_rw & REQ_FLUSH) && psegs) { + result = nvme_submit_flush_data(nvmeq, ns); + if (result) + return result; + } + + iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC); + if (!iod) + return -ENOMEM; + + iod->private = bio; + if (bio->bi_rw & REQ_DISCARD) { + void *range; + /* + * We reuse the small pool to allocate the 16-byte range here + * as it is not worth having a special pool for these or + * additional cases to handle freeing the iod. + */ + range = dma_pool_alloc(nvmeq->dev->prp_small_pool, + GFP_ATOMIC, + &iod->first_dma); + if (!range) { + result = -ENOMEM; + goto free_iod; + } + iod_list(iod)[0] = (__le64 *)range; + iod->npages = 0; + } else if (psegs) { + result = nvme_map_bio(nvmeq, iod, bio, + bio_data_dir(bio) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE, + psegs); + if (result <= 0) + goto free_iod; + if (nvme_setup_prps(nvmeq->dev, iod, result, GFP_ATOMIC) != + result) { + result = -ENOMEM; + goto free_iod; + } + nvme_start_io_acct(bio); + } + if (unlikely(nvme_submit_iod(nvmeq, iod))) { + if (!waitqueue_active(&nvmeq->sq_full)) + add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); + list_add_tail(&iod->node, &nvmeq->iod_bio); + } + return 0; - free_cmdid: - free_cmdid(nvmeq, cmdid, NULL); free_iod: nvme_free_iod(nvmeq->dev, iod); - nomem: return result; } @@ -711,7 +768,7 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) } ctx = free_cmdid(nvmeq, cqe.command_id, &fn); - fn(nvmeq->dev, ctx, &cqe); + fn(nvmeq, ctx, &cqe); } /* If the controller ignores the cq head doorbell and continuously @@ -747,7 +804,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong)) result = nvme_submit_bio_queue(nvmeq, ns, bio); if (unlikely(result)) { - if (bio_list_empty(&nvmeq->sq_cong)) + if (!waitqueue_active(&nvmeq->sq_full)) add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); bio_list_add(&nvmeq->sq_cong, bio); } @@ -791,7 +848,7 @@ struct sync_cmd_info { int status; }; -static void sync_completion(struct nvme_dev *dev, void *ctx, +static void sync_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { struct sync_cmd_info *cmdinfo = ctx; @@ -804,27 +861,46 @@ static void sync_completion(struct nvme_dev *dev, void *ctx, * Returns 0 on success. 
If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, +static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, + struct nvme_command *cmd, u32 *result, unsigned timeout) { - int cmdid; + int cmdid, ret; struct sync_cmd_info cmdinfo; + struct nvme_queue *nvmeq; + + nvmeq = lock_nvmeq(dev, q_idx); + if (!nvmeq) { + unlock_nvmeq(nvmeq); + return -ENODEV; + } cmdinfo.task = current; cmdinfo.status = -EINTR; - cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion, - timeout); - if (cmdid < 0) + cmdid = alloc_cmdid(nvmeq, &cmdinfo, sync_completion, timeout); + if (cmdid < 0) { + unlock_nvmeq(nvmeq); return cmdid; + } cmd->common.command_id = cmdid; set_current_state(TASK_KILLABLE); - nvme_submit_cmd(nvmeq, cmd); + ret = nvme_submit_cmd(nvmeq, cmd); + if (ret) { + free_cmdid(nvmeq, cmdid, NULL); + unlock_nvmeq(nvmeq); + set_current_state(TASK_RUNNING); + return ret; + } + unlock_nvmeq(nvmeq); schedule_timeout(timeout); if (cmdinfo.status == -EINTR) { - nvme_abort_command(nvmeq, cmdid); + nvmeq = lock_nvmeq(dev, q_idx); + if (nvmeq) + nvme_abort_command(nvmeq, cmdid); + unlock_nvmeq(nvmeq); return -EINTR; } @@ -845,20 +921,26 @@ static int nvme_submit_async_cmd(struct nvme_queue *nvmeq, return cmdid; cmdinfo->status = -EINTR; cmd->common.command_id = cmdid; - nvme_submit_cmd(nvmeq, cmd); - return 0; + return nvme_submit_cmd(nvmeq, cmd); } int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, u32 *result) { - return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); + return nvme_submit_sync_cmd(dev, 0, cmd, result, ADMIN_TIMEOUT); +} + +int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_command *cmd, + u32 *result) +{ + return nvme_submit_sync_cmd(dev, smp_processor_id() + 1, cmd, result, + NVME_IO_TIMEOUT); } static int nvme_submit_admin_cmd_async(struct nvme_dev *dev, struct 
nvme_command *cmd, struct async_cmd_info *cmdinfo) { - return nvme_submit_async_cmd(dev->queues[0], cmd, cmdinfo, + return nvme_submit_async_cmd(raw_nvmeq(dev, 0), cmd, cmdinfo, ADMIN_TIMEOUT); } @@ -985,6 +1067,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) struct nvme_command cmd; struct nvme_dev *dev = nvmeq->dev; struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); + struct nvme_queue *adminq; if (!nvmeq->qid || info[cmdid].aborted) { if (work_busy(&dev->reset_work)) @@ -993,7 +1076,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) dev_warn(&dev->pci_dev->dev, "I/O %d QID %d timeout, reset controller\n", cmdid, nvmeq->qid); - PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev); + dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); return; } @@ -1001,7 +1084,8 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) if (!dev->abort_limit) return; - a_cmdid = alloc_cmdid(dev->queues[0], CMD_CTX_ABORT, special_completion, + adminq = rcu_dereference(dev->queues[0]); + a_cmdid = alloc_cmdid(adminq, CMD_CTX_ABORT, special_completion, ADMIN_TIMEOUT); if (a_cmdid < 0) return; @@ -1018,7 +1102,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", cmdid, nvmeq->qid); - nvme_submit_cmd(dev->queues[0], &cmd); + nvme_submit_cmd(adminq, &cmd); } /** @@ -1051,23 +1135,38 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", cmdid, nvmeq->qid); ctx = cancel_cmdid(nvmeq, cmdid, &fn); - fn(nvmeq->dev, ctx, &cqe); + fn(nvmeq, ctx, &cqe); } } -static void nvme_free_queue(struct nvme_queue *nvmeq) +static void nvme_free_queue(struct rcu_head *r) { + struct nvme_queue *nvmeq = container_of(r, struct nvme_queue, r_head); + spin_lock_irq(&nvmeq->q_lock); while (bio_list_peek(&nvmeq->sq_cong)) { struct bio *bio = bio_list_pop(&nvmeq->sq_cong); bio_endio(bio, 
-EIO); } + while (!list_empty(&nvmeq->iod_bio)) { + static struct nvme_completion cqe = { + .status = cpu_to_le16( + (NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1), + }; + struct nvme_iod *iod = list_first_entry(&nvmeq->iod_bio, + struct nvme_iod, + node); + list_del(&iod->node); + bio_completion(nvmeq, iod, &cqe); + } spin_unlock_irq(&nvmeq->q_lock); dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), nvmeq->sq_cmds, nvmeq->sq_dma_addr); + if (nvmeq->qid) + free_cpumask_var(nvmeq->cpu_mask); kfree(nvmeq); } @@ -1076,9 +1175,10 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) int i; for (i = dev->queue_count - 1; i >= lowest; i--) { - nvme_free_queue(dev->queues[i]); + struct nvme_queue *nvmeq = raw_nvmeq(dev, i); + rcu_assign_pointer(dev->queues[i], NULL); + call_rcu(&nvmeq->r_head, nvme_free_queue); dev->queue_count--; - dev->queues[i] = NULL; } } @@ -1098,6 +1198,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) return 1; } nvmeq->q_suspended = 1; + nvmeq->dev->online_queues--; spin_unlock_irq(&nvmeq->q_lock); irq_set_affinity_hint(vector, NULL); @@ -1116,7 +1217,7 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq) static void nvme_disable_queue(struct nvme_dev *dev, int qid) { - struct nvme_queue *nvmeq = dev->queues[qid]; + struct nvme_queue *nvmeq = raw_nvmeq(dev, qid); if (!nvmeq) return; @@ -1152,6 +1253,9 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, if (!nvmeq->sq_cmds) goto free_cqdma; + if (qid && !zalloc_cpumask_var(&nvmeq->cpu_mask, GFP_KERNEL)) + goto free_sqdma; + nvmeq->q_dmadev = dmadev; nvmeq->dev = dev; snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", @@ -1162,15 +1266,20 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, init_waitqueue_head(&nvmeq->sq_full); init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread); 
bio_list_init(&nvmeq->sq_cong); + INIT_LIST_HEAD(&nvmeq->iod_bio); nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->cq_vector = vector; nvmeq->qid = qid; nvmeq->q_suspended = 1; dev->queue_count++; + rcu_assign_pointer(dev->queues[qid], nvmeq); return nvmeq; + free_sqdma: + dma_free_coherent(dmadev, SQ_SIZE(depth), (void *)nvmeq->sq_cmds, + nvmeq->sq_dma_addr); free_cqdma: dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); @@ -1203,6 +1312,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_cancel_ios(nvmeq, false); nvmeq->q_suspended = 0; + dev->online_queues++; } static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) @@ -1311,12 +1421,11 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - nvmeq = dev->queues[0]; + nvmeq = raw_nvmeq(dev, 0); if (!nvmeq) { nvmeq = nvme_alloc_queue(dev, 0, 64, 0); if (!nvmeq) return -ENOMEM; - dev->queues[0] = nvmeq; } aqa = nvmeq->q_depth - 1; @@ -1418,7 +1527,6 @@ void nvme_unmap_user_pages(struct nvme_dev *dev, int write, static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_dev *dev = ns->dev; - struct nvme_queue *nvmeq; struct nvme_user_io io; struct nvme_command c; unsigned length, meta_len; @@ -1492,22 +1600,14 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.metadata = cpu_to_le64(meta_dma_addr); } - length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL); + length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); + c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + c.rw.prp2 = cpu_to_le64(iod->first_dma); - nvmeq = get_nvmeq(dev); - /* - * Since nvme_submit_sync_cmd sleeps, we can't keep preemption - * disabled. We may be preempted at any point, and be rescheduled - * to a different CPU. 
That will cause cacheline bouncing, but no - * additional races since q_lock already protects against other CPUs. - */ - put_nvmeq(nvmeq); if (length != (io.nblocks + 1) << ns->lba_shift) status = -ENOMEM; - else if (!nvmeq || nvmeq->q_suspended) - status = -EBUSY; else - status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); + status = nvme_submit_io_cmd(dev, &c, NULL); if (meta_len) { if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) { @@ -1572,8 +1672,9 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, length); if (IS_ERR(iod)) return PTR_ERR(iod); - length = nvme_setup_prps(dev, &c.common, iod, length, - GFP_KERNEL); + length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); + c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + c.common.prp2 = cpu_to_le64(iod->first_dma); } timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : @@ -1581,8 +1682,7 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, if (length != cmd.data_len) status = -ENOMEM; else - status = nvme_submit_sync_cmd(dev->queues[0], &c, &cmd.result, - timeout); + status = nvme_submit_sync_cmd(dev, 0, &c, &cmd.result, timeout); if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); @@ -1653,25 +1753,51 @@ static void nvme_release(struct gendisk *disk, fmode_t mode) kref_put(&dev->kref, nvme_free_dev); } +static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) +{ + /* some standard values */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + return 0; +} + static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, .compat_ioctl = nvme_compat_ioctl, .open = nvme_open, .release = nvme_release, + .getgeo = nvme_getgeo, }; +static void nvme_resubmit_iods(struct nvme_queue *nvmeq) +{ + struct nvme_iod *iod, *next; + + list_for_each_entry_safe(iod, next, &nvmeq->iod_bio, node) { + if (unlikely(nvme_submit_iod(nvmeq, iod))) + break; + list_del(&iod->node); 
+ if (bio_list_empty(&nvmeq->sq_cong) && + list_empty(&nvmeq->iod_bio)) + remove_wait_queue(&nvmeq->sq_full, + &nvmeq->sq_cong_wait); + } +} + static void nvme_resubmit_bios(struct nvme_queue *nvmeq) { while (bio_list_peek(&nvmeq->sq_cong)) { struct bio *bio = bio_list_pop(&nvmeq->sq_cong); struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data; - if (bio_list_empty(&nvmeq->sq_cong)) + if (bio_list_empty(&nvmeq->sq_cong) && + list_empty(&nvmeq->iod_bio)) remove_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); if (nvme_submit_bio_queue(nvmeq, ns, bio)) { - if (bio_list_empty(&nvmeq->sq_cong)) + if (!waitqueue_active(&nvmeq->sq_full)) add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); bio_list_add_head(&nvmeq->sq_cong, bio); @@ -1696,13 +1822,14 @@ static int nvme_kthread(void *data) list_del_init(&dev->node); dev_warn(&dev->pci_dev->dev, "Failed status, reset controller\n"); - PREPARE_WORK(&dev->reset_work, - nvme_reset_failed_dev); + dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); continue; } + rcu_read_lock(); for (i = 0; i < dev->queue_count; i++) { - struct nvme_queue *nvmeq = dev->queues[i]; + struct nvme_queue *nvmeq = + rcu_dereference(dev->queues[i]); if (!nvmeq) continue; spin_lock_irq(&nvmeq->q_lock); @@ -1711,9 +1838,11 @@ static int nvme_kthread(void *data) nvme_process_cq(nvmeq); nvme_cancel_ios(nvmeq, true); nvme_resubmit_bios(nvmeq); + nvme_resubmit_iods(nvmeq); unlock: spin_unlock_irq(&nvmeq->q_lock); } + rcu_read_unlock(); } spin_unlock(&dev_list_lock); schedule_timeout(round_jiffies_relative(HZ)); @@ -1788,6 +1917,143 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, return NULL; } +static int nvme_find_closest_node(int node) +{ + int n, val, min_val = INT_MAX, best_node = node; + + for_each_online_node(n) { + if (n == node) + continue; + val = node_distance(node, n); + if (val < min_val) { + min_val = val; + best_node = n; + } + } + return best_node; +} + +static void 
nvme_set_queue_cpus(cpumask_t *qmask, struct nvme_queue *nvmeq, + int count) +{ + int cpu; + for_each_cpu(cpu, qmask) { + if (cpumask_weight(nvmeq->cpu_mask) >= count) + break; + if (!cpumask_test_and_set_cpu(cpu, nvmeq->cpu_mask)) + *per_cpu_ptr(nvmeq->dev->io_queue, cpu) = nvmeq->qid; + } +} + +static void nvme_add_cpus(cpumask_t *mask, const cpumask_t *unassigned_cpus, + const cpumask_t *new_mask, struct nvme_queue *nvmeq, int cpus_per_queue) +{ + int next_cpu; + for_each_cpu(next_cpu, new_mask) { + cpumask_or(mask, mask, get_cpu_mask(next_cpu)); + cpumask_or(mask, mask, topology_thread_cpumask(next_cpu)); + cpumask_and(mask, mask, unassigned_cpus); + nvme_set_queue_cpus(mask, nvmeq, cpus_per_queue); + } +} + +static void nvme_create_io_queues(struct nvme_dev *dev) +{ + unsigned i, max; + + max = min(dev->max_qid, num_online_cpus()); + for (i = dev->queue_count; i <= max; i++) + if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1)) + break; + + max = min(dev->queue_count - 1, num_online_cpus()); + for (i = dev->online_queues; i <= max; i++) + if (nvme_create_queue(raw_nvmeq(dev, i), i)) + break; +} + +/* + * If there are fewer queues than online cpus, this will try to optimally + * assign a queue to multiple cpus by grouping cpus that are "close" together: + * thread siblings, core, socket, closest node, then whatever else is + * available. 
+ */ +static void nvme_assign_io_queues(struct nvme_dev *dev) +{ + unsigned cpu, cpus_per_queue, queues, remainder, i; + cpumask_var_t unassigned_cpus; + + nvme_create_io_queues(dev); + + queues = min(dev->online_queues - 1, num_online_cpus()); + if (!queues) + return; + + cpus_per_queue = num_online_cpus() / queues; + remainder = queues - (num_online_cpus() - queues * cpus_per_queue); + + if (!alloc_cpumask_var(&unassigned_cpus, GFP_KERNEL)) + return; + + cpumask_copy(unassigned_cpus, cpu_online_mask); + cpu = cpumask_first(unassigned_cpus); + for (i = 1; i <= queues; i++) { + struct nvme_queue *nvmeq = lock_nvmeq(dev, i); + cpumask_t mask; + + cpumask_clear(nvmeq->cpu_mask); + if (!cpumask_weight(unassigned_cpus)) { + unlock_nvmeq(nvmeq); + break; + } + + mask = *get_cpu_mask(cpu); + nvme_set_queue_cpus(&mask, nvmeq, cpus_per_queue); + if (cpus_weight(mask) < cpus_per_queue) + nvme_add_cpus(&mask, unassigned_cpus, + topology_thread_cpumask(cpu), + nvmeq, cpus_per_queue); + if (cpus_weight(mask) < cpus_per_queue) + nvme_add_cpus(&mask, unassigned_cpus, + topology_core_cpumask(cpu), + nvmeq, cpus_per_queue); + if (cpus_weight(mask) < cpus_per_queue) + nvme_add_cpus(&mask, unassigned_cpus, + cpumask_of_node(cpu_to_node(cpu)), + nvmeq, cpus_per_queue); + if (cpus_weight(mask) < cpus_per_queue) + nvme_add_cpus(&mask, unassigned_cpus, + cpumask_of_node( + nvme_find_closest_node( + cpu_to_node(cpu))), + nvmeq, cpus_per_queue); + if (cpus_weight(mask) < cpus_per_queue) + nvme_add_cpus(&mask, unassigned_cpus, + unassigned_cpus, + nvmeq, cpus_per_queue); + + WARN(cpumask_weight(nvmeq->cpu_mask) != cpus_per_queue, + "nvme%d qid:%d mis-matched queue-to-cpu assignment\n", + dev->instance, i); + + irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, + nvmeq->cpu_mask); + cpumask_andnot(unassigned_cpus, unassigned_cpus, + nvmeq->cpu_mask); + cpu = cpumask_next(cpu, unassigned_cpus); + if (remainder && !--remainder) + cpus_per_queue++; + unlock_nvmeq(nvmeq); + } + 
WARN(cpumask_weight(unassigned_cpus), "nvme%d unassigned online cpus\n", + dev->instance); + i = 0; + cpumask_andnot(unassigned_cpus, cpu_possible_mask, cpu_online_mask); + for_each_cpu(cpu, unassigned_cpus) + *per_cpu_ptr(dev->io_queue, cpu) = (i++ % queues) + 1; + free_cpumask_var(unassigned_cpus); +} + static int set_queue_count(struct nvme_dev *dev, int count) { int status; @@ -1806,13 +2072,26 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); } +static int nvme_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct nvme_dev *dev = container_of(self, struct nvme_dev, nb); + switch (action) { + case CPU_ONLINE: + case CPU_DEAD: + nvme_assign_io_queues(dev); + break; + } + return NOTIFY_OK; +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { - struct nvme_queue *adminq = dev->queues[0]; + struct nvme_queue *adminq = raw_nvmeq(dev, 0); struct pci_dev *pdev = dev->pci_dev; - int result, cpu, i, vecs, nr_io_queues, size, q_depth; + int result, i, vecs, nr_io_queues, size; - nr_io_queues = num_online_cpus(); + nr_io_queues = num_possible_cpus(); result = set_queue_count(dev, nr_io_queues); if (result < 0) return result; @@ -1831,37 +2110,22 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) size = db_bar_size(dev, nr_io_queues); } while (1); dev->dbs = ((void __iomem *)dev->bar) + 4096; - dev->queues[0]->q_db = dev->dbs; + adminq->q_db = dev->dbs; } /* Deregister the admin queue's interrupt */ free_irq(dev->entry[0].vector, adminq); - vecs = nr_io_queues; - for (i = 0; i < vecs; i++) + for (i = 0; i < nr_io_queues; i++) dev->entry[i].entry = i; - for (;;) { - result = pci_enable_msix(pdev, dev->entry, vecs); - if (result <= 0) - break; - vecs = result; - } - - if (result < 0) { - vecs = nr_io_queues; - if (vecs > 32) - vecs = 32; - for (;;) { - result = pci_enable_msi_block(pdev, vecs); - if (result == 0) { - for (i = 0; i < vecs; i++) - 
dev->entry[i].vector = i + pdev->irq; - break; - } else if (result < 0) { - vecs = 1; - break; - } - vecs = result; + vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues); + if (vecs < 0) { + vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32)); + if (vecs < 0) { + vecs = 1; + } else { + for (i = 0; i < vecs; i++) + dev->entry[i].vector = i + pdev->irq; } } @@ -1872,6 +2136,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * number of interrupts. */ nr_io_queues = vecs; + dev->max_qid = nr_io_queues; result = queue_request_irq(dev, adminq, adminq->irqname); if (result) { @@ -1880,49 +2145,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } /* Free previously allocated queues that are no longer usable */ - spin_lock(&dev_list_lock); - for (i = dev->queue_count - 1; i > nr_io_queues; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; + nvme_free_queues(dev, nr_io_queues + 1); + nvme_assign_io_queues(dev); - spin_lock_irq(&nvmeq->q_lock); - nvme_cancel_ios(nvmeq, false); - spin_unlock_irq(&nvmeq->q_lock); - - nvme_free_queue(nvmeq); - dev->queue_count--; - dev->queues[i] = NULL; - } - spin_unlock(&dev_list_lock); - - cpu = cpumask_first(cpu_online_mask); - for (i = 0; i < nr_io_queues; i++) { - irq_set_affinity_hint(dev->entry[i].vector, get_cpu_mask(cpu)); - cpu = cpumask_next(cpu, cpu_online_mask); - } - - q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, - NVME_Q_DEPTH); - for (i = dev->queue_count - 1; i < nr_io_queues; i++) { - dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i); - if (!dev->queues[i + 1]) { - result = -ENOMEM; - goto free_queues; - } - } - - for (; i < num_possible_cpus(); i++) { - int target = i % rounddown_pow_of_two(dev->queue_count - 1); - dev->queues[i + 1] = dev->queues[target + 1]; - } - - for (i = 1; i < dev->queue_count; i++) { - result = nvme_create_queue(dev->queues[i], i); - if (result) { - for (--i; i > 0; i--) - nvme_disable_queue(dev, i); - goto free_queues; - } - } + 
dev->nb.notifier_call = &nvme_cpu_notify; + result = register_hotcpu_notifier(&dev->nb); + if (result) + goto free_queues; return 0; @@ -2001,6 +2230,7 @@ static int nvme_dev_add(struct nvme_dev *dev) static int nvme_dev_map(struct nvme_dev *dev) { + u64 cap; int bars, result = -ENOMEM; struct pci_dev *pdev = dev->pci_dev; @@ -2024,7 +2254,9 @@ static int nvme_dev_map(struct nvme_dev *dev) result = -ENODEV; goto unmap; } - dev->db_stride = 1 << NVME_CAP_STRIDE(readq(&dev->bar->cap)); + cap = readq(&dev->bar->cap); + dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); + dev->db_stride = 1 << NVME_CAP_STRIDE(cap); dev->dbs = ((void __iomem *)dev->bar) + 4096; return 0; @@ -2180,7 +2412,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) atomic_set(&dq.refcount, 0); dq.worker = &worker; for (i = dev->queue_count - 1; i > 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; + struct nvme_queue *nvmeq = raw_nvmeq(dev, i); if (nvme_suspend_queue(nvmeq)) continue; @@ -2193,19 +2425,38 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) kthread_stop(kworker_task); } +/* +* Remove the node from the device list and check +* for whether or not we need to stop the nvme_thread. 
+*/ +static void nvme_dev_list_remove(struct nvme_dev *dev) +{ + struct task_struct *tmp = NULL; + + spin_lock(&dev_list_lock); + list_del_init(&dev->node); + if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) { + tmp = nvme_thread; + nvme_thread = NULL; + } + spin_unlock(&dev_list_lock); + + if (tmp) + kthread_stop(tmp); +} + static void nvme_dev_shutdown(struct nvme_dev *dev) { int i; dev->initialized = 0; + unregister_hotcpu_notifier(&dev->nb); - spin_lock(&dev_list_lock); - list_del_init(&dev->node); - spin_unlock(&dev_list_lock); + nvme_dev_list_remove(dev); if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { for (i = dev->queue_count - 1; i >= 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; + struct nvme_queue *nvmeq = raw_nvmeq(dev, i); nvme_suspend_queue(nvmeq); nvme_clear_queue(nvmeq); } @@ -2298,6 +2549,7 @@ static void nvme_free_dev(struct kref *kref) struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); nvme_free_namespaces(dev); + free_percpu(dev->io_queue); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2341,6 +2593,7 @@ static const struct file_operations nvme_dev_fops = { static int nvme_dev_start(struct nvme_dev *dev) { int result; + bool start_thread = false; result = nvme_dev_map(dev); if (result) @@ -2351,9 +2604,24 @@ static int nvme_dev_start(struct nvme_dev *dev) goto unmap; spin_lock(&dev_list_lock); + if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) { + start_thread = true; + nvme_thread = NULL; + } list_add(&dev->node, &dev_list); spin_unlock(&dev_list_lock); + if (start_thread) { + nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); + wake_up(&nvme_kthread_wait); + } else + wait_event_killable(nvme_kthread_wait, nvme_thread); + + if (IS_ERR_OR_NULL(nvme_thread)) { + result = nvme_thread ? 
PTR_ERR(nvme_thread) : -EINTR; + goto disable; + } + result = nvme_setup_io_queues(dev); if (result && result != -EBUSY) goto disable; @@ -2362,9 +2630,7 @@ static int nvme_dev_start(struct nvme_dev *dev) disable: nvme_disable_queue(dev, 0); - spin_lock(&dev_list_lock); - list_del_init(&dev->node); - spin_unlock(&dev_list_lock); + nvme_dev_list_remove(dev); unmap: nvme_dev_unmap(dev); return result; @@ -2383,18 +2649,10 @@ static int nvme_remove_dead_ctrl(void *arg) static void nvme_remove_disks(struct work_struct *ws) { - int i; struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); nvme_dev_remove(dev); - spin_lock(&dev_list_lock); - for (i = dev->queue_count - 1; i > 0; i--) { - BUG_ON(!dev->queues[i] || !dev->queues[i]->q_suspended); - nvme_free_queue(dev->queues[i]); - dev->queue_count--; - dev->queues[i] = NULL; - } - spin_unlock(&dev_list_lock); + nvme_free_queues(dev, 1); } static int nvme_dev_resume(struct nvme_dev *dev) @@ -2406,7 +2664,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) return ret; if (ret == -EBUSY) { spin_lock(&dev_list_lock); - PREPARE_WORK(&dev->reset_work, nvme_remove_disks); + dev->reset_workfn = nvme_remove_disks; queue_work(nvme_workq, &dev->reset_work); spin_unlock(&dev_list_lock); } @@ -2435,6 +2693,12 @@ static void nvme_reset_failed_dev(struct work_struct *ws) nvme_dev_reset(dev); } +static void nvme_reset_workfn(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); + dev->reset_workfn(work); +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int result = -ENOMEM; @@ -2451,9 +2715,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) GFP_KERNEL); if (!dev->queues) goto free; + dev->io_queue = alloc_percpu(unsigned short); + if (!dev->io_queue) + goto free; INIT_LIST_HEAD(&dev->namespaces); - INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); + dev->reset_workfn = nvme_reset_failed_dev; + 
INIT_WORK(&dev->reset_work, nvme_reset_workfn); dev->pci_dev = pdev; pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -2464,6 +2732,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release; + kref_init(&dev->kref); result = nvme_dev_start(dev); if (result) { if (result == -EBUSY) @@ -2471,7 +2740,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } - kref_init(&dev->kref); result = nvme_dev_add(dev); if (result) goto shutdown; @@ -2500,6 +2768,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) release: nvme_release_instance(dev); free: + free_percpu(dev->io_queue); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2526,6 +2795,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_dev_remove(dev); nvme_dev_shutdown(dev); nvme_free_queues(dev, 0); + rcu_barrier(); nvme_release_instance(dev); nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); @@ -2538,6 +2808,7 @@ static void nvme_remove(struct pci_dev *pdev) #define nvme_slot_reset NULL #define nvme_error_resume NULL +#ifdef CONFIG_PM_SLEEP static int nvme_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2553,11 +2824,12 @@ static int nvme_resume(struct device *dev) struct nvme_dev *ndev = pci_get_drvdata(pdev); if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { - PREPARE_WORK(&ndev->reset_work, nvme_reset_failed_dev); + ndev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &ndev->reset_work); } return 0; } +#endif static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); @@ -2572,7 +2844,7 @@ static const struct pci_error_handlers nvme_err_handler = { /* Move to pci_ids.h later */ #define PCI_CLASS_STORAGE_EXPRESS 0x010802 -static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = { +static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } 
}; @@ -2594,14 +2866,11 @@ static int __init nvme_init(void) { int result; - nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); - if (IS_ERR(nvme_thread)) - return PTR_ERR(nvme_thread); + init_waitqueue_head(&nvme_kthread_wait); - result = -ENOMEM; nvme_workq = create_singlethread_workqueue("nvme"); if (!nvme_workq) - goto kill_kthread; + return -ENOMEM; result = register_blkdev(nvme_major, "nvme"); if (result < 0) @@ -2618,8 +2887,6 @@ static int __init nvme_init(void) unregister_blkdev(nvme_major, "nvme"); kill_workq: destroy_workqueue(nvme_workq); - kill_kthread: - kthread_stop(nvme_thread); return result; } @@ -2628,11 +2895,11 @@ static void __exit nvme_exit(void) pci_unregister_driver(&nvme_driver); unregister_blkdev(nvme_major, "nvme"); destroy_workqueue(nvme_workq); - kthread_stop(nvme_thread); + BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); } MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); MODULE_LICENSE("GPL"); -MODULE_VERSION("0.8"); +MODULE_VERSION("0.9"); module_init(nvme_init); module_exit(nvme_exit); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 4a0ceb64e269..2c3f5be06da1 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1562,13 +1562,14 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = PTR_ERR(iod); goto out; } - length = nvme_setup_prps(dev, &c.common, iod, tot_len, - GFP_KERNEL); + length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL); if (length != tot_len) { res = -ENOMEM; goto out_unmap; } + c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + c.dlfw.prp2 = cpu_to_le64(iod->first_dma); c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); } else if (opcode == nvme_admin_activate_fw) { @@ -2033,7 +2034,6 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; struct nvme_dev *dev = ns->dev; - struct nvme_queue *nvmeq; 
u32 num_cmds; struct nvme_iod *iod; u64 unit_len; @@ -2045,7 +2045,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read); u16 control; - u32 max_blocks = nvme_block_nr(ns, dev->max_hw_sectors); + u32 max_blocks = queue_max_hw_sectors(ns->queue); num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); @@ -2093,8 +2093,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = PTR_ERR(iod); goto out; } - retcode = nvme_setup_prps(dev, &c.common, iod, unit_len, - GFP_KERNEL); + retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL); if (retcode != unit_len) { nvme_unmap_user_pages(dev, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, @@ -2103,21 +2102,12 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = -ENOMEM; goto out; } + c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + c.rw.prp2 = cpu_to_le64(iod->first_dma); nvme_offset += unit_num_blocks; - nvmeq = get_nvmeq(dev); - /* - * Since nvme_submit_sync_cmd sleeps, we can't keep - * preemption disabled. We may be preempted at any - * point, and be rescheduled to a different CPU. That - * will cause cacheline bouncing, but no additional - * races since q_lock already protects against other - * CPUs. - */ - put_nvmeq(nvmeq); - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, - NVME_IO_TIMEOUT); + nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); if (nvme_sc != NVME_SC_SUCCESS) { nvme_unmap_user_pages(dev, (is_write) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE, @@ -2644,7 +2634,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, { int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; - struct nvme_queue *nvmeq; struct nvme_command c; u8 immed, pcmod, pc, no_flush, start; @@ -2671,10 +2660,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvmeq = get_nvmeq(ns->dev); - put_nvmeq(nvmeq); - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); - + nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out; @@ -2697,15 +2683,12 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; struct nvme_command c; - struct nvme_queue *nvmeq; memset(&c, 0, sizeof(c)); c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvmeq = get_nvmeq(ns->dev); - put_nvmeq(nvmeq); - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); + nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); res = nvme_trans_status_code(hdr, nvme_sc); if (res) @@ -2872,7 +2855,6 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_dev *dev = ns->dev; struct scsi_unmap_parm_list *plist; struct nvme_dsm_range *range; - struct nvme_queue *nvmeq; struct nvme_command c; int i, nvme_sc, res = -ENOMEM; u16 ndesc, list_len; @@ -2914,10 +2896,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - nvmeq = get_nvmeq(dev); - put_nvmeq(nvmeq); - - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); + nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); res = nvme_trans_status_code(hdr, nvme_sc); dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 
b365e0dfccb6..4c95b503b09e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1654,7 +1654,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, if (osd_req->r_result < 0) obj_request->result = osd_req->r_result; - BUG_ON(osd_req->r_num_ops > 2); + rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP); /* * We support a 64-bit length, but ultimately it has to be @@ -1662,11 +1662,15 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, */ obj_request->xferred = osd_req->r_reply_op_len[0]; rbd_assert(obj_request->xferred < (u64)UINT_MAX); + opcode = osd_req->r_ops[0].op; switch (opcode) { case CEPH_OSD_OP_READ: rbd_osd_read_callback(obj_request); break; + case CEPH_OSD_OP_SETALLOCHINT: + rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE); + /* fall through */ case CEPH_OSD_OP_WRITE: rbd_osd_write_callback(obj_request); break; @@ -1715,9 +1719,16 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) snapc, CEPH_NOSNAP, &mtime); } +/* + * Create an osd request. A read request has one osd op (read). + * A write request has either one (watch) or two (hint+write) osd ops. + * (All rbd data writes are prefixed with an allocation hint op, but + * technically osd watch is a write request, hence this distinction.) 
+ */ static struct ceph_osd_request *rbd_osd_req_create( struct rbd_device *rbd_dev, bool write_request, + unsigned int num_ops, struct rbd_obj_request *obj_request) { struct ceph_snap_context *snapc = NULL; @@ -1733,10 +1744,13 @@ static struct ceph_osd_request *rbd_osd_req_create( snapc = img_request->snapc; } - /* Allocate and initialize the request, for the single op */ + rbd_assert(num_ops == 1 || (write_request && num_ops == 2)); + + /* Allocate and initialize the request, for the num_ops ops */ osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); + osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, + GFP_ATOMIC); if (!osd_req) return NULL; /* ENOMEM */ @@ -1756,8 +1770,8 @@ static struct ceph_osd_request *rbd_osd_req_create( /* * Create a copyup osd request based on the information in the - * object request supplied. A copyup request has two osd ops, - * a copyup method call, and a "normal" write request. + * object request supplied. A copyup request has three osd ops, + * a copyup method call, a hint op, and a write op. 
*/ static struct ceph_osd_request * rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) @@ -1773,12 +1787,12 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) rbd_assert(img_request); rbd_assert(img_request_write_test(img_request)); - /* Allocate and initialize the request, for the two ops */ + /* Allocate and initialize the request, for the three ops */ snapc = img_request->snapc; rbd_dev = img_request->rbd_dev; osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, 2, false, GFP_ATOMIC); + osd_req = ceph_osdc_alloc_request(osdc, snapc, 3, false, GFP_ATOMIC); if (!osd_req) return NULL; /* ENOMEM */ @@ -2109,7 +2123,6 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) rbd_assert(img_request->obj_request_count > 0); rbd_assert(which != BAD_WHICH); rbd_assert(which < img_request->obj_request_count); - rbd_assert(which >= img_request->next_completion); spin_lock_irq(&img_request->completion_lock); if (which != img_request->next_completion) @@ -2179,6 +2192,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, const char *object_name; u64 offset; u64 length; + unsigned int which = 0; object_name = rbd_segment_name(rbd_dev, img_offset); if (!object_name) @@ -2191,6 +2205,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, rbd_segment_name_free(object_name); if (!obj_request) goto out_unwind; + /* * set obj_request->img_request before creating the * osd_request so that it gets the right snapc @@ -2208,7 +2223,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, clone_size, GFP_ATOMIC); if (!obj_request->bio_list) - goto out_partial; + goto out_unwind; } else { unsigned int page_count; @@ -2221,19 +2236,27 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, } osd_req = rbd_osd_req_create(rbd_dev, write_request, - obj_request); + (write_request ? 
2 : 1), + obj_request); if (!osd_req) - goto out_partial; + goto out_unwind; obj_request->osd_req = osd_req; obj_request->callback = rbd_img_obj_callback; - osd_req_op_extent_init(osd_req, 0, opcode, offset, length, - 0, 0); + if (write_request) { + osd_req_op_alloc_hint_init(osd_req, which, + rbd_obj_bytes(&rbd_dev->header), + rbd_obj_bytes(&rbd_dev->header)); + which++; + } + + osd_req_op_extent_init(osd_req, which, opcode, offset, length, + 0, 0); if (type == OBJ_REQUEST_BIO) - osd_req_op_extent_osd_data_bio(osd_req, 0, + osd_req_op_extent_osd_data_bio(osd_req, which, obj_request->bio_list, length); else - osd_req_op_extent_osd_data_pages(osd_req, 0, + osd_req_op_extent_osd_data_pages(osd_req, which, obj_request->pages, length, offset & ~PAGE_MASK, false, false); @@ -2250,11 +2273,9 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, return 0; -out_partial: - rbd_obj_request_put(obj_request); out_unwind: for_each_obj_request_safe(img_request, obj_request, next_obj_request) - rbd_obj_request_put(obj_request); + rbd_img_obj_request_del(img_request, obj_request); return -ENOMEM; } @@ -2354,7 +2375,7 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) /* * The original osd request is of no use to use any more. - * We need a new one that can hold the two ops in a copyup + * We need a new one that can hold the three ops in a copyup * request. Allocate the new copyup osd request for the * original request, and release the old one. 
*/ @@ -2373,17 +2394,22 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0, false, false); - /* Then the original write request op */ + /* Then the hint op */ + + osd_req_op_alloc_hint_init(osd_req, 1, rbd_obj_bytes(&rbd_dev->header), + rbd_obj_bytes(&rbd_dev->header)); + + /* And the original write request op */ offset = orig_request->offset; length = orig_request->length; - osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE, + osd_req_op_extent_init(osd_req, 2, CEPH_OSD_OP_WRITE, offset, length, 0, 0); if (orig_request->type == OBJ_REQUEST_BIO) - osd_req_op_extent_osd_data_bio(osd_req, 1, + osd_req_op_extent_osd_data_bio(osd_req, 2, orig_request->bio_list, length); else - osd_req_op_extent_osd_data_pages(osd_req, 1, + osd_req_op_extent_osd_data_pages(osd_req, 2, orig_request->pages, length, offset & ~PAGE_MASK, false, false); @@ -2604,8 +2630,8 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) rbd_assert(obj_request->img_request); rbd_dev = obj_request->img_request->rbd_dev; - stat_request->osd_req = rbd_osd_req_create(rbd_dev, false, - stat_request); + stat_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, + stat_request); if (!stat_request->osd_req) goto out; stat_request->callback = rbd_img_obj_exists_callback; @@ -2808,7 +2834,8 @@ static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id) return -ENOMEM; ret = -ENOMEM; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, + obj_request); if (!obj_request->osd_req) goto out; @@ -2871,7 +2898,8 @@ static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) if (!obj_request) goto out_cancel; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, obj_request); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, + obj_request); if 
(!obj_request->osd_req) goto out_cancel; @@ -2979,7 +3007,8 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, obj_request->pages = pages; obj_request->page_count = page_count; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, + obj_request); if (!obj_request->osd_req) goto out; @@ -3212,7 +3241,8 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, obj_request->pages = pages; obj_request->page_count = page_count; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1, + obj_request); if (!obj_request->osd_req) goto out; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index eb6e1e0e8db2..a69dd93d1bd5 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -3910,18 +3910,22 @@ static void skd_release_msix(struct skd_device *skdev) struct skd_msix_entry *qentry; int i; - if (skdev->msix_entries == NULL) - return; - for (i = 0; i < skdev->msix_count; i++) { - qentry = &skdev->msix_entries[i]; - skdev = qentry->rsp; + if (skdev->msix_entries) { + for (i = 0; i < skdev->msix_count; i++) { + qentry = &skdev->msix_entries[i]; + skdev = qentry->rsp; + + if (qentry->have_irq) + devm_free_irq(&skdev->pdev->dev, + qentry->vector, qentry->rsp); + } - if (qentry->have_irq) - devm_free_irq(&skdev->pdev->dev, - qentry->vector, qentry->rsp); + kfree(skdev->msix_entries); } - pci_disable_msix(skdev->pdev); - kfree(skdev->msix_entries); + + if (skdev->msix_count) + pci_disable_msix(skdev->pdev); + skdev->msix_count = 0; skdev->msix_entries = NULL; } @@ -3929,12 +3933,10 @@ static void skd_release_msix(struct skd_device *skdev) static int skd_acquire_msix(struct skd_device *skdev) { int i, rc; - struct pci_dev *pdev; - struct msix_entry *entries = NULL; + struct pci_dev *pdev = skdev->pdev; + struct msix_entry *entries; struct skd_msix_entry *qentry; - pdev = 
skdev->pdev; - skdev->msix_count = SKD_MAX_MSIX_COUNT; entries = kzalloc(sizeof(struct msix_entry) * SKD_MAX_MSIX_COUNT, GFP_KERNEL); if (!entries) @@ -3943,40 +3945,26 @@ static int skd_acquire_msix(struct skd_device *skdev) for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) entries[i].entry = i; - rc = pci_enable_msix(pdev, entries, SKD_MAX_MSIX_COUNT); - if (rc < 0) + rc = pci_enable_msix_range(pdev, entries, + SKD_MIN_MSIX_COUNT, SKD_MAX_MSIX_COUNT); + if (rc < 0) { + pr_err("(%s): failed to enable MSI-X %d\n", + skd_name(skdev), rc); goto msix_out; - if (rc) { - if (rc < SKD_MIN_MSIX_COUNT) { - pr_err("(%s): failed to enable MSI-X %d\n", - skd_name(skdev), rc); - goto msix_out; - } - pr_debug("%s:%s:%d %s: <%s> allocated %d MSI-X vectors\n", - skdev->name, __func__, __LINE__, - pci_name(pdev), skdev->name, rc); - - skdev->msix_count = rc; - rc = pci_enable_msix(pdev, entries, skdev->msix_count); - if (rc) { - pr_err("(%s): failed to enable MSI-X " - "support (%d) %d\n", - skd_name(skdev), skdev->msix_count, rc); - goto msix_out; - } } + + skdev->msix_count = rc; skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) * skdev->msix_count, GFP_KERNEL); if (!skdev->msix_entries) { rc = -ENOMEM; - skdev->msix_count = 0; pr_err("(%s): msix table allocation error\n", skd_name(skdev)); goto msix_out; } - qentry = skdev->msix_entries; for (i = 0; i < skdev->msix_count; i++) { + qentry = &skdev->msix_entries[i]; qentry->vector = entries[i].vector; qentry->entry = entries[i].entry; qentry->rsp = NULL; @@ -3985,11 +3973,10 @@ static int skd_acquire_msix(struct skd_device *skdev) skdev->name, __func__, __LINE__, pci_name(pdev), skdev->name, i, qentry->vector, qentry->entry); - qentry++; } /* Enable MSI-X vectors for the base queue */ - for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) { + for (i = 0; i < skdev->msix_count; i++) { qentry = &skdev->msix_entries[i]; snprintf(qentry->isr_name, sizeof(qentry->isr_name), "%s%d-msix %s", DRV_NAME, skdev->devno, @@ -4045,8 +4032,8 @@ 
RETRY_IRQ_TYPE: case SKD_IRQ_MSI: snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d-msi", DRV_NAME, skdev->devno); - rc = pci_enable_msi(pdev); - if (!rc) { + rc = pci_enable_msi_range(pdev, 1, 1); + if (rc > 0) { rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, 0, skdev->isr_name, skdev); if (rc) { diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 20e061c3e023..c74f7b56e7c4 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -30,6 +30,7 @@ #include <linux/mutex.h> #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/wait.h> #include <asm/io.h> #include <asm/dbdma.h> #include <asm/prom.h> @@ -840,14 +841,17 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state, spin_lock_irqsave(&swim3_lock, flags); if (fs->state != idle && fs->state != available) { ++fs->wanted; - while (fs->state != available) { + /* this will enable irqs in order to sleep */ + if (!interruptible) + wait_event_lock_irq(fs->wait, + fs->state == available, + swim3_lock); + else if (wait_event_interruptible_lock_irq(fs->wait, + fs->state == available, + swim3_lock)) { + --fs->wanted; spin_unlock_irqrestore(&swim3_lock, flags); - if (interruptible && signal_pending(current)) { - --fs->wanted; - return -EINTR; - } - interruptible_sleep_on(&fs->wait); - spin_lock_irqsave(&swim3_lock, flags); + return -EINTR; } --fs->wanted; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b1cb3f4c4db4..6d8a87f252de 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -158,6 +158,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) unsigned long flags; unsigned int num; const bool last = (req->cmd_flags & REQ_END) != 0; + int err; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); @@ -198,11 +199,16 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) } spin_lock_irqsave(&vblk->vq_lock, flags); - if (__virtblk_add_req(vblk->vq, vbr, 
vbr->sg, num) < 0) { + err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num); + if (err) { virtqueue_kick(vblk->vq); spin_unlock_irqrestore(&vblk->vq_lock, flags); blk_mq_stop_hw_queue(hctx); - return BLK_MQ_RQ_QUEUE_BUSY; + /* Out of mem doesn't actually happen, since we fall back + * to direct descriptors */ + if (err == -ENOMEM || err == -ENOSPC) + return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_MQ_RQ_QUEUE_ERROR; } if (last) @@ -485,18 +491,20 @@ static struct blk_mq_ops virtio_mq_ops = { static struct blk_mq_reg virtio_mq_reg = { .ops = &virtio_mq_ops, .nr_hw_queues = 1, - .queue_depth = 64, + .queue_depth = 0, /* Set in virtblk_probe */ .numa_node = NUMA_NO_NODE, .flags = BLK_MQ_F_SHOULD_MERGE, }; +module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444); -static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, +static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, struct request *rq, unsigned int nr) { struct virtio_blk *vblk = data; struct virtblk_req *vbr = rq->special; sg_init_table(vbr->sg, vblk->sg_elems); + return 0; } static int virtblk_probe(struct virtio_device *vdev) @@ -552,6 +560,13 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_free_vq; } + /* Default queue sizing is to fill the ring. */ + if (!virtio_mq_reg.queue_depth) { + virtio_mq_reg.queue_depth = vblk->vq->num_free; + /* ... but without indirect descs, we use 2 descs per req */ + if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) + virtio_mq_reg.queue_depth /= 2; + } virtio_mq_reg.cmd_size = sizeof(struct virtblk_req) + sizeof(struct scatterlist) * sg_elems; diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 3450be850399..6489c0fd0ea6 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -15,6 +15,16 @@ config ZRAM See zram.txt for more information. 
+config ZRAM_LZ4_COMPRESS + bool "Enable LZ4 algorithm support" + depends on ZRAM + select LZ4_COMPRESS + select LZ4_DECOMPRESS + default n + help + This option enables LZ4 compression algorithm support. Compression + algorithm can be changed using `comp_algorithm' device attribute. + config ZRAM_DEBUG bool "Compressed RAM block device debug support" depends on ZRAM diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index cb0f9ced6a93..be0763ff57a2 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,3 +1,5 @@ -zram-y := zram_drv.o +zram-y := zcomp_lzo.o zcomp.o zram_drv.o + +zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c new file mode 100644 index 000000000000..f1ff39a3d1c1 --- /dev/null +++ b/drivers/block/zram/zcomp.c @@ -0,0 +1,353 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/wait.h> +#include <linux/sched.h> + +#include "zcomp.h" +#include "zcomp_lzo.h" +#ifdef CONFIG_ZRAM_LZ4_COMPRESS +#include "zcomp_lz4.h" +#endif + +/* + * single zcomp_strm backend + */ +struct zcomp_strm_single { + struct mutex strm_lock; + struct zcomp_strm *zstrm; +}; + +/* + * multi zcomp_strm backend + */ +struct zcomp_strm_multi { + /* protect strm list */ + spinlock_t strm_lock; + /* max possible number of zstrm streams */ + int max_strm; + /* number of available zstrm streams */ + int avail_strm; + /* list of available strms */ + struct list_head idle_strm; + wait_queue_head_t strm_wait; +}; + +static struct zcomp_backend *backends[] = { + &zcomp_lzo, +#ifdef CONFIG_ZRAM_LZ4_COMPRESS + &zcomp_lz4, +#endif + NULL +}; + +static struct zcomp_backend *find_backend(const char *compress) +{ + int i = 0; + while (backends[i]) { + if (sysfs_streq(compress, backends[i]->name)) + break; + i++; + } + return backends[i]; +} + +static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + if (zstrm->private) + comp->backend->destroy(zstrm->private); + free_pages((unsigned long)zstrm->buffer, 1); + kfree(zstrm); +} + +/* + * allocate new zcomp_strm structure with ->private initialized by + * backend, return NULL on error + */ +static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) +{ + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); + if (!zstrm) + return NULL; + + zstrm->private = comp->backend->create(); + /* + * allocate 2 pages. 
1 for compressed data, plus 1 extra for the + * case when compressed size is larger than the original one + */ + zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!zstrm->private || !zstrm->buffer) { + zcomp_strm_free(comp, zstrm); + zstrm = NULL; + } + return zstrm; +} + +/* + * get idle zcomp_strm or wait until other process release + * (zcomp_strm_release()) one for us + */ +static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + while (1) { + spin_lock(&zs->strm_lock); + if (!list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + spin_unlock(&zs->strm_lock); + return zstrm; + } + /* zstrm streams limit reached, wait for idle stream */ + if (zs->avail_strm >= zs->max_strm) { + spin_unlock(&zs->strm_lock); + wait_event(zs->strm_wait, !list_empty(&zs->idle_strm)); + continue; + } + /* allocate new zstrm stream */ + zs->avail_strm++; + spin_unlock(&zs->strm_lock); + + zstrm = zcomp_strm_alloc(comp); + if (!zstrm) { + spin_lock(&zs->strm_lock); + zs->avail_strm--; + spin_unlock(&zs->strm_lock); + wait_event(zs->strm_wait, !list_empty(&zs->idle_strm)); + continue; + } + break; + } + return zstrm; +} + +/* add stream back to idle list and wake up waiter or free the stream */ +static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + struct zcomp_strm_multi *zs = comp->stream; + + spin_lock(&zs->strm_lock); + if (zs->avail_strm <= zs->max_strm) { + list_add(&zstrm->list, &zs->idle_strm); + spin_unlock(&zs->strm_lock); + wake_up(&zs->strm_wait); + return; + } + + zs->avail_strm--; + spin_unlock(&zs->strm_lock); + zcomp_strm_free(comp, zstrm); +} + +/* change max_strm limit */ +static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + 
spin_lock(&zs->strm_lock); + zs->max_strm = num_strm; + /* + * if user has lowered the limit and there are idle streams, + * immediately free as much streams (and memory) as we can. + */ + while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + zcomp_strm_free(comp, zstrm); + zs->avail_strm--; + } + spin_unlock(&zs->strm_lock); + return true; +} + +static void zcomp_strm_multi_destroy(struct zcomp *comp) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + while (!list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + zcomp_strm_free(comp, zstrm); + } + kfree(zs); +} + +static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm) +{ + struct zcomp_strm *zstrm; + struct zcomp_strm_multi *zs; + + comp->destroy = zcomp_strm_multi_destroy; + comp->strm_find = zcomp_strm_multi_find; + comp->strm_release = zcomp_strm_multi_release; + comp->set_max_streams = zcomp_strm_multi_set_max_streams; + zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL); + if (!zs) + return -ENOMEM; + + comp->stream = zs; + spin_lock_init(&zs->strm_lock); + INIT_LIST_HEAD(&zs->idle_strm); + init_waitqueue_head(&zs->strm_wait); + zs->max_strm = max_strm; + zs->avail_strm = 1; + + zstrm = zcomp_strm_alloc(comp); + if (!zstrm) { + kfree(zs); + return -ENOMEM; + } + list_add(&zstrm->list, &zs->idle_strm); + return 0; +} + +static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp) +{ + struct zcomp_strm_single *zs = comp->stream; + mutex_lock(&zs->strm_lock); + return zs->zstrm; +} + +static void zcomp_strm_single_release(struct zcomp *comp, + struct zcomp_strm *zstrm) +{ + struct zcomp_strm_single *zs = comp->stream; + mutex_unlock(&zs->strm_lock); +} + +static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm) +{ + /* zcomp_strm_single support only 
max_comp_streams == 1 */ + return false; +} + +static void zcomp_strm_single_destroy(struct zcomp *comp) +{ + struct zcomp_strm_single *zs = comp->stream; + zcomp_strm_free(comp, zs->zstrm); + kfree(zs); +} + +static int zcomp_strm_single_create(struct zcomp *comp) +{ + struct zcomp_strm_single *zs; + + comp->destroy = zcomp_strm_single_destroy; + comp->strm_find = zcomp_strm_single_find; + comp->strm_release = zcomp_strm_single_release; + comp->set_max_streams = zcomp_strm_single_set_max_streams; + zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL); + if (!zs) + return -ENOMEM; + + comp->stream = zs; + mutex_init(&zs->strm_lock); + zs->zstrm = zcomp_strm_alloc(comp); + if (!zs->zstrm) { + kfree(zs); + return -ENOMEM; + } + return 0; +} + +/* show available compressors */ +ssize_t zcomp_available_show(const char *comp, char *buf) +{ + ssize_t sz = 0; + int i = 0; + + while (backends[i]) { + if (sysfs_streq(comp, backends[i]->name)) + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "[%s] ", backends[i]->name); + else + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "%s ", backends[i]->name); + i++; + } + sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n"); + return sz; +} + +bool zcomp_set_max_streams(struct zcomp *comp, int num_strm) +{ + return comp->set_max_streams(comp, num_strm); +} + +struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) +{ + return comp->strm_find(comp); +} + +void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + comp->strm_release(comp, zstrm); +} + +int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, + const unsigned char *src, size_t *dst_len) +{ + return comp->backend->compress(src, zstrm->buffer, dst_len, + zstrm->private); +} + +int zcomp_decompress(struct zcomp *comp, const unsigned char *src, + size_t src_len, unsigned char *dst) +{ + return comp->backend->decompress(src, src_len, dst); +} + +void zcomp_destroy(struct zcomp *comp) +{ + comp->destroy(comp); + kfree(comp); +} + +/* + * search 
available compressors for requested algorithm. + * allocate new zcomp and initialize it. return compressing + * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) + * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in + * case of allocation error. + */ +struct zcomp *zcomp_create(const char *compress, int max_strm) +{ + struct zcomp *comp; + struct zcomp_backend *backend; + + backend = find_backend(compress); + if (!backend) + return ERR_PTR(-EINVAL); + + comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); + if (!comp) + return ERR_PTR(-ENOMEM); + + comp->backend = backend; + if (max_strm > 1) + zcomp_strm_multi_create(comp, max_strm); + else + zcomp_strm_single_create(comp); + if (!comp->stream) { + kfree(comp); + return ERR_PTR(-ENOMEM); + } + return comp; +} diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h new file mode 100644 index 000000000000..c59d1fca72c0 --- /dev/null +++ b/drivers/block/zram/zcomp.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_H_ +#define _ZCOMP_H_ + +#include <linux/mutex.h> + +struct zcomp_strm { + /* compression/decompression buffer */ + void *buffer; + /* + * The private data of the compression stream, only compression + * stream backend can touch this (e.g. 
compression algorithm + * working memory) + */ + void *private; + /* used in multi stream backend, protected by backend strm_lock */ + struct list_head list; +}; + +/* static compression backend */ +struct zcomp_backend { + int (*compress)(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private); + + int (*decompress)(const unsigned char *src, size_t src_len, + unsigned char *dst); + + void *(*create)(void); + void (*destroy)(void *private); + + const char *name; +}; + +/* dynamic per-device compression frontend */ +struct zcomp { + void *stream; + struct zcomp_backend *backend; + + struct zcomp_strm *(*strm_find)(struct zcomp *comp); + void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm); + bool (*set_max_streams)(struct zcomp *comp, int num_strm); + void (*destroy)(struct zcomp *comp); +}; + +ssize_t zcomp_available_show(const char *comp, char *buf); + +struct zcomp *zcomp_create(const char *comp, int max_strm); +void zcomp_destroy(struct zcomp *comp); + +struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); +void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm); + +int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, + const unsigned char *src, size_t *dst_len); + +int zcomp_decompress(struct zcomp *comp, const unsigned char *src, + size_t src_len, unsigned char *dst); + +bool zcomp_set_max_streams(struct zcomp *comp, int num_strm); +#endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c new file mode 100644 index 000000000000..f2afb7e988c3 --- /dev/null +++ b/drivers/block/zram/zcomp_lz4.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/lz4.h> + +#include "zcomp_lz4.h" + +static void *zcomp_lz4_create(void) +{ + return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL); +} + +static void zcomp_lz4_destroy(void *private) +{ + kfree(private); +} + +static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private) +{ + /* return : Success if return 0 */ + return lz4_compress(src, PAGE_SIZE, dst, dst_len, private); +} + +static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst) +{ + size_t dst_len = PAGE_SIZE; + /* return : Success if return 0 */ + return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len); +} + +struct zcomp_backend zcomp_lz4 = { + .compress = zcomp_lz4_compress, + .decompress = zcomp_lz4_decompress, + .create = zcomp_lz4_create, + .destroy = zcomp_lz4_destroy, + .name = "lz4", +}; diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h new file mode 100644 index 000000000000..60613fb29dd8 --- /dev/null +++ b/drivers/block/zram/zcomp_lz4.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_LZ4_H_ +#define _ZCOMP_LZ4_H_ + +#include "zcomp.h" + +extern struct zcomp_backend zcomp_lz4; + +#endif /* _ZCOMP_LZ4_H_ */ diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c new file mode 100644 index 000000000000..da1bc47d588e --- /dev/null +++ b/drivers/block/zram/zcomp_lzo.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/lzo.h> + +#include "zcomp_lzo.h" + +static void *lzo_create(void) +{ + return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); +} + +static void lzo_destroy(void *private) +{ + kfree(private); +} + +static int lzo_compress(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private) +{ + int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private); + return ret == LZO_E_OK ? 0 : ret; +} + +static int lzo_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst) +{ + size_t dst_len = PAGE_SIZE; + int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len); + return ret == LZO_E_OK ? 0 : ret; +} + +struct zcomp_backend zcomp_lzo = { + .compress = lzo_compress, + .decompress = lzo_decompress, + .create = lzo_create, + .destroy = lzo_destroy, + .name = "lzo", +}; diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h new file mode 100644 index 000000000000..128c5807fa14 --- /dev/null +++ b/drivers/block/zram/zcomp_lzo.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _ZCOMP_LZO_H_ +#define _ZCOMP_LZO_H_ + +#include "zcomp.h" + +extern struct zcomp_backend zcomp_lzo; + +#endif /* _ZCOMP_LZO_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 011e55d820b1..9849b5233bf4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -29,19 +29,36 @@ #include <linux/genhd.h> #include <linux/highmem.h> #include <linux/slab.h> -#include <linux/lzo.h> #include <linux/string.h> #include <linux/vmalloc.h> +#include <linux/err.h> #include "zram_drv.h" /* Globals */ static int zram_major; static struct zram *zram_devices; +static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +#define ZRAM_ATTR_RO(name) \ +static ssize_t zram_attr_##name##_show(struct device *d, \ + struct device_attribute *attr, char *b) \ +{ \ + struct zram *zram = dev_to_zram(d); \ + return scnprintf(b, PAGE_SIZE, "%llu\n", \ + (u64)atomic64_read(&zram->stats.name)); \ +} \ +static struct device_attribute dev_attr_##name = \ + __ATTR(name, S_IRUGO, zram_attr_##name##_show, NULL); + +static inline int init_done(struct zram *zram) +{ + return zram->meta != NULL; +} + static inline struct zram *dev_to_zram(struct device *dev) { return (struct zram *)dev_to_disk(dev)->private_data; @@ -52,92 +69,114 @@ static ssize_t disksize_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%llu\n", zram->disksize); + return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); } static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, char *buf) { + u32 val; struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", zram->init_done); -} - -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); + down_read(&zram->init_lock); + val = init_done(zram); + up_read(&zram->init_lock); - return sprintf(buf, 
"%llu\n", - (u64)atomic64_read(&zram->stats.num_reads)); + return scnprintf(buf, PAGE_SIZE, "%u\n", val); } -static ssize_t num_writes_show(struct device *dev, +static ssize_t orig_data_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_writes)); + return scnprintf(buf, PAGE_SIZE, "%llu\n", + (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } -static ssize_t invalid_io_show(struct device *dev, +static ssize_t mem_used_total_show(struct device *dev, struct device_attribute *attr, char *buf) { + u64 val = 0; struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); + down_read(&zram->init_lock); + if (init_done(zram)) + val = zs_get_total_size_bytes(meta->mem_pool); + up_read(&zram->init_lock); - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.notify_free)); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); } -static ssize_t zero_pages_show(struct device *dev, +static ssize_t max_comp_streams_show(struct device *dev, struct device_attribute *attr, char *buf) { + int val; struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); + down_read(&zram->init_lock); + val = zram->max_comp_streams; + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%d\n", val); } -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t max_comp_streams_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { + int num; struct zram *zram = dev_to_zram(dev); + int ret; - return sprintf(buf, "%llu\n", - 
(u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); -} + ret = kstrtoint(buf, 0, &num); + if (ret < 0) + return ret; + if (num < 1) + return -EINVAL; -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); + down_write(&zram->init_lock); + if (init_done(zram)) { + if (!zcomp_set_max_streams(zram->comp, num)) { + pr_info("Cannot change max compression streams\n"); + ret = -EINVAL; + goto out; + } + } - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_size)); + zram->max_comp_streams = num; + ret = len; +out: + up_write(&zram->init_lock); + return ret; } -static ssize_t mem_used_total_show(struct device *dev, +static ssize_t comp_algorithm_show(struct device *dev, struct device_attribute *attr, char *buf) { - u64 val = 0; + size_t sz; struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; down_read(&zram->init_lock); - if (zram->init_done) - val = zs_get_total_size_bytes(meta->mem_pool); + sz = zcomp_available_show(zram->compressor, buf); up_read(&zram->init_lock); - return sprintf(buf, "%llu\n", val); + return sz; +} + +static ssize_t comp_algorithm_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + down_write(&zram->init_lock); + if (init_done(zram)) { + up_write(&zram->init_lock); + pr_info("Can't change algorithm for initialized device\n"); + return -EBUSY; + } + strlcpy(zram->compressor, buf, sizeof(zram->compressor)); + up_write(&zram->init_lock); + return len; } /* flag operations needs meta->tb_lock */ @@ -192,8 +231,6 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio) static void zram_meta_free(struct zram_meta *meta) { zs_destroy_pool(meta->mem_pool); - kfree(meta->compress_workmem); - free_pages((unsigned long)meta->compress_buffer, 1); vfree(meta->table); kfree(meta); } @@ -205,22 +242,11 @@ static struct 
zram_meta *zram_meta_alloc(u64 disksize) if (!meta) goto out; - meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!meta->compress_workmem) - goto free_meta; - - meta->compress_buffer = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); - if (!meta->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - goto free_workmem; - } - num_pages = disksize >> PAGE_SHIFT; meta->table = vzalloc(num_pages * sizeof(*meta->table)); if (!meta->table) { pr_err("Error allocating zram address table\n"); - goto free_buffer; + goto free_meta; } meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); @@ -230,15 +256,10 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) } rwlock_init(&meta->tb_lock); - mutex_init(&meta->buffer_lock); return meta; free_table: vfree(meta->table); -free_buffer: - free_pages((unsigned long)meta->compress_buffer, 1); -free_workmem: - kfree(meta->compress_workmem); free_meta: kfree(meta); meta = NULL; @@ -288,7 +309,6 @@ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; unsigned long handle = meta->table[index].handle; - u16 size = meta->table[index].size; if (unlikely(!handle)) { /* @@ -297,21 +317,15 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(meta, index, ZRAM_ZERO)) { zram_clear_flag(meta, index, ZRAM_ZERO); - atomic_dec(&zram->stats.pages_zero); + atomic64_dec(&zram->stats.zero_pages); } return; } - if (unlikely(size > max_zpage_size)) - atomic_dec(&zram->stats.bad_compress); - zs_free(meta->mem_pool, handle); - if (size <= PAGE_SIZE / 2) - atomic_dec(&zram->stats.good_compress); - - atomic64_sub(meta->table[index].size, &zram->stats.compr_size); - atomic_dec(&zram->stats.pages_stored); + atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); + atomic64_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; meta->table[index].size = 0; @@ -319,8 +333,7 @@ static void 
zram_free_page(struct zram *zram, size_t index) static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { - int ret = LZO_E_OK; - size_t clen = PAGE_SIZE; + int ret = 0; unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; @@ -340,12 +353,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) if (size == PAGE_SIZE) copy_page(mem, cmem); else - ret = lzo1x_decompress_safe(cmem, size, mem, &clen); + ret = zcomp_decompress(zram->comp, cmem, size, mem); zs_unmap_object(meta->mem_pool, handle); read_unlock(&meta->tb_lock); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); atomic64_inc(&zram->stats.failed_reads); return ret; @@ -388,7 +401,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, ret = zram_decompress_page(zram, uncmem, index); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) + if (unlikely(ret)) goto out_cleanup; if (is_partial_io(bvec)) @@ -413,11 +426,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; struct zram_meta *meta = zram->meta; + struct zcomp_strm *zstrm; bool locked = false; page = bvec->bv_page; - src = meta->compress_buffer; - if (is_partial_io(bvec)) { /* * This is a partial IO. 
We need to read the full page @@ -433,7 +445,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - mutex_lock(&meta->buffer_lock); + zstrm = zcomp_strm_find(zram->comp); locked = true; user_mem = kmap_atomic(page); @@ -454,28 +466,25 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, zram_set_flag(meta, index, ZRAM_ZERO); write_unlock(&zram->meta->tb_lock); - atomic_inc(&zram->stats.pages_zero); + atomic64_inc(&zram->stats.zero_pages); ret = 0; goto out; } - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, - meta->compress_workmem); + ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; uncmem = NULL; } - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Compression failed! err=%d\n", ret); goto out; } - + src = zstrm->buffer; if (unlikely(clen > max_zpage_size)) { - atomic_inc(&zram->stats.bad_compress); clen = PAGE_SIZE; - src = NULL; if (is_partial_io(bvec)) src = uncmem; } @@ -497,6 +506,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, memcpy(cmem, src, clen); } + zcomp_strm_release(zram->comp, zstrm); + locked = false; zs_unmap_object(meta->mem_pool, handle); /* @@ -511,49 +522,88 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, write_unlock(&zram->meta->tb_lock); /* Update stats */ - atomic64_add(clen, &zram->stats.compr_size); - atomic_inc(&zram->stats.pages_stored); - if (clen <= PAGE_SIZE / 2) - atomic_inc(&zram->stats.good_compress); - + atomic64_add(clen, &zram->stats.compr_data_size); + atomic64_inc(&zram->stats.pages_stored); out: if (locked) - mutex_unlock(&meta->buffer_lock); + zcomp_strm_release(zram->comp, zstrm); if (is_partial_io(bvec)) kfree(uncmem); - if (ret) atomic64_inc(&zram->stats.failed_writes); return ret; } static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, struct bio 
*bio, int rw) + int offset, struct bio *bio) { int ret; + int rw = bio_data_dir(bio); - if (rw == READ) + if (rw == READ) { + atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); - else + } else { + atomic64_inc(&zram->stats.num_writes); ret = zram_bvec_write(zram, bvec, index, offset); + } return ret; } +/* + * zram_bio_discard - handler on discard request + * @index: physical block index in PAGE_SIZE units + * @offset: byte offset within physical block + */ +static void zram_bio_discard(struct zram *zram, u32 index, + int offset, struct bio *bio) +{ + size_t n = bio->bi_iter.bi_size; + + /* + * zram manages data in physical block size units. Because logical block + * size isn't identical with physical block size on some arch, we + * could get a discard request pointing to a specific offset within a + * certain physical block. Although we can handle this request by + * reading that physical block and decompressing and partially zeroing + * and re-compressing and then re-storing it, this isn't reasonable + * because our intent with a discard request is to save memory. So + * skipping this logical block is appropriate here. + */ + if (offset) { + if (n < offset) + return; + + n -= offset; + index++; + } + + while (n >= PAGE_SIZE) { + /* + * Discard request can be large so the lock hold times could be + * lengthy. So take the lock once per page.
+ */ + write_lock(&zram->meta->tb_lock); + zram_free_page(zram, index); + write_unlock(&zram->meta->tb_lock); + index++; + n -= PAGE_SIZE; + } +} + static void zram_reset_device(struct zram *zram, bool reset_capacity) { size_t index; struct zram_meta *meta; down_write(&zram->init_lock); - if (!zram->init_done) { + if (!init_done(zram)) { up_write(&zram->init_lock); return; } meta = zram->meta; - zram->init_done = 0; - /* Free all pages that are still in this zram device */ for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { unsigned long handle = meta->table[index].handle; @@ -563,6 +613,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) zs_free(meta->mem_pool, handle); } + zcomp_destroy(zram->comp); + zram->max_comp_streams = 1; + zram_meta_free(zram->meta); zram->meta = NULL; /* Reset stats */ @@ -574,37 +627,14 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) up_write(&zram->init_lock); } -static void zram_init_device(struct zram *zram, struct zram_meta *meta) -{ - if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. 
Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %lu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 - ); - } - - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - - zram->meta = meta; - zram->init_done = 1; - - pr_debug("Initialization done!\n"); -} - static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { u64 disksize; + struct zcomp *comp; struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); + int err; disksize = memparse(buf, NULL); if (!disksize) @@ -612,20 +642,37 @@ static ssize_t disksize_store(struct device *dev, disksize = PAGE_ALIGN(disksize); meta = zram_meta_alloc(disksize); + if (!meta) + return -ENOMEM; + + comp = zcomp_create(zram->compressor, zram->max_comp_streams); + if (IS_ERR(comp)) { + pr_info("Cannot initialise %s compressing backend\n", + zram->compressor); + err = PTR_ERR(comp); + goto out_free_meta; + } + down_write(&zram->init_lock); - if (zram->init_done) { - up_write(&zram->init_lock); - zram_meta_free(meta); + if (init_done(zram)) { pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; + err = -EBUSY; + goto out_destroy_comp; } + zram->meta = meta; + zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_init_device(zram, meta); up_write(&zram->init_lock); - return len; + +out_destroy_comp: + up_write(&zram->init_lock); + zcomp_destroy(comp); +out_free_meta: + zram_meta_free(meta); + return err; } static ssize_t reset_store(struct device *dev, @@ -669,26 +716,23 @@ out: return ret; } -static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) +static void __zram_make_request(struct zram *zram, struct bio *bio) { int offset; u32 index; struct 
bio_vec bvec; struct bvec_iter iter; - switch (rw) { - case READ: - atomic64_inc(&zram->stats.num_reads); - break; - case WRITE: - atomic64_inc(&zram->stats.num_writes); - break; - } - index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + zram_bio_discard(zram, index, offset, bio); + bio_endio(bio, 0); + return; + } + bio_for_each_segment(bvec, bio, iter) { int max_transfer_size = PAGE_SIZE - offset; @@ -703,16 +747,15 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) bv.bv_len = max_transfer_size; bv.bv_offset = bvec.bv_offset; - if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) + if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0) goto out; bv.bv_len = bvec.bv_len - max_transfer_size; bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) + if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0) goto out; } else - if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw) - < 0) + if (zram_bvec_rw(zram, &bvec, index, offset, bio) < 0) goto out; update_position(&index, &offset, &bvec); @@ -734,7 +777,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) struct zram *zram = queue->queuedata; down_read(&zram->init_lock); - if (unlikely(!zram->init_done)) + if (unlikely(!init_done(zram))) goto error; if (!valid_io_request(zram, bio)) { @@ -742,7 +785,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) goto error; } - __zram_make_request(zram, bio, bio_data_dir(bio)); + __zram_make_request(zram, bio); up_read(&zram->init_lock); return; @@ -776,14 +819,21 @@ static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store); static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static 
DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, + max_comp_streams_show, max_comp_streams_store); +static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, + comp_algorithm_show, comp_algorithm_store); + +ZRAM_ATTR_RO(num_reads); +ZRAM_ATTR_RO(num_writes); +ZRAM_ATTR_RO(failed_reads); +ZRAM_ATTR_RO(failed_writes); +ZRAM_ATTR_RO(invalid_io); +ZRAM_ATTR_RO(notify_free); +ZRAM_ATTR_RO(zero_pages); +ZRAM_ATTR_RO(compr_data_size); static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -791,12 +841,16 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_reset.attr, &dev_attr_num_reads.attr, &dev_attr_num_writes.attr, + &dev_attr_failed_reads.attr, + &dev_attr_failed_writes.attr, &dev_attr_invalid_io.attr, &dev_attr_notify_free.attr, &dev_attr_zero_pages.attr, &dev_attr_orig_data_size.attr, &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, + &dev_attr_max_comp_streams.attr, + &dev_attr_comp_algorithm.attr, NULL, }; @@ -837,7 +891,8 @@ static int create_device(struct zram *zram, int device_id) /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ set_capacity(zram->disk, 0); - + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. 
@@ -847,6 +902,21 @@ static int create_device(struct zram *zram, int device_id) ZRAM_LOGICAL_BLOCK_SIZE); blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); + zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_discard_sectors = UINT_MAX; + /* + * zram_bio_discard() will clear all logical blocks if logical block + * size is identical with physical block size(PAGE_SIZE). But if it is + * different, we will skip discarding some parts of logical blocks in + * the part of the request range which isn't aligned to physical block + * size. So we can't ensure that all discarded logical blocks are + * zeroed. + */ + if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) + zram->disk->queue->limits.discard_zeroes_data = 1; + else + zram->disk->queue->limits.discard_zeroes_data = 0; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); add_disk(zram->disk); @@ -856,8 +926,9 @@ static int create_device(struct zram *zram, int device_id) pr_warn("Error creating sysfs group"); goto out_free_disk; } - - zram->init_done = 0; + strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); + zram->meta = NULL; + zram->max_comp_streams = 1; return 0; out_free_disk: diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index ad8aa35bae00..7f21c145e317 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -16,9 +16,10 @@ #define _ZRAM_DRV_H_ #include <linux/spinlock.h> -#include <linux/mutex.h> #include <linux/zsmalloc.h> +#include "zcomp.h" + /* * Some arbitrary value. This is just to catch * invalid value for num_devices module parameter. 
@@ -64,38 +65,33 @@ enum zram_pageflags { struct table { unsigned long handle; u16 size; /* object size (excluding header) */ - u8 count; /* object ref count (not yet used) */ u8 flags; } __aligned(4); struct zram_stats { - atomic64_t compr_size; /* compressed size of pages stored */ + atomic64_t compr_data_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ atomic64_t num_writes; /* --do-- */ atomic64_t failed_reads; /* should NEVER! happen */ atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ - atomic_t pages_zero; /* no. of zero filled pages */ - atomic_t pages_stored; /* no. of pages currently stored */ - atomic_t good_compress; /* % of pages with compression ratio<=50% */ - atomic_t bad_compress; /* % of pages with compression ratio>=75% */ + atomic64_t zero_pages; /* no. of zero filled pages */ + atomic64_t pages_stored; /* no. of pages currently stored */ }; struct zram_meta { rwlock_t tb_lock; /* protect table */ - void *compress_workmem; - void *compress_buffer; struct table *table; struct zs_pool *mem_pool; - struct mutex buffer_lock; /* protect compress buffers */ }; struct zram { struct zram_meta *meta; struct request_queue *queue; struct gendisk *disk; - int init_done; + struct zcomp *comp; + /* Prevent concurrent execution of device init, reset and R/W request */ struct rw_semaphore init_lock; /* @@ -103,7 +99,8 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - + int max_comp_streams; struct zram_stats stats; + char compressor[10]; }; #endif |