summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2014-04-28 20:43:19 +0400
committerJens Axboe <axboe@fb.com>2014-04-30 23:46:54 +0400
commite82998743385ca861b9ec919eb2ba8177ce72180 (patch)
tree4cf895d0a9633eff1f11363f98ebe60713a4bcd7
parent0e49d7b014c5d591a053d08888a455bd74a88646 (diff)
downloadlinux-e82998743385ca861b9ec919eb2ba8177ce72180.tar.xz
drbd: don't let application IO pre-empt resync too often
Before, application IO could pre-empt resync activity for up to hardcoded 20 seconds per resync request. A very busy server could throttle the effective resync bandwidth down to one request per 20 seconds. Now, we only let application IO pre-empt resync traffic while the current resync rate estimate is above c-min-rate. If you disable the c-min-rate throttle feature (set c-min-rate = 0), application IO will no longer pre-empt resync traffic at all. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--drivers/block/drbd/drbd_actlog.c13
-rw-r--r--drivers/block/drbd/drbd_int.h3
-rw-r--r--drivers/block/drbd/drbd_receiver.c47
3 files changed, 34 insertions, 29 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 7e7b0e143655..8dd09a7f23c6 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -1022,8 +1022,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
unsigned int enr = BM_SECT_TO_EXT(sector);
struct bm_extent *bm_ext;
int i, sig;
- int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
- 200 times -> 20 seconds. */
+ bool sa;
retry:
sig = wait_event_interruptible(device->al_wait,
@@ -1034,12 +1033,15 @@ retry:
if (test_bit(BME_LOCKED, &bm_ext->flags))
return 0;
+ /* step aside only while we are above c-min-rate; unless disabled. */
+ sa = drbd_rs_c_min_rate_throttle(device);
+
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(device->al_wait,
!_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
- test_bit(BME_PRIORITY, &bm_ext->flags));
+ (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
- if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
+ if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
spin_lock_irq(&device->al_lock);
if (lc_put(device->resync, &bm_ext->lce) == 0) {
bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
@@ -1051,9 +1053,6 @@ retry:
return -EINTR;
if (schedule_timeout_interruptible(HZ/10))
return -EINTR;
- if (sa && --sa == 0)
- drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec."
- "Resync stalled?\n");
goto retry;
}
}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 361a2e9cd727..f0cabea5cda2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1339,7 +1339,8 @@ extern void start_resync_timer_fn(unsigned long data);
/* drbd_receiver.c */
extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_asender(struct drbd_thread *thi);
-extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
extern int drbd_submit_peer_request(struct drbd_device *,
struct drbd_peer_request *, const unsigned,
const int);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6ffbc22eba0b..10d2dcb16bff 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2323,39 +2323,45 @@ out_interrupted:
* The current sync rate used here uses only the most recent two step marks,
* to have a short time average so we can react faster.
*/
-int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
{
- struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
- unsigned long db, dt, dbdt;
struct lc_element *tmp;
- int curr_events;
- int throttle = 0;
- unsigned int c_min_rate;
+ bool throttle = true;
- rcu_read_lock();
- c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
- rcu_read_unlock();
-
- /* feature disabled? */
- if (c_min_rate == 0)
- return 0;
+ if (!drbd_rs_c_min_rate_throttle(device))
+ return false;
spin_lock_irq(&device->al_lock);
tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
if (tmp) {
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
- if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
- spin_unlock_irq(&device->al_lock);
- return 0;
- }
+ if (test_bit(BME_PRIORITY, &bm_ext->flags))
+ throttle = false;
/* Do not slow down if app IO is already waiting for this extent */
}
spin_unlock_irq(&device->al_lock);
+ return throttle;
+}
+
+bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
+{
+ struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
+ unsigned long db, dt, dbdt;
+ unsigned int c_min_rate;
+ int curr_events;
+
+ rcu_read_lock();
+ c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
+ rcu_read_unlock();
+
+ /* feature disabled? */
+ if (c_min_rate == 0)
+ return false;
+
curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
(int)part_stat_read(&disk->part0, sectors[1]) -
atomic_read(&device->rs_sect_ev);
-
if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
unsigned long rs_left;
int i;
@@ -2378,12 +2384,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
dbdt = Bit2KB(db/dt);
if (dbdt > c_min_rate)
- throttle = 1;
+ return true;
}
- return throttle;
+ return false;
}
-
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
struct drbd_peer_device *peer_device;