diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid5-cache.c | 143 |
1 files changed, 54 insertions, 89 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 289ca3f5d4b3..604c64505232 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -51,6 +51,9 @@ struct r5l_log { sector_t log_start; /* log head. where new data appends */ u64 seq; /* log head sequence */ + sector_t next_checkpoint; + u64 next_cp_seq; + struct mutex io_mutex; struct r5l_io_unit *current_io; /* current io_unit accepting new data */ @@ -65,9 +68,6 @@ struct r5l_log { * cache flush */ struct list_head flushed_ios; /* io_units which settle down in log disk */ struct bio flush_bio; - struct list_head stripe_end_ios;/* io_units which have been completely - * written to the RAID but have not yet - * been considered for updating super */ struct kmem_cache *io_kc; @@ -186,35 +186,6 @@ static void r5l_move_io_unit_list(struct list_head *from, struct list_head *to, } } -/* - * We don't want too many io_units reside in stripe_end_ios list, which will - * waste a lot of memory. So we try to remove some. But we must keep at least 2 - * io_units. The superblock must point to a valid meta, if it's the last meta, - * recovery can scan less - */ -static void r5l_compress_stripe_end_list(struct r5l_log *log) -{ - struct r5l_io_unit *first, *last, *io; - - first = list_first_entry(&log->stripe_end_ios, - struct r5l_io_unit, log_sibling); - last = list_last_entry(&log->stripe_end_ios, - struct r5l_io_unit, log_sibling); - if (first == last) - return; - list_del(&first->log_sibling); - list_del(&last->log_sibling); - while (!list_empty(&log->stripe_end_ios)) { - io = list_first_entry(&log->stripe_end_ios, - struct r5l_io_unit, log_sibling); - list_del(&io->log_sibling); - first->log_end = io->log_end; - r5l_free_io_unit(log, io); - } - list_add_tail(&first->log_sibling, &log->stripe_end_ios); - list_add_tail(&last->log_sibling, &log->stripe_end_ios); -} - static void __r5l_set_io_unit_state(struct r5l_io_unit *io, enum r5l_io_unit_state state) { @@ -546,31 +517,52 @@ static void r5l_run_no_space_stripes(struct r5l_log *log) spin_unlock(&log->no_space_stripes_lock); } +static sector_t r5l_reclaimable_space(struct r5l_log *log) +{ + return r5l_ring_distance(log, log->last_checkpoint, + log->next_checkpoint); +} + +static bool r5l_complete_flushed_ios(struct r5l_log *log) +{ + struct r5l_io_unit *io, *next; + bool found = false; + + assert_spin_locked(&log->io_list_lock); + + list_for_each_entry_safe(io, next, &log->flushed_ios, log_sibling) { + /* don't change list order */ + if (io->state < IO_UNIT_STRIPE_END) + break; + + log->next_checkpoint = io->log_start; + log->next_cp_seq = io->seq; + + list_del(&io->log_sibling); + r5l_free_io_unit(log, io); + + found = true; + } + + return found; +} + static void __r5l_stripe_write_finished(struct r5l_io_unit *io) { struct r5l_log *log = io->log; - struct r5l_io_unit *last; - sector_t reclaimable_space; unsigned long flags; spin_lock_irqsave(&log->io_list_lock, flags); __r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END); - /* might move 0 entry */ - r5l_move_io_unit_list(&log->flushed_ios, &log->stripe_end_ios, - IO_UNIT_STRIPE_END); - if (list_empty(&log->stripe_end_ios)) { + + if (!r5l_complete_flushed_ios(log)) { spin_unlock_irqrestore(&log->io_list_lock, flags); return; } - last = list_last_entry(&log->stripe_end_ios, - struct r5l_io_unit, log_sibling); - reclaimable_space = r5l_ring_distance(log, log->last_checkpoint, - last->log_end); - if (reclaimable_space >= log->max_free_space) + if (r5l_reclaimable_space(log) > log->max_free_space) r5l_wake_reclaim(log, 0); - r5l_compress_stripe_end_list(log); spin_unlock_irqrestore(&log->io_list_lock, flags); wake_up(&log->iounit_wait); } @@ -646,20 +638,13 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) submit_bio(WRITE_FLUSH, &log->flush_bio); } -static void r5l_kick_io_unit(struct r5l_log *log) -{ - md_wakeup_thread(log->rdev->mddev->thread); - wait_event_lock_irq(log->iounit_wait, !list_empty(&log->stripe_end_ios), - log->io_list_lock); -} - static void r5l_write_super(struct r5l_log *log, sector_t cp); static void r5l_do_reclaim(struct r5l_log *log) { - struct r5l_io_unit *io, *last; - LIST_HEAD(list); - sector_t free = 0; sector_t reclaim_target = xchg(&log->reclaim_target, 0); + sector_t reclaimable; + sector_t next_checkpoint; + u64 next_cp_seq; spin_lock_irq(&log->io_list_lock); /* @@ -668,60 +653,41 @@ static void r5l_do_reclaim(struct r5l_log *log) * shouldn't reuse space of an unreclaimable io_unit */ while (1) { - struct list_head *target_list = NULL; - - while (!list_empty(&log->stripe_end_ios)) { - io = list_first_entry(&log->stripe_end_ios, - struct r5l_io_unit, log_sibling); - list_move_tail(&io->log_sibling, &list); - free += r5l_ring_distance(log, io->log_start, - io->log_end); - } - - if (free >= reclaim_target || + reclaimable = r5l_reclaimable_space(log); + if (reclaimable >= reclaim_target || (list_empty(&log->running_ios) && list_empty(&log->io_end_ios) && list_empty(&log->flushing_ios) && list_empty(&log->flushed_ios))) break; - /* Below waiting mostly happens when we shutdown the raid */ - if (!list_empty(&log->flushed_ios)) - target_list = &log->flushed_ios; - else if (!list_empty(&log->flushing_ios)) - target_list = &log->flushing_ios; - else if (!list_empty(&log->io_end_ios)) - target_list = &log->io_end_ios; - else if (!list_empty(&log->running_ios)) - target_list = &log->running_ios; - - r5l_kick_io_unit(log); + md_wakeup_thread(log->rdev->mddev->thread); + wait_event_lock_irq(log->iounit_wait, + r5l_reclaimable_space(log) > reclaimable, + log->io_list_lock); } + + next_checkpoint = log->next_checkpoint; + next_cp_seq = log->next_cp_seq; spin_unlock_irq(&log->io_list_lock); - if (list_empty(&list)) + BUG_ON(reclaimable < 0); + if (reclaimable == 0) return; - /* super always point to last valid meta */ - last = list_last_entry(&list, struct r5l_io_unit, log_sibling); /* * write_super will flush cache of each raid disk. We must write super * here, because the log area might be reused soon and we don't want to * confuse recovery */ - r5l_write_super(log, last->log_start); + r5l_write_super(log, next_checkpoint); mutex_lock(&log->io_mutex); - log->last_checkpoint = last->log_start; - log->last_cp_seq = last->seq; + log->last_checkpoint = next_checkpoint; + log->last_cp_seq = next_cp_seq; mutex_unlock(&log->io_mutex); - r5l_run_no_space_stripes(log); - while (!list_empty(&list)) { - io = list_first_entry(&list, struct r5l_io_unit, log_sibling); - list_del(&io->log_sibling); - r5l_free_io_unit(log, io); - } + r5l_run_no_space_stripes(log); } static void r5l_reclaim_thread(struct md_thread *thread) @@ -1104,7 +1070,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) spin_lock_init(&log->io_list_lock); INIT_LIST_HEAD(&log->running_ios); INIT_LIST_HEAD(&log->io_end_ios); - INIT_LIST_HEAD(&log->stripe_end_ios); INIT_LIST_HEAD(&log->flushing_ios); INIT_LIST_HEAD(&log->flushed_ios); bio_init(&log->flush_bio); |