author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-02-19 01:39:42 +0300
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-23 00:08:16 +0300
commit | d16b4a77a5c64fca52ff637c22668b679b47ef22 (patch)
tree | 7c5260d0bf950ee04a8bb6495b82ca7f6ea7a8f3
parent | ecf37a4a80ec029d640b9c18f87880d4ec4a726f (diff)
download | linux-d16b4a77a5c64fca52ff637c22668b679b47ef22.tar.xz
bcachefs: Assorted journal refactoring
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/journal.c | 371
-rw-r--r-- | fs/bcachefs/journal.h | 24
-rw-r--r-- | fs/bcachefs/journal_io.c | 70
-rw-r--r-- | fs/bcachefs/journal_io.h | 2
-rw-r--r-- | fs/bcachefs/journal_types.h | 18
5 files changed, 231 insertions, 254 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index cf4729b7a083..91d0e5d443ed 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -17,23 +17,14 @@
 #include "super-io.h"
 #include "trace.h"
 
-static bool journal_entry_is_open(struct journal *j)
+static bool __journal_entry_is_open(union journal_res_state state)
 {
-        return j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
+        return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
 }
 
-void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
+static bool journal_entry_is_open(struct journal *j)
 {
-        struct journal_buf *w = journal_prev_buf(j);
-
-        atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
-
-        if (!need_write_just_set &&
-            test_bit(JOURNAL_NEED_WRITE, &j->flags))
-                bch2_time_stats_update(j->delay_time,
-                                       j->need_write_time);
-
-        closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+        return __journal_entry_is_open(j->reservations);
 }
 
 static void journal_pin_new_entry(struct journal *j, int count)
@@ -77,39 +68,76 @@ static inline bool journal_entry_empty(struct jset *j)
         return true;
 }
 
-static enum {
-        JOURNAL_ENTRY_ERROR,
-        JOURNAL_ENTRY_INUSE,
-        JOURNAL_ENTRY_CLOSED,
-        JOURNAL_UNLOCKED,
-} journal_buf_switch(struct journal *j, bool need_write_just_set)
+void bch2_journal_halt(struct journal *j)
+{
+        union journal_res_state old, new;
+        u64 v = atomic64_read(&j->reservations.counter);
+
+        do {
+                old.v = new.v = v;
+                if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
+                        return;
+
+                new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
+        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+                                       old.v, new.v)) != old.v);
+
+        journal_wake(j);
+        closure_wake_up(&journal_cur_buf(j)->wait);
+        closure_wake_up(&journal_prev_buf(j)->wait);
+}
+
+/* journal entry close/open: */
+
+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
+{
+        struct journal_buf *w = journal_prev_buf(j);
+
+        atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
+
+        if (!need_write_just_set &&
+            test_bit(JOURNAL_NEED_WRITE, &j->flags))
+                bch2_time_stats_update(j->delay_time,
+                                       j->need_write_time);
+
+        clear_bit(JOURNAL_NEED_WRITE, &j->flags);
+
+        closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+}
+
+/*
+ * Returns true if journal entry is now closed:
+ */
+static bool __journal_entry_close(struct journal *j)
 {
         struct bch_fs *c = container_of(j, struct bch_fs, journal);
         struct journal_buf *buf = journal_cur_buf(j);
         union journal_res_state old, new;
         u64 v = atomic64_read(&j->reservations.counter);
+        bool set_need_write = false;
+        unsigned sectors;
 
         lockdep_assert_held(&j->lock);
 
         do {
                 old.v = new.v = v;
                 if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
-                        return JOURNAL_ENTRY_CLOSED;
+                        return true;
 
                 if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
                         /* this entry will never be written: */
                         closure_wake_up(&buf->wait);
-                        return JOURNAL_ENTRY_ERROR;
+                        return true;
                 }
 
-                if (new.prev_buf_unwritten)
-                        return JOURNAL_ENTRY_INUSE;
+                if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
+                        set_bit(JOURNAL_NEED_WRITE, &j->flags);
+                        j->need_write_time = local_clock();
+                        set_need_write = true;
+                }
 
-                /*
-                 * avoid race between setting buf->data->u64s and
-                 * journal_res_put starting write:
-                 */
-                journal_state_inc(&new);
+                if (new.prev_buf_unwritten)
+                        return false;
 
                 new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
                 new.idx++;
@@ -119,15 +147,12 @@ static enum {
         } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                        old.v, new.v)) != old.v);
 
-        clear_bit(JOURNAL_NEED_WRITE, &j->flags);
-
         buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
 
-        j->prev_buf_sectors =
-                vstruct_blocks_plus(buf->data, c->block_bits,
-                                    buf->u64s_reserved) *
-                c->opts.block_size;
-        BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
+        sectors = vstruct_blocks_plus(buf->data, c->block_bits,
+                                      buf->u64s_reserved) << c->block_bits;
+        BUG_ON(sectors > buf->sectors);
+        buf->sectors = sectors;
 
         bkey_extent_init(&buf->key);
 
@@ -163,32 +188,22 @@ static enum {
         bch2_journal_buf_init(j);
 
         cancel_delayed_work(&j->write_work);
-        spin_unlock(&j->lock);
 
         /* ugh - might be called from __journal_res_get() under wait_event() */
         __set_current_state(TASK_RUNNING);
-        bch2_journal_buf_put(j, old.idx, need_write_just_set);
-
-        return JOURNAL_UNLOCKED;
+        bch2_journal_buf_put(j, old.idx, set_need_write);
+        return true;
 }
 
-void bch2_journal_halt(struct journal *j)
+static bool journal_entry_close(struct journal *j)
 {
-        union journal_res_state old, new;
-        u64 v = atomic64_read(&j->reservations.counter);
-
-        do {
-                old.v = new.v = v;
-                if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-                        return;
+        bool ret;
 
-                new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
-        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
-                                       old.v, new.v)) != old.v);
+        spin_lock(&j->lock);
+        ret = __journal_entry_close(j);
+        spin_unlock(&j->lock);
 
-        journal_wake(j);
-        closure_wake_up(&journal_cur_buf(j)->wait);
-        closure_wake_up(&journal_prev_buf(j)->wait);
+        return ret;
 }
 
 /*
@@ -196,17 +211,16 @@ void bch2_journal_halt(struct journal *j)
  * journal reservation - journal entry is open means journal is dirty:
  *
  * returns:
- * 1:           success
- * 0:           journal currently full (must wait)
- * -EROFS:      insufficient rw devices
- * -EIO:        journal error
+ * 0:           success
+ * -ENOSPC:     journal currently full, must invoke reclaim
+ * -EAGAIN:     journal blocked, must wait
+ * -EROFS:      insufficient rw devices or journal error
  */
 static int journal_entry_open(struct journal *j)
 {
         struct journal_buf *buf = journal_cur_buf(j);
         union journal_res_state old, new;
-        ssize_t u64s;
-        int sectors;
+        int u64s, ret;
         u64 v;
 
         lockdep_assert_held(&j->lock);
@@ -216,29 +230,22 @@ static int journal_entry_open(struct journal *j)
                 return -EAGAIN;
 
         if (!fifo_free(&j->pin))
-                return 0;
+                return -ENOSPC;
 
-        sectors = bch2_journal_entry_sectors(j);
-        if (sectors <= 0)
-                return sectors;
+        ret = bch2_journal_space_available(j);
+        if (ret)
+                return ret;
 
-        buf->disk_sectors  = sectors;
         buf->u64s_reserved = j->entry_u64s_reserved;
+        buf->disk_sectors  = j->cur_entry_sectors;
+        buf->sectors       = min(buf->disk_sectors, buf->buf_size >> 9);
 
-        sectors = min_t(unsigned, sectors, buf->size >> 9);
-        j->cur_buf_sectors = sectors;
-
-        u64s = (sectors << 9) / sizeof(u64);
-
-        /* Subtract the journal header */
-        u64s -= sizeof(struct jset) / sizeof(u64);
-        u64s -= buf->u64s_reserved;
-        u64s = max_t(ssize_t, 0L, u64s);
-
-        BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
+        u64s = (int) (buf->sectors << 9) / sizeof(u64) -
+                journal_entry_overhead(j);
+        u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
 
         if (u64s <= le32_to_cpu(buf->data->u64s))
-                return 0;
+                return -ENOSPC;
 
         /*
          * Must be set before marking the journal entry as open:
@@ -250,10 +257,11 @@ static int journal_entry_open(struct journal *j)
                 old.v = new.v = v;
 
                 if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-                        return -EIO;
+                        return -EROFS;
 
                 /* Handle any already added entries */
                 new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+                journal_state_inc(&new);
         } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                        old.v, new.v)) != old.v);
 
@@ -266,48 +274,16 @@ static int journal_entry_open(struct journal *j)
                            &j->write_work,
                            msecs_to_jiffies(j->write_delay_ms));
         journal_wake(j);
 
-        return 1;
-}
-
-static bool __journal_entry_close(struct journal *j)
-{
-        bool set_need_write;
-
-        if (!journal_entry_is_open(j)) {
-                spin_unlock(&j->lock);
-                return true;
-        }
-
-        set_need_write = !test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags);
-        if (set_need_write)
-                j->need_write_time = local_clock();
-
-        switch (journal_buf_switch(j, set_need_write)) {
-        case JOURNAL_ENTRY_INUSE:
-                spin_unlock(&j->lock);
-                return false;
-        default:
-                spin_unlock(&j->lock);
-                fallthrough;
-        case JOURNAL_UNLOCKED:
-                return false;
-        }
-}
-
-static bool journal_entry_close(struct journal *j)
-{
-        spin_lock(&j->lock);
-        return __journal_entry_close(j);
+        return 0;
 }
 
 static bool journal_quiesced(struct journal *j)
 {
-        bool ret;
+        union journal_res_state state = READ_ONCE(j->reservations);
+        bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
 
-        spin_lock(&j->lock);
-        ret = !j->reservations.prev_buf_unwritten &&
-              !journal_entry_is_open(j);
-        __journal_entry_close(j);
+        if (!ret)
+                journal_entry_close(j);
 
         return ret;
@@ -357,7 +333,11 @@ retry:
         if (journal_res_get_fast(j, res, flags))
                 return 0;
 
+        if (bch2_journal_error(j))
+                return -EROFS;
+
         spin_lock(&j->lock);
+
         /*
          * Recheck after taking the lock, so we don't race with another thread
          * that just did journal_entry_open() and call journal_entry_close()
@@ -375,56 +355,42 @@ retry:
          */
         buf = journal_cur_buf(j);
         if (journal_entry_is_open(j) &&
-            buf->size >> 9 < buf->disk_sectors &&
-            buf->size < JOURNAL_ENTRY_SIZE_MAX)
-                j->buf_size_want = max(j->buf_size_want, buf->size << 1);
+            buf->buf_size >> 9 < buf->disk_sectors &&
+            buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
+                j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
 
-        /*
-         * Close the current journal entry if necessary, then try to start a new
-         * one:
-         */
-        switch (journal_buf_switch(j, false)) {
-        case JOURNAL_ENTRY_ERROR:
-                spin_unlock(&j->lock);
-                return -EROFS;
-        case JOURNAL_ENTRY_INUSE:
+        if (journal_entry_is_open(j) &&
+            !__journal_entry_close(j)) {
                 /*
-                 * The current journal entry is still open, but we failed to get
-                 * a journal reservation because there's not enough space in it,
-                 * and we can't close it and start another because we haven't
-                 * finished writing out the previous entry:
+                 * We failed to get a reservation on the current open journal
+                 * entry because it's full, and we can't close it because
+                 * there's still a previous one in flight:
                  */
-                spin_unlock(&j->lock);
                 trace_journal_entry_full(c);
-                goto blocked;
-        case JOURNAL_ENTRY_CLOSED:
-                break;
-        case JOURNAL_UNLOCKED:
-                goto retry;
+                ret = -EAGAIN;
+        } else {
+                ret = journal_entry_open(j);
         }
 
-        /* We now have a new, closed journal buf - see if we can open it: */
-        ret = journal_entry_open(j);
+        if ((ret == -EAGAIN || ret == -ENOSPC) &&
+            !j->res_get_blocked_start)
+                j->res_get_blocked_start = local_clock() ?: 1;
+
         spin_unlock(&j->lock);
 
-        if (ret < 0)
-                return ret;
-        if (ret)
+        if (!ret)
                 goto retry;
+
+        if (ret == -ENOSPC) {
+                /*
+                 * Journal is full - can't rely on reclaim from work item due to
+                 * freezing:
+                 */
+                trace_journal_full(c);
+                bch2_journal_reclaim_work(&j->reclaim_work.work);
+                ret = -EAGAIN;
+        }
 
-        /* Journal's full, we have to wait */
-
-        /*
-         * Direct reclaim - can't rely on reclaim from work item
-         * due to freezing..
-         */
-        bch2_journal_reclaim_work(&j->reclaim_work.work);
-
-        trace_journal_full(c);
-blocked:
-        if (!j->res_get_blocked_start)
-                j->res_get_blocked_start = local_clock() ?: 1;
-        return -EAGAIN;
+        return ret;
 }
 
 /*
@@ -461,7 +427,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
         j->entry_u64s_reserved += d;
 
         if (d <= 0)
-                goto out_unlock;
+                goto out;
 
         j->cur_entry_u64s -= d;
         smp_mb();
@@ -474,15 +440,12 @@ void bch2_journal_entry_res_resize(struct journal *j,
                  * Not enough room in current journal entry, have to flush it:
                  */
                 __journal_entry_close(j);
-                goto out;
+        } else {
+                journal_cur_buf(j)->u64s_reserved += d;
         }
-
-        journal_cur_buf(j)->u64s_reserved += d;
-out_unlock:
-        spin_unlock(&j->lock);
 out:
+        spin_unlock(&j->lock);
         res->u64s += d;
-        return;
 }
 
 /* journal flushing: */
@@ -512,47 +475,47 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
 {
         struct bch_fs *c = container_of(j, struct bch_fs, journal);
         int ret;
-retry:
+
         spin_lock(&j->lock);
 
-        if (seq < journal_cur_seq(j) ||
+        /*
+         * Can't try to open more than one sequence number ahead:
+         */
+        BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
+
+        if (journal_cur_seq(j) > seq ||
             journal_entry_is_open(j)) {
                 spin_unlock(&j->lock);
                 return 0;
         }
 
-        if (journal_cur_seq(j) < seq) {
-                switch (journal_buf_switch(j, false)) {
-                case JOURNAL_ENTRY_ERROR:
-                        spin_unlock(&j->lock);
-                        return -EROFS;
-                case JOURNAL_ENTRY_INUSE:
-                        /* haven't finished writing out the previous one: */
-                        trace_journal_entry_full(c);
-                        goto blocked;
-                case JOURNAL_ENTRY_CLOSED:
-                        break;
-                case JOURNAL_UNLOCKED:
-                        goto retry;
-                }
-        }
-
-        BUG_ON(journal_cur_seq(j) < seq);
+        if (journal_cur_seq(j) < seq &&
+            !__journal_entry_close(j)) {
+                /* haven't finished writing out the previous one: */
+                trace_journal_entry_full(c);
+                ret = -EAGAIN;
+        } else {
+                BUG_ON(journal_cur_seq(j) != seq);
 
-        ret = journal_entry_open(j);
-        if (ret) {
-                spin_unlock(&j->lock);
-                return ret < 0 ? ret : 0;
+                ret = journal_entry_open(j);
         }
-blocked:
-        if (!j->res_get_blocked_start)
+
+        if ((ret == -EAGAIN || ret == -ENOSPC) &&
+            !j->res_get_blocked_start)
                 j->res_get_blocked_start = local_clock() ?: 1;
 
-        closure_wait(&j->async_wait, cl);
+        if (ret == -EAGAIN || ret == -ENOSPC)
+                closure_wait(&j->async_wait, cl);
+
         spin_unlock(&j->lock);
 
-        bch2_journal_reclaim_work(&j->reclaim_work.work);
-        return -EAGAIN;
+        if (ret == -ENOSPC) {
+                trace_journal_full(c);
+                bch2_journal_reclaim_work(&j->reclaim_work.work);
+                ret = -EAGAIN;
+        }
+
+        return ret;
 }
 
 static int journal_seq_error(struct journal *j, u64 seq)
@@ -635,8 +598,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 
         if (seq == journal_cur_seq(j))
                 __journal_entry_close(j);
-        else
-                spin_unlock(&j->lock);
+        spin_unlock(&j->lock);
 }
 
 static int journal_seq_flushed(struct journal *j, u64 seq)
@@ -648,8 +610,7 @@ static int journal_seq_flushed(struct journal *j, u64 seq)
 
         if (seq == journal_cur_seq(j))
                 __journal_entry_close(j);
-        else
-                spin_unlock(&j->lock);
+        spin_unlock(&j->lock);
 
         return ret;
 }
@@ -783,7 +744,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                 goto err;
 
         journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
-                                        nr + sizeof(*journal_buckets) / sizeof(u64));
+                                                 nr + sizeof(*journal_buckets) / sizeof(u64));
         if (!journal_buckets)
                 goto err;
@@ -846,9 +807,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                 ja->nr++;
 
                 bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
-                                ca->mi.bucket_size,
-                                gc_phase(GC_PHASE_SB),
-                                0);
+                                          ca->mi.bucket_size,
+                                          gc_phase(GC_PHASE_SB),
+                                          0);
 
                 if (c) {
                         spin_unlock(&c->journal.lock);
@@ -899,7 +860,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
          */
 
         if (bch2_disk_reservation_get(c, &disk_res,
-                        bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+                                      bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
                 mutex_unlock(&c->sb_lock);
                 return -ENOSPC;
         }
@@ -996,7 +957,7 @@ void bch2_fs_journal_start(struct journal *j)
         journal_pin_new_entry(j, 0);
 
         /*
-         * journal_buf_switch() only inits the next journal entry when it
+         * __journal_entry_close() only inits the next journal entry when it
          * closes an open journal entry - the very first journal entry gets
          * initialized here:
          */
@@ -1063,8 +1024,8 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
 void bch2_fs_journal_exit(struct journal *j)
 {
-        kvpfree(j->buf[1].data, j->buf[1].size);
-        kvpfree(j->buf[0].data, j->buf[0].size);
+        kvpfree(j->buf[1].data, j->buf[1].buf_size);
+        kvpfree(j->buf[0].data, j->buf[0].buf_size);
         free_fifo(&j->pin);
 }
@@ -1088,8 +1049,8 @@ int bch2_fs_journal_init(struct journal *j)
 
         lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
-        j->buf[0].size          = JOURNAL_ENTRY_SIZE_MIN;
-        j->buf[1].size          = JOURNAL_ENTRY_SIZE_MIN;
+        j->buf[0].buf_size      = JOURNAL_ENTRY_SIZE_MIN;
+        j->buf[1].buf_size      = JOURNAL_ENTRY_SIZE_MIN;
         j->write_delay_ms       = 1000;
         j->reclaim_delay_ms     = 100;
@@ -1102,8 +1063,8 @@ int bch2_fs_journal_init(struct journal *j)
                   { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
 
         if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-            !(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
-            !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
+            !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
+            !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
                 ret = -ENOMEM;
                 goto out;
         }
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 5290cdeab585..4acb0f59396d 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -179,6 +179,11 @@ static inline unsigned jset_u64s(unsigned u64s)
         return u64s + sizeof(struct jset_entry) / sizeof(u64);
 }
 
+static inline int journal_entry_overhead(struct journal *j)
+{
+        return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
+}
+
 static inline struct jset_entry *
 bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
 {
@@ -225,7 +230,7 @@ static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *
                             id, 0, k, k->k.u64s);
 }
 
-void bch2_journal_buf_put_slowpath(struct journal *, bool);
+void __bch2_journal_buf_put(struct journal *, bool);
 
 static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
                                         bool need_write_just_set)
@@ -236,17 +241,10 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
                                     .buf0_count = idx == 0,
                                     .buf1_count = idx == 1,
                                     }).v, &j->reservations.counter);
-
-        EBUG_ON(s.idx != idx && !s.prev_buf_unwritten);
-
-        /*
-         * Do not initiate a journal write if the journal is in an error state
-         * (previous journal entry write may have failed)
-         */
-        if (s.idx != idx &&
-            !journal_state_count(s, idx) &&
-            s.cur_entry_offset != JOURNAL_ENTRY_ERROR_VAL)
-                bch2_journal_buf_put_slowpath(j, need_write_just_set);
+        if (!journal_state_count(s, idx)) {
+                EBUG_ON(s.idx == idx || !s.prev_buf_unwritten);
+                __bch2_journal_buf_put(j, need_write_just_set);
+        }
 }
 
 /*
@@ -333,6 +331,8 @@ out:
         return 0;
 }
 
+/* journal_entry_res: */
+
 void bch2_journal_entry_res_resize(struct journal *,
                                    struct journal_entry_res *,
                                    unsigned);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 17eba4269719..e5e50be80126 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -902,13 +902,16 @@ static unsigned journal_dev_buckets_available(struct journal *j,
         return available;
 }
 
-/* returns number of sectors available for next journal entry: */
-int bch2_journal_entry_sectors(struct journal *j)
+int bch2_journal_space_available(struct journal *j)
 {
         struct bch_fs *c = container_of(j, struct bch_fs, journal);
         struct bch_dev *ca;
-        unsigned sectors_available = UINT_MAX;
+        unsigned sectors_next_entry = UINT_MAX;
         unsigned i, nr_online = 0, nr_devs = 0;
+        unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
+                ? journal_prev_buf(j)->sectors
+                : 0;
+        int ret = 0;
 
         lockdep_assert_held(&j->lock);
 
@@ -921,16 +924,16 @@ int bch2_journal_entry_sectors(struct journal *j)
                 if (!ja->nr)
                         continue;
 
+                nr_online++;
+
                 buckets_this_device = journal_dev_buckets_available(j, ja);
                 sectors_this_device = ja->sectors_free;
 
-                nr_online++;
-
                 /*
                  * We that we don't allocate the space for a journal entry
                  * until we write it out - thus, account for it here:
                  */
-                if (j->prev_buf_sectors >= sectors_this_device) {
+                if (unwritten_sectors >= sectors_this_device) {
                         if (!buckets_this_device)
                                 continue;
 
@@ -938,7 +941,7 @@ int bch2_journal_entry_sectors(struct journal *j)
                         sectors_this_device = ca->mi.bucket_size;
                 }
 
-                sectors_this_device -= j->prev_buf_sectors;
+                sectors_this_device -= unwritten_sectors;
 
                 if (buckets_this_device)
                         sectors_this_device = ca->mi.bucket_size;
@@ -946,19 +949,26 @@ int bch2_journal_entry_sectors(struct journal *j)
                 if (!sectors_this_device)
                         continue;
 
-                sectors_available = min(sectors_available,
-                                        sectors_this_device);
+                sectors_next_entry = min(sectors_next_entry,
+                                         sectors_this_device);
+
                 nr_devs++;
         }
         rcu_read_unlock();
 
-        if (nr_online < c->opts.metadata_replicas_required)
-                return -EROFS;
+        if (nr_online < c->opts.metadata_replicas_required) {
+                ret = -EROFS;
+                sectors_next_entry = 0;
+        } else if (!sectors_next_entry ||
+                   nr_devs < min_t(unsigned, nr_online,
+                                   c->opts.metadata_replicas)) {
+                ret = -ENOSPC;
+                sectors_next_entry = 0;
+        }
 
-        if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas))
-                return 0;
+        WRITE_ONCE(j->cur_entry_sectors, sectors_next_entry);
 
-        return sectors_available;
+        return ret;
 }
 
 static void __journal_write_alloc(struct journal *j,
@@ -1059,9 +1069,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
         __journal_write_alloc(j, w, &devs_sorted,
                               sectors, &replicas, replicas_want);
 done:
-        if (replicas >= replicas_want)
-                j->prev_buf_sectors = 0;
-
         spin_unlock(&j->lock);
         rcu_read_unlock();
 
@@ -1117,17 +1124,17 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
         unsigned new_size = READ_ONCE(j->buf_size_want);
         void *new_buf;
 
-        if (buf->size >= new_size)
+        if (buf->buf_size >= new_size)
                 return;
 
         new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
         if (!new_buf)
                 return;
 
-        memcpy(new_buf, buf->data, buf->size);
-        kvpfree(buf->data, buf->size);
+        memcpy(new_buf, buf->data, buf->buf_size);
+        kvpfree(buf->data, buf->buf_size);
         buf->data = new_buf;
-        buf->size = new_size;
+        buf->buf_size = new_size;
 }
 
 static void journal_write_done(struct closure *cl)
@@ -1227,15 +1234,14 @@ void bch2_journal_write(struct closure *cl)
 
         j->write_start_time = local_clock();
 
-        start = vstruct_last(w->data);
+        start = vstruct_last(jset);
         end   = bch2_journal_super_entries_add_common(c, start,
                                                 le64_to_cpu(jset->seq));
         u64s  = (u64 *) end - (u64 *) start;
         BUG_ON(u64s > j->entry_u64s_reserved);
 
-        le32_add_cpu(&w->data->u64s, u64s);
-        BUG_ON(vstruct_sectors(jset, c->block_bits) >
-               w->disk_sectors);
+        le32_add_cpu(&jset->u64s, u64s);
+        BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
 
         journal_write_compact(jset);
@@ -1273,10 +1279,10 @@ void bch2_journal_write(struct closure *cl)
                 goto err;
 
         sectors = vstruct_sectors(jset, c->block_bits);
-        BUG_ON(sectors > j->prev_buf_sectors);
+        BUG_ON(sectors > w->sectors);
 
-        bytes = vstruct_bytes(w->data);
-        memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
+        bytes = vstruct_bytes(jset);
+        memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
 
         if (journal_write_alloc(j, w, sectors)) {
                 bch2_journal_halt(j);
@@ -1287,6 +1293,12 @@ void bch2_journal_write(struct closure *cl)
         }
 
         /*
+         * write is allocated, no longer need to account for it in
+         * bch2_journal_entry_sectors:
+         */
+        w->sectors = 0;
+
+        /*
          * XXX: we really should just disable the entire journal in nochanges
          * mode
          */
@@ -1316,7 +1328,7 @@ void bch2_journal_write(struct closure *cl)
                 trace_journal_write(bio);
                 closure_bio_submit(bio, cl);
 
-                ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq);
+                ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq);
         }
 
         for_each_rw_member(ca, c, i)
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index e19e549baf8a..d1409039724d 100644
--- a/fs/bcachefs/journal_io.h
+++ b/fs/bcachefs/journal_io.h
@@ -40,7 +40,7 @@ int bch2_journal_read(struct bch_fs *, struct list_head *);
 void bch2_journal_entries_free(struct list_head *);
 int bch2_journal_replay(struct bch_fs *, struct list_head *);
 
-int bch2_journal_entry_sectors(struct journal *);
+int bch2_journal_space_available(struct journal *);
 void bch2_journal_write(struct closure *);
 
 #endif /* _BCACHEFS_JOURNAL_IO_H */
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index e952eb06eff5..3372e87be124 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -22,8 +22,10 @@ struct journal_buf {
 
         struct closure_waitlist wait;
 
-        unsigned                size;
-        unsigned                disk_sectors;
+        unsigned                buf_size;       /* size in bytes of @data */
+        unsigned                sectors;        /* maximum size for current entry */
+        unsigned                disk_sectors;   /* maximum size entry could have been, if
+                                                   buf_size was bigger */
         unsigned                u64s_reserved;
 
         /* bloom filter: */
         unsigned long           has_inode[1024 / sizeof(unsigned long)];
@@ -129,9 +131,14 @@ struct journal {
         unsigned long           flags;
 
         union journal_res_state reservations;
+
+        /* Max size of current journal entry */
         unsigned                cur_entry_u64s;
-        unsigned                prev_buf_sectors;
-        unsigned                cur_buf_sectors;
+        unsigned                cur_entry_sectors;
+
+        /* Reserved space in journal entry to be used just prior to write */
+        unsigned                entry_u64s_reserved;
+
         unsigned                buf_size_want;
 
         /*
@@ -159,9 +166,6 @@ struct journal {
         u64                     seq_ondisk;
         u64                     last_seq_ondisk;
 
-        /* Reserved space in journal entry to be used just prior to write */
-        unsigned                entry_u64s_reserved;
-
         /*
          * FIFO of journal entries whose btree updates have not yet been
          * written out.
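
The same lockless update pattern appears in bch2_journal_halt(), __journal_entry_close() and journal_entry_open() above: all reservation state lives in one 64-bit union journal_res_state, a private copy is edited, and atomic64_cmpxchg() publishes it, retrying if another CPU changed the state first. Below is a minimal userspace sketch of that loop, not part of the patch; the union layout is abridged to a single field and every name in it is invented for illustration.

#include <stdatomic.h>
#include <stdint.h>

#define ENTRY_CLOSED_VAL ((uint32_t)~1U)  /* stand-in for JOURNAL_ENTRY_CLOSED_VAL */
#define ENTRY_ERROR_VAL  ((uint32_t)~0U)  /* stand-in for JOURNAL_ENTRY_ERROR_VAL */

union res_state {
        uint64_t v;                        /* whole state, updated as one word */
        struct {
                uint32_t cur_entry_offset; /* u64s consumed, or CLOSED/ERROR */
                uint32_t other;            /* idx, refcounts etc. in the real union */
        };
};

/* Mark the journal as errored, shaped like bch2_journal_halt(): */
static void halt(_Atomic uint64_t *state)
{
        union res_state old, new;
        uint64_t v = atomic_load(state);

        do {
                old.v = new.v = v;
                if (old.cur_entry_offset == ENTRY_ERROR_VAL)
                        return;            /* someone else already halted it */
                new.cur_entry_offset = ENTRY_ERROR_VAL;
                /* on failure, v is refreshed with the current value and we retry */
        } while (!atomic_compare_exchange_weak(state, &v, new.v));
}

int main(void)
{
        _Atomic uint64_t state = 0;
        halt(&state);

        union res_state s = { .v = atomic_load(&state) };
        return s.cur_entry_offset == ENTRY_ERROR_VAL ? 0 : 1;
}

Because the whole state fits in one word, a single load yields a consistent snapshot; that is what lets the patch reduce journal_quiesced() to a READ_ONCE() plus two field tests without taking j->lock.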
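After the patch, journal_entry_open() sizes the entry as u64s = (buf->sectors << 9) / sizeof(u64) - journal_entry_overhead(j), where the overhead is the jset header plus j->entry_u64s_reserved. A worked example with made-up numbers follows; the real header size and reservation depend on the filesystem.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical inputs, chosen only to make the arithmetic concrete: */
        unsigned cur_entry_sectors = 16;   /* published by bch2_journal_space_available() */
        unsigned buf_size          = 8192; /* buf->buf_size: bytes of in-memory buffer */
        unsigned header_u64s       = 4;    /* stand-in for sizeof(struct jset) / sizeof(u64) */
        unsigned u64s_reserved     = 8;    /* stand-in for j->entry_u64s_reserved */

        /* buf->sectors = min(space the devices offer, what the buffer can hold): */
        unsigned sectors = cur_entry_sectors < (buf_size >> 9)
                         ? cur_entry_sectors : (buf_size >> 9);

        int u64s = (int)((sectors << 9) / sizeof(uint64_t))
                 - (int)(header_u64s + u64s_reserved);
        if (u64s < 0)
                u64s = 0;

        /* 16 sectors -> 8192 bytes -> 1024 u64s, minus 12 of overhead = 1012 */
        printf("%d u64s of payload fit in a %u-sector entry\n", u64s, sectors);
        return 0;
}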
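The journal.h hunk also simplifies buffer lifetime: opening an entry now takes a reference (the journal_state_inc() added to journal_entry_open()), bch2_journal_buf_put() drops one with a single atomic64_sub_return(), and whoever drops the last reference on a buffer that is no longer current starts the write. A toy model of that put path; the packing, names and values here are invented (the real union tracks counts for two buffers plus extra flags).

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Refcount in the high 32 bits, index of the current buffer in the low 32: */
#define ONE_REF  (1ULL << 32)
#define COUNT(v) ((uint32_t)((v) >> 32))
#define IDX(v)   ((uint32_t)(v))

static _Atomic uint64_t reservations;

static void journal_write_start(void)
{
        puts("last reference dropped on a closed buffer: submit the write");
}

static void buf_put(uint32_t idx)
{
        /* atomic64_sub_return() equivalent: state *after* dropping our ref */
        uint64_t s = atomic_fetch_sub(&reservations, ONE_REF) - ONE_REF;

        /* Last reference gone, and the journal moved on to another buffer: */
        if (!COUNT(s) && IDX(s) != idx)
                journal_write_start();
}

int main(void)
{
        /* One outstanding reference on buffer 0; buffer 1 is now current: */
        atomic_store(&reservations, ONE_REF | 1);
        buf_put(0);
        return 0;
}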
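Finally, the error plumbing: journal_entry_open() and bch2_journal_space_available() now report why an entry could not be opened, and __journal_res_get() converts -ENOSPC into direct reclaim plus -EAGAIN, so callers only ever observe 0, -EAGAIN or -EROFS. A hypothetical caller loop under that convention; both functions below are invented stand-ins, and the stub spins where the real code parks on a wait queue or closure.

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Invented stand-in for the patched __journal_res_get(): after this patch it
 * returns only 0, -EAGAIN or -EROFS (-ENOSPC is handled internally). */
static int journal_res_get_stub(void)
{
        return attempts++ < 2 ? -EAGAIN : 0;   /* blocked twice, then succeeds */
}

static void wait_for_journal(void)
{
        puts("journal blocked: waiting for a write or reclaim to finish");
}

int main(void)
{
        int ret;

        while ((ret = journal_res_get_stub()) == -EAGAIN)
                wait_for_journal();

        printf("reservation result: %d\n", ret); /* 0, or -EROFS if halted */
        return 0;
}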