diff options
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r-- | fs/jbd2/commit.c | 257 |
1 files changed, 203 insertions, 54 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6986f334c643..4f302d279279 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -20,6 +20,8 @@ #include <linux/slab.h> #include <linux/mm.h> #include <linux/pagemap.h> +#include <linux/jiffies.h> +#include <linux/crc32.h> /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -92,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh) return 1; } -/* Done it all: now write the commit record. We should have +/* + * Done it all: now submit the commit record. We should have * cleaned up our previous buffers by now, so if we are in abort * mode we can now just skip the rest of the journal write * entirely. * * Returns 1 if the journal needs to be aborted or 0 on success */ -static int journal_write_commit_record(journal_t *journal, - transaction_t *commit_transaction) +static int journal_submit_commit_record(journal_t *journal, + transaction_t *commit_transaction, + struct buffer_head **cbh, + __u32 crc32_sum) { struct journal_head *descriptor; + struct commit_header *tmp; struct buffer_head *bh; - int i, ret; + int ret; int barrier_done = 0; if (is_journal_aborted(journal)) @@ -116,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal, bh = jh2bh(descriptor); - /* AKPM: buglet - add `i' to tmp! */ - for (i = 0; i < bh->b_size; i += 512) { - journal_header_t *tmp = (journal_header_t*)bh->b_data; - tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); - tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); + tmp = (struct commit_header *)bh->b_data; + tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); + tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); + + if (JBD2_HAS_COMPAT_FEATURE(journal, + JBD2_FEATURE_COMPAT_CHECKSUM)) { + tmp->h_chksum_type = JBD2_CRC32_CHKSUM; + tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; + tmp->h_chksum[0] = cpu_to_be32(crc32_sum); } - JBUFFER_TRACE(descriptor, "write commit block"); + JBUFFER_TRACE(descriptor, "submit commit block"); + lock_buffer(bh); + set_buffer_dirty(bh); - if (journal->j_flags & JBD2_BARRIER) { + set_buffer_uptodate(bh); + bh->b_end_io = journal_end_buffer_io_sync; + + if (journal->j_flags & JBD2_BARRIER && + !JBD2_HAS_COMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { set_buffer_ordered(bh); barrier_done = 1; } - ret = sync_dirty_buffer(bh); + ret = submit_bh(WRITE, bh); + /* is it possible for another commit to fail at roughly * the same time as this one? If so, we don't want to * trust the barrier flag in the super, but instead want @@ -151,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal, clear_buffer_ordered(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); - ret = sync_dirty_buffer(bh); + ret = submit_bh(WRITE, bh); } - put_bh(bh); /* One for getblk() */ - jbd2_journal_put_journal_head(descriptor); + *cbh = bh; + return ret; +} + +/* + * This function along with journal_submit_commit_record + * allows to write the commit record asynchronously. + */ +static int journal_wait_on_commit_record(struct buffer_head *bh) +{ + int ret = 0; + + clear_buffer_dirty(bh); + wait_on_buffer(bh); + + if (unlikely(!buffer_uptodate(bh))) + ret = -EIO; + put_bh(bh); /* One for getblk() */ + jbd2_journal_put_journal_head(bh2jh(bh)); - return (ret == -EIO); + return ret; } +/* + * Wait for all submitted IO to complete. + */ +static int journal_wait_on_locked_list(journal_t *journal, + transaction_t *commit_transaction) +{ + int ret = 0; + struct journal_head *jh; + + while (commit_transaction->t_locked_list) { + struct buffer_head *bh; + + jh = commit_transaction->t_locked_list->b_tprev; + bh = jh2bh(jh); + get_bh(bh); + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + ret = -EIO; + spin_lock(&journal->j_list_lock); + } + if (!inverted_lock(journal, bh)) { + put_bh(bh); + spin_lock(&journal->j_list_lock); + continue; + } + if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { + __jbd2_journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + jbd2_journal_remove_journal_head(bh); + put_bh(bh); + } else { + jbd_unlock_bh_state(bh); + } + put_bh(bh); + cond_resched_lock(&journal->j_list_lock); + } + return ret; + } + static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) { int i; @@ -265,7 +341,7 @@ write_out_data: put_bh(bh); } - if (lock_need_resched(&journal->j_list_lock)) { + if (need_resched() || spin_needbreak(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } @@ -274,7 +350,21 @@ write_out_data: journal_do_submit_data(wbuf, bufs); } -static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, +static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) +{ + struct page *page = bh->b_page; + char *addr; + __u32 checksum; + + addr = kmap_atomic(page, KM_USER0); + checksum = crc32_be(crc32_sum, + (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); + kunmap_atomic(addr, KM_USER0); + + return checksum; +} + +static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); @@ -290,6 +380,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, */ void jbd2_journal_commit_transaction(journal_t *journal) { + struct transaction_stats_s stats; transaction_t *commit_transaction; struct journal_head *jh, *new_jh, *descriptor; struct buffer_head **wbuf = journal->j_wbuf; @@ -305,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) int tag_flag; int i; int tag_bytes = journal_tag_bytes(journal); + struct buffer_head *cbh = NULL; /* For transactional checksums */ + __u32 crc32_sum = ~0; /* * First job: lock down the current transaction and wait for @@ -337,6 +430,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; + stats.u.run.rs_wait = commit_transaction->t_max_wait; + stats.u.run.rs_locked = jiffies; + stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, + stats.u.run.rs_locked); + spin_lock(&commit_transaction->t_handle_lock); while (commit_transaction->t_updates) { DEFINE_WAIT(wait); @@ -407,6 +505,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + stats.u.run.rs_flushing = jiffies; + stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, + stats.u.run.rs_flushing); + commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; journal->j_running_transaction = NULL; @@ -440,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) journal_submit_data_buffers(journal, commit_transaction); /* - * Wait for all previously submitted IO to complete. + * Wait for all previously submitted IO to complete if commit + * record is to be written synchronously. */ spin_lock(&journal->j_list_lock); - while (commit_transaction->t_locked_list) { - struct buffer_head *bh; + if (!JBD2_HAS_INCOMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) + err = journal_wait_on_locked_list(journal, + commit_transaction); - jh = commit_transaction->t_locked_list->b_tprev; - bh = jh2bh(jh); - get_bh(bh); - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - spin_lock(&journal->j_list_lock); - } - if (!inverted_lock(journal, bh)) { - put_bh(bh); - spin_lock(&journal->j_list_lock); - continue; - } - if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { - __jbd2_journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - put_bh(bh); - } else { - jbd_unlock_bh_state(bh); - } - put_bh(bh); - cond_resched_lock(&journal->j_list_lock); - } spin_unlock(&journal->j_list_lock); if (err) @@ -498,6 +577,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ commit_transaction->t_state = T_COMMIT; + stats.u.run.rs_logging = jiffies; + stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, + stats.u.run.rs_logging); + stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; + stats.u.run.rs_blocks_logged = 0; + descriptor = NULL; bufs = 0; while (commit_transaction->t_buffers) { @@ -639,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; + /* + * Compute checksum. + */ + if (JBD2_HAS_COMPAT_FEATURE(journal, + JBD2_FEATURE_COMPAT_CHECKSUM)) { + crc32_sum = + jbd2_checksum_data(crc32_sum, bh); + } + lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); @@ -646,6 +740,7 @@ start_journal_io: submit_bh(WRITE, bh); } cond_resched(); + stats.u.run.rs_blocks_logged += bufs; /* Force a new descriptor to be generated next time round the loop. */ @@ -654,6 +749,23 @@ start_journal_io: } } + /* Done it all: now write the commit record asynchronously. */ + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + err = journal_submit_commit_record(journal, commit_transaction, + &cbh, crc32_sum); + if (err) + __jbd2_journal_abort_hard(journal); + + spin_lock(&journal->j_list_lock); + err = journal_wait_on_locked_list(journal, + commit_transaction); + spin_unlock(&journal->j_list_lock); + if (err) + __jbd2_journal_abort_hard(journal); + } + /* Lo and behold: we have just managed to send a transaction to the log. Before we can commit it, wait for the IO so far to complete. Control buffers being written are on the @@ -753,8 +865,14 @@ wait_for_iobuf: jbd_debug(3, "JBD: commit phase 6\n"); - if (journal_write_commit_record(journal, commit_transaction)) - err = -EIO; + if (!JBD2_HAS_INCOMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + err = journal_submit_commit_record(journal, commit_transaction, + &cbh, crc32_sum); + if (err) + __jbd2_journal_abort_hard(journal); + } + err = journal_wait_on_commit_record(cbh); if (err) jbd2_journal_abort(journal, err); @@ -816,6 +934,7 @@ restart_loop: cp_transaction = jh->b_cp_transaction; if (cp_transaction) { JBUFFER_TRACE(jh, "remove from old cp transaction"); + cp_transaction->t_chp_stats.cs_dropped++; __jbd2_journal_remove_checkpoint(jh); } @@ -867,10 +986,10 @@ restart_loop: } spin_unlock(&journal->j_list_lock); /* - * This is a bit sleazy. We borrow j_list_lock to protect - * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. - * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but - * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint + * This is a bit sleazy. We use j_list_lock to protect transition + * of a transaction into T_FINISHED state and calling + * __jbd2_journal_drop_transaction(). Otherwise we could race with + * other checkpointing code processing the transaction... */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); @@ -890,6 +1009,36 @@ restart_loop: J_ASSERT(commit_transaction->t_state == T_COMMIT); + commit_transaction->t_start = jiffies; + stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging, + commit_transaction->t_start); + + /* + * File the transaction for history + */ + stats.ts_type = JBD2_STATS_RUN; + stats.ts_tid = commit_transaction->t_tid; + stats.u.run.rs_handle_count = commit_transaction->t_handle_count; + spin_lock(&journal->j_history_lock); + memcpy(journal->j_history + journal->j_history_cur, &stats, + sizeof(stats)); + if (++journal->j_history_cur == journal->j_history_max) + journal->j_history_cur = 0; + + /* + * Calculate overall stats + */ + journal->j_stats.ts_tid++; + journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait; + journal->j_stats.u.run.rs_running += stats.u.run.rs_running; + journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked; + journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing; + journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging; + journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count; + journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks; + journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged; + spin_unlock(&journal->j_history_lock); + commit_transaction->t_state = T_FINISHED; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; |