From 259709b07da103edc40b8c5bdb2d5c0e77374f94 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 21 May 2013 23:20:03 -0400 Subject: jbd2: change jbd2_journal_invalidatepage to accept length invalidatepage now accepts range to invalidate and there are two file system using jbd2 also implementing punch hole feature which can benefit from this. We need to implement the same thing for jbd2 layer in order to allow those file system take benefit of this functionality. This commit adds length argument to the jbd2_journal_invalidatepage() and updates all instances in ext4 and ocfs2. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/jbd2.h') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 6e051f472edb..682a63c34d26 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1090,7 +1090,7 @@ extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); extern int jbd2_journal_invalidatepage(journal_t *, - struct page *, unsigned long); + struct page *, unsigned int, unsigned int); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_flush (journal_t *); -- cgit v1.2.3 From f5113effc2a2ee6b86a4b345ce557353dcbcfffe Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:01:45 -0400 Subject: jbd2: don't create journal_head for temporary journal buffers When writing metadata to the journal, we create temporary buffer heads for that task. We also attach journal heads to these buffer heads but the only purpose of the journal heads is to keep buffers linked in transaction's BJ_IO list. We remove the need for journal heads by reusing buffer_head's b_assoc_buffers list for that purpose. Also since BJ_IO list is just a temporary list for transaction commit, we use a private list in jbd2_journal_commit_transaction() for that thus removing BJ_IO list from transaction completely. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 1 - fs/jbd2/commit.c | 65 +++++++++++++++++---------------------------------- fs/jbd2/journal.c | 36 +++++++++++----------------- fs/jbd2/transaction.c | 14 ++++------- include/linux/jbd2.h | 32 ++++++++++++------------- 5 files changed, 56 insertions(+), 92 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c78841ee81cf..2735fef6e55e 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_state == T_FINISHED); J_ASSERT(transaction->t_buffers == NULL); J_ASSERT(transaction->t_forget == NULL); - J_ASSERT(transaction->t_iobuf_list == NULL); J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index e61d7224a729..57bd2ff97888 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -369,7 +369,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) { struct transaction_stats_s stats; transaction_t *commit_transaction; - struct journal_head *jh, *new_jh, *descriptor; + struct journal_head *jh, *descriptor; struct buffer_head **wbuf = journal->j_wbuf; int bufs; int flags; @@ -393,6 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) tid_t first_tid; int update_tail; int csum_size = 0; + LIST_HEAD(io_bufs); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) csum_size = sizeof(struct jbd2_journal_block_tail); @@ -659,29 +660,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* Bump b_count to prevent truncate from stumbling over the shadowed buffer! @@@ This can go if we ever get - rid of the BJ_IO/BJ_Shadow pairing of buffers. */ + rid of the shadow pairing of buffers. */ atomic_inc(&jh2bh(jh)->b_count); - /* Make a temporary IO buffer with which to write it out - (this will requeue both the metadata buffer and the - temporary IO buffer). new_bh goes on BJ_IO*/ - - set_bit(BH_JWrite, &jh2bh(jh)->b_state); /* - * akpm: jbd2_journal_write_metadata_buffer() sets - * new_bh->b_transaction to commit_transaction. - * We need to clean this up before we release new_bh - * (which is of type BJ_IO) + * Make a temporary IO buffer with which to write it out + * (this will requeue the metadata buffer to BJ_Shadow). */ + set_bit(BH_JWrite, &jh2bh(jh)->b_state); JBUFFER_TRACE(jh, "ph3: write metadata"); flags = jbd2_journal_write_metadata_buffer(commit_transaction, - jh, &new_jh, blocknr); + jh, &wbuf[bufs], blocknr); if (flags < 0) { jbd2_journal_abort(journal, flags); continue; } - set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); - wbuf[bufs++] = jh2bh(new_jh); + jbd2_file_log_bh(&io_bufs, wbuf[bufs]); /* Record the new block's tag in the current descriptor buffer */ @@ -695,10 +689,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag = (journal_block_tag_t *) tagp; write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be16(tag_flag); - jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), + jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], commit_transaction->t_tid); tagp += tag_bytes; space_left -= tag_bytes; + bufs++; if (first_tag) { memcpy (tagp, journal->j_uuid, 16); @@ -810,7 +805,7 @@ start_journal_io: the log. Before we can commit it, wait for the IO so far to complete. Control buffers being written are on the transaction's t_log_list queue, and metadata buffers are on - the t_iobuf_list queue. + the io_bufs list. Wait for the buffers in reverse order. That way we are less likely to be woken up until all IOs have completed, and @@ -819,46 +814,31 @@ start_journal_io: jbd_debug(3, "JBD2: commit phase 3\n"); - /* - * akpm: these are BJ_IO, and j_list_lock is not needed. - * See __journal_try_to_free_buffer. - */ -wait_for_iobuf: - while (commit_transaction->t_iobuf_list != NULL) { - struct buffer_head *bh; + while (!list_empty(&io_bufs)) { + struct buffer_head *bh = list_entry(io_bufs.prev, + struct buffer_head, + b_assoc_buffers); - jh = commit_transaction->t_iobuf_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_iobuf; - } - if (cond_resched()) - goto wait_for_iobuf; + wait_on_buffer(bh); + cond_resched(); if (unlikely(!buffer_uptodate(bh))) err = -EIO; - - clear_buffer_jwrite(bh); - - JBUFFER_TRACE(jh, "ph4: unfile after journal write"); - jbd2_journal_unfile_buffer(journal, jh); + jbd2_unfile_log_bh(bh); /* - * ->t_iobuf_list should contain only dummy buffer_heads - * which were created by jbd2_journal_write_metadata_buffer(). + * The list contains temporary buffer heads created by + * jbd2_journal_write_metadata_buffer(). */ BUFFER_TRACE(bh, "dumping temporary bh"); - jbd2_journal_put_journal_head(jh); __brelse(bh); J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); free_buffer_head(bh); - /* We also have to unlock and free the corresponding - shadowed buffer */ + /* We also have to refile the corresponding shadowed buffer */ jh = commit_transaction->t_shadow_list->b_tprev; bh = jh2bh(jh); - clear_bit(BH_JWrite, &bh->b_state); + clear_buffer_jwrite(bh); J_ASSERT_BH(bh, buffer_jbddirty(bh)); /* The metadata is now released for reuse, but we need @@ -953,7 +933,6 @@ wait_for_iobuf: J_ASSERT(list_empty(&commit_transaction->t_inode_list)); J_ASSERT(commit_transaction->t_buffers == NULL); J_ASSERT(commit_transaction->t_checkpoint_list == NULL); - J_ASSERT(commit_transaction->t_iobuf_list == NULL); J_ASSERT(commit_transaction->t_shadow_list == NULL); J_ASSERT(commit_transaction->t_log_list == NULL); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 3cdd285df204..45cdc080e466 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -310,14 +310,12 @@ static void journal_kill_thread(journal_t *journal) * * If the source buffer has already been modified by a new transaction * since we took the last commit snapshot, we use the frozen copy of - * that data for IO. If we end up using the existing buffer_head's data - * for the write, then we *have* to lock the buffer to prevent anyone - * else from using and possibly modifying it while the IO is in - * progress. + * that data for IO. If we end up using the existing buffer_head's data + * for the write, then we have to make sure nobody modifies it while the + * IO is in progress. do_get_write_access() handles this. * - * The function returns a pointer to the buffer_heads to be used for IO. - * - * We assume that the journal has already been locked in this function. + * The function returns a pointer to the buffer_head to be used for IO. + * * * Return value: * <0: Error @@ -330,15 +328,14 @@ static void journal_kill_thread(journal_t *journal) int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned long long blocknr) + struct buffer_head **bh_out, + sector_t blocknr) { int need_copy_out = 0; int done_copy_out = 0; int do_escape = 0; char *mapped_data; struct buffer_head *new_bh; - struct journal_head *new_jh; struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); @@ -368,14 +365,13 @@ retry_alloc: /* keep subsequent assertions sane */ atomic_set(&new_bh->b_count, 1); - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ + jbd_lock_bh_state(bh_in); +repeat: /* * If a new transaction has already done a buffer copy-out, then * we use that version of the data for the commit. */ - jbd_lock_bh_state(bh_in); -repeat: if (jh_in->b_frozen_data) { done_copy_out = 1; new_page = virt_to_page(jh_in->b_frozen_data); @@ -415,7 +411,7 @@ repeat: jbd_unlock_bh_state(bh_in); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); if (!tmp) { - jbd2_journal_put_journal_head(new_jh); + brelse(new_bh); return -ENOMEM; } jbd_lock_bh_state(bh_in); @@ -426,7 +422,7 @@ repeat: jh_in->b_frozen_data = tmp; mapped_data = kmap_atomic(new_page); - memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); + memcpy(tmp, mapped_data + new_offset, bh_in->b_size); kunmap_atomic(mapped_data); new_page = virt_to_page(tmp); @@ -452,14 +448,13 @@ repeat: } set_bh_page(new_bh, new_page, new_offset); - new_jh->b_transaction = NULL; - new_bh->b_size = jh2bh(jh_in)->b_size; - new_bh->b_bdev = transaction->t_journal->j_dev; + new_bh->b_size = bh_in->b_size; + new_bh->b_bdev = journal->j_dev; new_bh->b_blocknr = blocknr; set_buffer_mapped(new_bh); set_buffer_dirty(new_bh); - *jh_out = new_jh; + *bh_out = new_bh; /* * The to-be-written buffer needs to get moved to the io queue, @@ -472,9 +467,6 @@ repeat: spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh_in); - JBUFFER_TRACE(new_jh, "file as BJ_IO"); - jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); - return do_escape | (done_copy_out << 1); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 5d8268ad364a..983010900258 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1601,10 +1601,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) * Remove a buffer from the appropriate transaction list. * * Note that this function can *change* the value of - * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, - * t_log_list or t_reserved_list. If the caller is holding onto a copy of one - * of these pointers, it could go bad. Generally the caller needs to re-read - * the pointer from the transaction_t. + * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or + * t_reserved_list. If the caller is holding onto a copy of one of these + * pointers, it could go bad. Generally the caller needs to re-read the + * pointer from the transaction_t. * * Called under j_list_lock. */ @@ -1634,9 +1634,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) case BJ_Forget: list = &transaction->t_forget; break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; case BJ_Shadow: list = &transaction->t_shadow_list; break; @@ -2148,9 +2145,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, case BJ_Forget: list = &transaction->t_forget; break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; case BJ_Shadow: list = &transaction->t_shadow_list; break; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 682a63c34d26..57210844b228 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -523,12 +523,6 @@ struct transaction_s */ struct journal_head *t_checkpoint_io_list; - /* - * Doubly-linked circular list of temporary buffers currently undergoing - * IO in the log [j_list_lock] - */ - struct journal_head *t_iobuf_list; - /* * Doubly-linked circular list of metadata buffers being shadowed by log * IO. The IO buffers on the iobuf list and the shadow buffers on this @@ -991,6 +985,14 @@ extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, i extern void __journal_free_buffer(struct journal_head *bh); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void __journal_clean_data_list(transaction_t *transaction); +static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) +{ + list_add_tail(&bh->b_assoc_buffers, head); +} +static inline void jbd2_unfile_log_bh(struct buffer_head *bh) +{ + list_del_init(&bh->b_assoc_buffers); +} /* Log buffer allocation */ extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); @@ -1039,11 +1041,10 @@ extern void jbd2_buffer_abort_trigger(struct journal_head *jh, struct jbd2_buffer_trigger_type *triggers); /* Buffer IO */ -extern int -jbd2_journal_write_metadata_buffer(transaction_t *transaction, - struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned long long blocknr); +extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, + struct journal_head *jh_in, + struct buffer_head **bh_out, + sector_t blocknr); /* Transaction locking */ extern void __wait_on_journal (journal_t *); @@ -1286,11 +1287,10 @@ static inline int jbd_space_needed(journal_t *journal) #define BJ_None 0 /* Not journaled */ #define BJ_Metadata 1 /* Normal journaled metadata */ #define BJ_Forget 2 /* Buffer superseded by this transaction */ -#define BJ_IO 3 /* Buffer is for temporary IO use */ -#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */ -#define BJ_LogCtl 5 /* Buffer contains log descriptors */ -#define BJ_Reserved 6 /* Buffer is reserved for access by journal */ -#define BJ_Types 7 +#define BJ_Shadow 3 /* Buffer contents being shadowed to the log */ +#define BJ_LogCtl 4 /* Buffer contains log descriptors */ +#define BJ_Reserved 5 /* Buffer is reserved for access by journal */ +#define BJ_Types 6 extern int jbd_blocks_per_page(struct inode *inode); -- cgit v1.2.3 From e5a120aeb57f40ae568a5ca1dd6ace53d0213582 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:06:01 -0400 Subject: jbd2: remove journal_head from descriptor buffers Similarly as for metadata buffers, also log descriptor buffers don't really need the journal head. So strip it and remove BJ_LogCtl list. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 1 - fs/jbd2/commit.c | 78 ++++++++++++++++++++------------------------------- fs/jbd2/journal.c | 4 +-- fs/jbd2/revoke.c | 49 ++++++++++++++++---------------- fs/jbd2/transaction.c | 6 ---- include/linux/jbd2.h | 19 +++++-------- 6 files changed, 64 insertions(+), 93 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 2735fef6e55e..65ec076e41f2 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -691,7 +691,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_buffers == NULL); J_ASSERT(transaction->t_forget == NULL); J_ASSERT(transaction->t_shadow_list == NULL); - J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(atomic_read(&transaction->t_updates) == 0); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 57bd2ff97888..7c6f7eea2316 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -85,8 +85,7 @@ nope: __brelse(bh); } -static void jbd2_commit_block_csum_set(journal_t *j, - struct journal_head *descriptor) +static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) { struct commit_header *h; __u32 csum; @@ -94,12 +93,11 @@ static void jbd2_commit_block_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - h = (struct commit_header *)(jh2bh(descriptor)->b_data); + h = (struct commit_header *)(bh->b_data); h->h_chksum_type = 0; h->h_chksum_size = 0; h->h_chksum[0] = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); h->h_chksum[0] = cpu_to_be32(csum); } @@ -116,7 +114,6 @@ static int journal_submit_commit_record(journal_t *journal, struct buffer_head **cbh, __u32 crc32_sum) { - struct journal_head *descriptor; struct commit_header *tmp; struct buffer_head *bh; int ret; @@ -127,12 +124,10 @@ static int journal_submit_commit_record(journal_t *journal, if (is_journal_aborted(journal)) return 0; - descriptor = jbd2_journal_get_descriptor_buffer(journal); - if (!descriptor) + bh = jbd2_journal_get_descriptor_buffer(journal); + if (!bh) return 1; - bh = jh2bh(descriptor); - tmp = (struct commit_header *)bh->b_data; tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); @@ -146,9 +141,9 @@ static int journal_submit_commit_record(journal_t *journal, tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; tmp->h_chksum[0] = cpu_to_be32(crc32_sum); } - jbd2_commit_block_csum_set(journal, descriptor); + jbd2_commit_block_csum_set(journal, bh); - JBUFFER_TRACE(descriptor, "submit commit block"); + BUFFER_TRACE(bh, "submit commit block"); lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); @@ -180,7 +175,6 @@ static int journal_wait_on_commit_record(journal_t *journal, if (unlikely(!buffer_uptodate(bh))) ret = -EIO; put_bh(bh); /* One for getblk() */ - jbd2_journal_put_journal_head(bh2jh(bh)); return ret; } @@ -321,7 +315,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, } static void jbd2_descr_block_csum_set(journal_t *j, - struct journal_head *descriptor) + struct buffer_head *bh) { struct jbd2_journal_block_tail *tail; __u32 csum; @@ -329,12 +323,10 @@ static void jbd2_descr_block_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - tail = (struct jbd2_journal_block_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - + tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); tail->t_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); tail->t_checksum = cpu_to_be32(csum); } @@ -369,7 +361,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) { struct transaction_stats_s stats; transaction_t *commit_transaction; - struct journal_head *jh, *descriptor; + struct journal_head *jh; + struct buffer_head *descriptor; struct buffer_head **wbuf = journal->j_wbuf; int bufs; int flags; @@ -394,6 +387,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int update_tail; int csum_size = 0; LIST_HEAD(io_bufs); + LIST_HEAD(log_bufs); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) csum_size = sizeof(struct jbd2_journal_block_tail); @@ -547,7 +541,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) blk_start_plug(&plug); jbd2_journal_write_revoke_records(journal, commit_transaction, - WRITE_SYNC); + &log_bufs, WRITE_SYNC); blk_finish_plug(&plug); jbd_debug(3, "JBD2: commit phase 2\n"); @@ -573,8 +567,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) atomic_read(&commit_transaction->t_outstanding_credits)); err = 0; - descriptor = NULL; bufs = 0; + descriptor = NULL; blk_start_plug(&plug); while (commit_transaction->t_buffers) { @@ -606,8 +600,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) record the metadata buffer. */ if (!descriptor) { - struct buffer_head *bh; - J_ASSERT (bufs == 0); jbd_debug(4, "JBD2: get descriptor\n"); @@ -618,26 +610,26 @@ void jbd2_journal_commit_transaction(journal_t *journal) continue; } - bh = jh2bh(descriptor); jbd_debug(4, "JBD2: got buffer %llu (%p)\n", - (unsigned long long)bh->b_blocknr, bh->b_data); - header = (journal_header_t *)&bh->b_data[0]; + (unsigned long long)descriptor->b_blocknr, + descriptor->b_data); + header = (journal_header_t *)descriptor->b_data; header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); header->h_sequence = cpu_to_be32(commit_transaction->t_tid); - tagp = &bh->b_data[sizeof(journal_header_t)]; - space_left = bh->b_size - sizeof(journal_header_t); + tagp = &descriptor->b_data[sizeof(journal_header_t)]; + space_left = descriptor->b_size - + sizeof(journal_header_t); first_tag = 1; - set_buffer_jwrite(bh); - set_buffer_dirty(bh); - wbuf[bufs++] = bh; + set_buffer_jwrite(descriptor); + set_buffer_dirty(descriptor); + wbuf[bufs++] = descriptor; /* Record it so that we can wait for IO completion later */ - BUFFER_TRACE(bh, "ph3: file as descriptor"); - jbd2_journal_file_buffer(descriptor, commit_transaction, - BJ_LogCtl); + BUFFER_TRACE(descriptor, "ph3: file as descriptor"); + jbd2_file_log_bh(&log_bufs, descriptor); } /* Where is the buffer to be written? */ @@ -864,26 +856,19 @@ start_journal_io: jbd_debug(3, "JBD2: commit phase 4\n"); /* Here we wait for the revoke record and descriptor record buffers */ - wait_for_ctlbuf: - while (commit_transaction->t_log_list != NULL) { + while (!list_empty(&log_bufs)) { struct buffer_head *bh; - jh = commit_transaction->t_log_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_ctlbuf; - } - if (cond_resched()) - goto wait_for_ctlbuf; + bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers); + wait_on_buffer(bh); + cond_resched(); if (unlikely(!buffer_uptodate(bh))) err = -EIO; BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); clear_buffer_jwrite(bh); - jbd2_journal_unfile_buffer(journal, jh); - jbd2_journal_put_journal_head(jh); + jbd2_unfile_log_bh(bh); __brelse(bh); /* One for getblk */ /* AKPM: bforget here */ } @@ -934,7 +919,6 @@ start_journal_io: J_ASSERT(commit_transaction->t_buffers == NULL); J_ASSERT(commit_transaction->t_checkpoint_list == NULL); J_ASSERT(commit_transaction->t_shadow_list == NULL); - J_ASSERT(commit_transaction->t_log_list == NULL); restart_loop: /* diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 45cdc080e466..b0a8d1e4703e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -790,7 +790,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, * But we don't bother doing that, so there will be coherency problems with * mmaps of blockdevs which hold live JBD-controlled filesystems. */ -struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) +struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; unsigned long long blocknr; @@ -809,7 +809,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) set_buffer_uptodate(bh); unlock_buffer(bh); BUFFER_TRACE(bh, "return this buffer"); - return jbd2_journal_add_journal_head(bh); + return bh; } /* diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f30b80b4ce8b..198c9c10276d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -122,9 +122,10 @@ struct jbd2_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, - struct journal_head **, int *, + struct list_head *, + struct buffer_head **, int *, struct jbd2_revoke_record_s *, int); -static void flush_descriptor(journal_t *, struct journal_head *, int, int); +static void flush_descriptor(journal_t *, struct buffer_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -531,9 +532,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) */ void jbd2_journal_write_revoke_records(journal_t *journal, transaction_t *transaction, + struct list_head *log_bufs, int write_op) { - struct journal_head *descriptor; + struct buffer_head *descriptor; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; struct list_head *hash_list; @@ -553,7 +555,7 @@ void jbd2_journal_write_revoke_records(journal_t *journal, while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s *) hash_list->next; - write_one_revoke_record(journal, transaction, + write_one_revoke_record(journal, transaction, log_bufs, &descriptor, &offset, record, write_op); count++; @@ -573,13 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, - struct journal_head **descriptorp, + struct list_head *log_bufs, + struct buffer_head **descriptorp, int *offsetp, struct jbd2_revoke_record_s *record, int write_op) { int csum_size = 0; - struct journal_head *descriptor; + struct buffer_head *descriptor; int offset; journal_header_t *header; @@ -609,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal, descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) return; - header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; + header = (journal_header_t *)descriptor->b_data; header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); header->h_sequence = cpu_to_be32(transaction->t_tid); /* Record it so that we can wait for IO completion later */ - JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); - jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); + BUFFER_TRACE(descriptor, "file in log_bufs"); + jbd2_file_log_bh(log_bufs, descriptor); offset = sizeof(jbd2_journal_revoke_header_t); *descriptorp = descriptor; } if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { - * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); offset += 8; } else { - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); offset += 4; } @@ -636,8 +639,7 @@ static void write_one_revoke_record(journal_t *journal, *offsetp = offset; } -static void jbd2_revoke_csum_set(journal_t *j, - struct journal_head *descriptor) +static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) { struct jbd2_journal_revoke_tail *tail; __u32 csum; @@ -645,12 +647,10 @@ static void jbd2_revoke_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - tail = (struct jbd2_journal_revoke_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - + tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - sizeof(struct jbd2_journal_revoke_tail)); tail->r_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); tail->r_checksum = cpu_to_be32(csum); } @@ -662,25 +662,24 @@ static void jbd2_revoke_csum_set(journal_t *j, */ static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, + struct buffer_head *descriptor, int offset, int write_op) { jbd2_journal_revoke_header_t *header; - struct buffer_head *bh = jh2bh(descriptor); if (is_journal_aborted(journal)) { - put_bh(bh); + put_bh(descriptor); return; } - header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; + header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header->r_count = cpu_to_be32(offset); jbd2_revoke_csum_set(journal, descriptor); - set_buffer_jwrite(bh); - BUFFER_TRACE(bh, "write"); - set_buffer_dirty(bh); - write_dirty_buffer(bh, write_op); + set_buffer_jwrite(descriptor); + BUFFER_TRACE(descriptor, "write"); + set_buffer_dirty(descriptor); + write_dirty_buffer(descriptor, write_op); } #endif diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 983010900258..f1c5392e62b6 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1637,9 +1637,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) case BJ_Shadow: list = &transaction->t_shadow_list; break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; case BJ_Reserved: list = &transaction->t_reserved_list; break; @@ -2148,9 +2145,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, case BJ_Shadow: list = &transaction->t_shadow_list; break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; case BJ_Reserved: list = &transaction->t_reserved_list; break; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 57210844b228..b7dc40da99e0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -530,12 +530,6 @@ struct transaction_s */ struct journal_head *t_shadow_list; - /* - * Doubly-linked circular list of control buffers being written to the - * log. [j_list_lock] - */ - struct journal_head *t_log_list; - /* * List of inodes whose data we've modified in data=ordered mode. * [j_list_lock] @@ -995,7 +989,7 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh) } /* Log buffer allocation */ -extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); +struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); @@ -1179,8 +1173,10 @@ extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); -extern void jbd2_journal_write_revoke_records(journal_t *, - transaction_t *, int); +extern void jbd2_journal_write_revoke_records(journal_t *journal, + transaction_t *transaction, + struct list_head *log_bufs, + int write_op); /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); @@ -1288,9 +1284,8 @@ static inline int jbd_space_needed(journal_t *journal) #define BJ_Metadata 1 /* Normal journaled metadata */ #define BJ_Forget 2 /* Buffer superseded by this transaction */ #define BJ_Shadow 3 /* Buffer contents being shadowed to the log */ -#define BJ_LogCtl 4 /* Buffer contains log descriptors */ -#define BJ_Reserved 5 /* Buffer is reserved for access by journal */ -#define BJ_Types 6 +#define BJ_Reserved 4 /* Buffer is reserved for access by journal */ +#define BJ_Types 5 extern int jbd_blocks_per_page(struct inode *inode); -- cgit v1.2.3 From b34090e5e22a02fba0e4473056cce9420ad9dd0b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:08:56 -0400 Subject: jbd2: refine waiting for shadow buffers Currently when we add a buffer to a transaction, we wait until the buffer is removed from BJ_Shadow list (so that we prevent any changes to the buffer that is just written to the journal). This can take unnecessarily long as a lot happens between the time the buffer is submitted to the journal and the time when we remove the buffer from BJ_Shadow list. (e.g. We wait for all data buffers in the transaction, we issue a cache flush, etc.) Also this creates a dependency of do_get_write_access() on transaction commit (namely waiting for data IO to complete) which we want to avoid when implementing transaction reservation. So we modify commit code to set new BH_Shadow flag when temporary shadowing buffer is created and we clear that flag once IO on that buffer is complete. This allows do_get_write_access() to wait only for BH_Shadow bit and thus removes the dependency on data IO completion. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 18 +++++++++--------- fs/jbd2/journal.c | 2 ++ fs/jbd2/transaction.c | 44 +++++++++++++++++++------------------------- include/linux/jbd.h | 25 +++++++++++++++++++++++++ include/linux/jbd2.h | 28 ++++++++++++++++++++++++++++ include/linux/jbd_common.h | 26 -------------------------- 6 files changed, 83 insertions(+), 60 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7c6f7eea2316..d73a0d808ec1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -30,15 +30,22 @@ #include /* - * Default IO end handler for temporary BJ_IO buffer_heads. + * IO end handler for temporary buffer_heads handling writes to the journal. */ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { + struct buffer_head *orig_bh = bh->b_private; + BUFFER_TRACE(bh, ""); if (uptodate) set_buffer_uptodate(bh); else clear_buffer_uptodate(bh); + if (orig_bh) { + clear_bit_unlock(BH_Shadow, &orig_bh->b_state); + smp_mb__after_clear_bit(); + wake_up_bit(&orig_bh->b_state, BH_Shadow); + } unlock_buffer(bh); } @@ -832,6 +839,7 @@ start_journal_io: bh = jh2bh(jh); clear_buffer_jwrite(bh); J_ASSERT_BH(bh, buffer_jbddirty(bh)); + J_ASSERT_BH(bh, !buffer_shadow(bh)); /* The metadata is now released for reuse, but we need to remember it against this transaction so that when @@ -839,14 +847,6 @@ start_journal_io: required. */ JBUFFER_TRACE(jh, "file as BJ_Forget"); jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); - /* - * Wake up any transactions which were waiting for this IO to - * complete. The barrier must be here so that changes by - * jbd2_journal_file_buffer() take effect before wake_up_bit() - * does the waitqueue check. - */ - smp_mb(); - wake_up_bit(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "brelse shadowed buffer"); __brelse(bh); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b0a8d1e4703e..5ef0712e2f7a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -451,6 +451,7 @@ repeat: new_bh->b_size = bh_in->b_size; new_bh->b_bdev = journal->j_dev; new_bh->b_blocknr = blocknr; + new_bh->b_private = bh_in; set_buffer_mapped(new_bh); set_buffer_dirty(new_bh); @@ -465,6 +466,7 @@ repeat: spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); spin_unlock(&journal->j_list_lock); + set_buffer_shadow(bh_in); jbd_unlock_bh_state(bh_in); return do_escape | (done_copy_out << 1); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f1c5392e62b6..6f4248dd8759 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -619,6 +619,12 @@ static void warn_dirty_buffer(struct buffer_head *bh) bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); } +static int sleep_on_shadow_bh(void *word) +{ + io_schedule(); + return 0; +} + /* * If the buffer is already part of the current transaction, then there * is nothing we need to do. If it is already part of a prior @@ -754,41 +760,29 @@ repeat: * journaled. If the primary copy is already going to * disk then we cannot do copy-out here. */ - if (jh->b_jlist == BJ_Shadow) { - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); - wait_queue_head_t *wqh; - - wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); - + if (buffer_shadow(bh)) { JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); - /* commit wakes up all shadow buffers after IO */ - for ( ; ; ) { - prepare_to_wait(wqh, &wait.wait, - TASK_UNINTERRUPTIBLE); - if (jh->b_jlist != BJ_Shadow) - break; - schedule(); - } - finish_wait(wqh, &wait.wait); + wait_on_bit(&bh->b_state, BH_Shadow, + sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); goto repeat; } - /* Only do the copy if the currently-owning transaction - * still needs it. If it is on the Forget list, the - * committing transaction is past that stage. The - * buffer had better remain locked during the kmalloc, - * but that should be true --- we hold the journal lock - * still and the buffer is already on the BUF_JOURNAL - * list so won't be flushed. + /* + * Only do the copy if the currently-owning transaction still + * needs it. If buffer isn't on BJ_Metadata list, the + * committing transaction is past that stage (here we use the + * fact that BH_Shadow is set under bh_state lock together with + * refiling to BJ_Shadow list and at this point we know the + * buffer doesn't have BH_Shadow set). * * Subtle point, though: if this is a get_undo_access, * then we will be relying on the frozen_data to contain * the new value of the committed_data record after the * transaction, so we HAVE to force the frozen_data copy - * in that case. */ - - if (jh->b_jlist != BJ_Forget || force_copy) { + * in that case. + */ + if (jh->b_jlist == BJ_Metadata || force_copy) { JBUFFER_TRACE(jh, "generate frozen data"); if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 9c505f1aa1fd..2439054a6c9a 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -244,6 +244,31 @@ typedef struct journal_superblock_s #include #include + +enum jbd_state_bits { + BH_JBD /* Has an attached ext3 journal_head */ + = BH_PrivateStart, + BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ + BH_Freed, /* Has been freed (truncated) */ + BH_Revoked, /* Has been revoked from the log */ + BH_RevokeValid, /* Revoked flag is valid */ + BH_JBDDirty, /* Is dirty but journaled */ + BH_State, /* Pins most journal_head state */ + BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ + BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ + BH_JBDPrivateStart, /* First bit available for private use by FS */ +}; + +BUFFER_FNS(JBD, jbd) +BUFFER_FNS(JWrite, jwrite) +BUFFER_FNS(JBDDirty, jbddirty) +TAS_BUFFER_FNS(JBDDirty, jbddirty) +BUFFER_FNS(Revoked, revoked) +TAS_BUFFER_FNS(Revoked, revoked) +BUFFER_FNS(RevokeValid, revokevalid) +TAS_BUFFER_FNS(RevokeValid, revokevalid) +BUFFER_FNS(Freed, freed) + #include #define J_ASSERT(assert) BUG_ON(!(assert)) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b7dc40da99e0..e33e84b3d5c8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -302,6 +302,34 @@ typedef struct journal_superblock_s #include #include + +enum jbd_state_bits { + BH_JBD /* Has an attached ext3 journal_head */ + = BH_PrivateStart, + BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ + BH_Freed, /* Has been freed (truncated) */ + BH_Revoked, /* Has been revoked from the log */ + BH_RevokeValid, /* Revoked flag is valid */ + BH_JBDDirty, /* Is dirty but journaled */ + BH_State, /* Pins most journal_head state */ + BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ + BH_Shadow, /* IO on shadow buffer is running */ + BH_Verified, /* Metadata block has been verified ok */ + BH_JBDPrivateStart, /* First bit available for private use by FS */ +}; + +BUFFER_FNS(JBD, jbd) +BUFFER_FNS(JWrite, jwrite) +BUFFER_FNS(JBDDirty, jbddirty) +TAS_BUFFER_FNS(JBDDirty, jbddirty) +BUFFER_FNS(Revoked, revoked) +TAS_BUFFER_FNS(Revoked, revoked) +BUFFER_FNS(RevokeValid, revokevalid) +TAS_BUFFER_FNS(RevokeValid, revokevalid) +BUFFER_FNS(Freed, freed) +BUFFER_FNS(Shadow, shadow) +BUFFER_FNS(Verified, verified) + #include #define J_ASSERT(assert) BUG_ON(!(assert)) diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h index 6133679bc4c0..b1f708976ffd 100644 --- a/include/linux/jbd_common.h +++ b/include/linux/jbd_common.h @@ -1,32 +1,6 @@ #ifndef _LINUX_JBD_STATE_H #define _LINUX_JBD_STATE_H -enum jbd_state_bits { - BH_JBD /* Has an attached ext3 journal_head */ - = BH_PrivateStart, - BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ - BH_Freed, /* Has been freed (truncated) */ - BH_Revoked, /* Has been revoked from the log */ - BH_RevokeValid, /* Revoked flag is valid */ - BH_JBDDirty, /* Is dirty but journaled */ - BH_State, /* Pins most journal_head state */ - BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ - BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ - BH_Verified, /* Metadata block has been verified ok */ - BH_JBDPrivateStart, /* First bit available for private use by FS */ -}; - -BUFFER_FNS(JBD, jbd) -BUFFER_FNS(JWrite, jwrite) -BUFFER_FNS(JBDDirty, jbddirty) -TAS_BUFFER_FNS(JBDDirty, jbddirty) -BUFFER_FNS(Revoked, revoked) -TAS_BUFFER_FNS(Revoked, revoked) -BUFFER_FNS(RevokeValid, revokevalid) -TAS_BUFFER_FNS(RevokeValid, revokevalid) -BUFFER_FNS(Freed, freed) -BUFFER_FNS(Verified, verified) - static inline struct buffer_head *jh2bh(struct journal_head *jh) { return jh->b_bh; -- cgit v1.2.3 From 76c39904561004ac8675f858a290129e439d5168 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:12:57 -0400 Subject: jbd2: cleanup needed free block estimates when starting a transaction __jbd2_log_space_left() and jbd_space_needed() were kind of odd. jbd_space_needed() accounted also credits needed for currently committing transaction while it didn't account for credits needed for control blocks. __jbd2_log_space_left() then accounted for control blocks as a fraction of free space. Since results of these two functions are always only compared against each other, this works correct but is somewhat strange. Move the estimates so that jbd_space_needed() returns number of blocks needed for a transaction including control blocks and __jbd2_log_space_left() returns free space in the journal (with the committing transaction already subtracted). Rename functions to jbd2_log_space_left() and jbd2_space_needed() while we are changing them. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 8 ++++---- fs/jbd2/journal.c | 29 ----------------------------- fs/jbd2/transaction.c | 9 +++++---- include/linux/jbd2.h | 32 ++++++++++++++++++++++++++------ 4 files changed, 35 insertions(+), 43 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 65ec076e41f2..a572383bcf99 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -120,8 +120,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) int nblocks, space_left; /* assert_spin_locked(&journal->j_state_lock); */ - nblocks = jbd_space_needed(journal); - while (__jbd2_log_space_left(journal) < nblocks) { + nblocks = jbd2_space_needed(journal); + while (jbd2_log_space_left(journal) < nblocks) { if (journal->j_flags & JBD2_ABORT) return; write_unlock(&journal->j_state_lock); @@ -140,8 +140,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) */ write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); - nblocks = jbd_space_needed(journal); - space_left = __jbd2_log_space_left(journal); + nblocks = jbd2_space_needed(journal); + space_left = jbd2_log_space_left(journal); if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; tid_t tid = 0; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5ef0712e2f7a..8e5486d62e89 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -477,35 +477,6 @@ repeat: * journal, so that we can begin checkpointing when appropriate. */ -/* - * __jbd2_log_space_left: Return the number of free blocks left in the journal. - * - * Called with the journal already locked. - * - * Called under j_state_lock - */ - -int __jbd2_log_space_left(journal_t *journal) -{ - int left = journal->j_free; - - /* assert_spin_locked(&journal->j_state_lock); */ - - /* - * Be pessimistic here about the number of those free blocks which - * might be required for log descriptor control blocks. - */ - -#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ - - left -= MIN_LOG_RESERVED_BLOCKS; - - if (left <= 0) - return 0; - left -= (left >> 3); - return left; -} - /* * Called with j_state_lock locked for writing. * Returns true if a transaction commit was started. diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 60361c634b5d..f9cd43190b43 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -283,12 +283,12 @@ repeat: * reduce the free space arbitrarily. Be careful to account for * those buffers when checkpointing. */ - if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); atomic_sub(nblocks, &transaction->t_outstanding_credits); read_unlock(&journal->j_state_lock); write_lock(&journal->j_state_lock); - if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) __jbd2_log_wait_for_space(journal); write_unlock(&journal->j_state_lock); goto repeat; @@ -306,7 +306,7 @@ repeat: jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", handle, nblocks, atomic_read(&transaction->t_outstanding_credits), - __jbd2_log_space_left(journal)); + jbd2_log_space_left(journal)); read_unlock(&journal->j_state_lock); lock_map_acquire(&handle->h_lockdep_map); @@ -441,7 +441,8 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) goto unlock; } - if (wanted > __jbd2_log_space_left(journal)) { + if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) > + jbd2_log_space_left(journal)) { jbd_debug(3, "denied handle %p %d blocks: " "insufficient log space\n", handle, nblocks); goto unlock; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e33e84b3d5c8..7a1f6cd864c8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1220,7 +1220,6 @@ extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); * transitions on demand. */ -int __jbd2_log_space_left(journal_t *); /* Called with journal locked */ int jbd2_log_start_commit(journal_t *journal, tid_t tid); int __jbd2_log_start_commit(journal_t *journal, tid_t tid); int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); @@ -1290,17 +1289,38 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +/* + * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for + * transaction control blocks. + */ +#define JBD2_CONTROL_BLOCKS_SHIFT 5 + /* * Return the minimum number of blocks which must be free in the journal * before a new transaction may be started. Must be called under j_state_lock. */ -static inline int jbd_space_needed(journal_t *journal) +static inline int jbd2_space_needed(journal_t *journal) { int nblocks = journal->j_max_transaction_buffers; - if (journal->j_committing_transaction) - nblocks += atomic_read(&journal->j_committing_transaction-> - t_outstanding_credits); - return nblocks; + return nblocks + (nblocks >> JBD2_CONTROL_BLOCKS_SHIFT); +} + +/* + * Return number of free blocks in the log. Must be called under j_state_lock. + */ +static inline unsigned long jbd2_log_space_left(journal_t *journal) +{ + /* Allow for rounding errors */ + unsigned long free = journal->j_free - 32; + + if (journal->j_committing_transaction) { + unsigned long committing = atomic_read(&journal-> + j_committing_transaction->t_outstanding_credits); + + /* Transaction + control blocks */ + free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT); + } + return free; } /* -- cgit v1.2.3 From f29fad72105287e6899d9128a9d494514f220e77 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:24:11 -0400 Subject: jbd2: remove unused waitqueues j_wait_logspace and j_wait_checkpoint are unused. Remove them. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 4 ---- fs/jbd2/journal.c | 2 -- include/linux/jbd2.h | 8 -------- 3 files changed, 14 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index a572383bcf99..75a15f371b00 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -625,10 +625,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) __jbd2_journal_drop_transaction(journal, transaction); jbd2_journal_free_transaction(transaction); - - /* Just in case anybody was waiting for more transactions to be - checkpointed... */ - wake_up(&journal->j_wait_logspace); ret = 1; out: return ret; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8e5486d62e89..f43f97ba002e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1027,9 +1027,7 @@ static journal_t * journal_init_common (void) return NULL; init_waitqueue_head(&journal->j_wait_transaction_locked); - init_waitqueue_head(&journal->j_wait_logspace); init_waitqueue_head(&journal->j_wait_done_commit); - init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); mutex_init(&journal->j_barrier); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 7a1f6cd864c8..8028dd581cb0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -687,9 +687,7 @@ jbd2_time_diff(unsigned long start, unsigned long end) * waiting for checkpointing * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction * to start committing, or for a barrier lock to be released - * @j_wait_logspace: Wait queue for waiting for checkpointing to complete * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_checkpoint: Wait queue to trigger checkpointing * @j_wait_commit: Wait queue to trigger commit * @j_wait_updates: Wait queue to wait for updates to complete * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints @@ -794,15 +792,9 @@ struct journal_s */ wait_queue_head_t j_wait_transaction_locked; - /* Wait queue for waiting for checkpointing to complete */ - wait_queue_head_t j_wait_logspace; - /* Wait queue for waiting for commit to complete */ wait_queue_head_t j_wait_done_commit; - /* Wait queue to trigger checkpointing */ - wait_queue_head_t j_wait_checkpoint; - /* Wait queue to trigger commit */ wait_queue_head_t j_wait_commit; -- cgit v1.2.3 From 8f7d89f36829b9061a14f9040cda1372f264c4fe Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:35:11 -0400 Subject: jbd2: transaction reservation support In some cases we cannot start a transaction because of locking constraints and passing started transaction into those places is not handy either because we could block transaction commit for too long. Transaction reservation is designed to solve these issues. It reserves a handle with given number of credits in the journal and the handle can be later attached to the running transaction without blocking on commit or checkpointing. Reserved handles do not block transaction commit in any way, they only reduce maximum size of the running transaction (because we have to always be prepared to accomodate request for attaching reserved handle). Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_jbd2.c | 2 +- fs/jbd2/commit.c | 6 + fs/jbd2/journal.c | 2 + fs/jbd2/transaction.c | 328 ++++++++++++++++++++++++++++++++++++-------------- include/linux/jbd2.h | 28 ++++- 5 files changed, 273 insertions(+), 93 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 451eb4045330..bd25e782ec6b 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -62,7 +62,7 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, ext4_abort(sb, "Detected aborted journal"); return ERR_PTR(-EROFS); } - return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line); + return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, type, line); } int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index d73a0d808ec1..cfbce48adc0b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -523,6 +523,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + /* + * Reserved credits cannot be claimed anymore, free them + */ + atomic_sub(atomic_read(&journal->j_reserved_credits), + &commit_transaction->t_outstanding_credits); + trace_jbd2_commit_flushing(journal, commit_transaction); stats.run.rs_flushing = jiffies; stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f43f97ba002e..70990d682a0c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1030,6 +1030,7 @@ static journal_t * journal_init_common (void) init_waitqueue_head(&journal->j_wait_done_commit); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); + init_waitqueue_head(&journal->j_wait_reserved); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); @@ -1039,6 +1040,7 @@ static journal_t * journal_init_common (void) journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_min_batch_time = 0; journal->j_max_batch_time = 15000; /* 15ms */ + atomic_set(&journal->j_reserved_credits, 0); /* The journal is marked for error until we succeed with recovery! */ journal->j_flags = JBD2_ABORT; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f14288b04042..f33342a2a95e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -89,7 +89,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) transaction->t_expires = jiffies + journal->j_commit_interval; spin_lock_init(&transaction->t_handle_lock); atomic_set(&transaction->t_updates, 0); - atomic_set(&transaction->t_outstanding_credits, 0); + atomic_set(&transaction->t_outstanding_credits, + atomic_read(&journal->j_reserved_credits)); atomic_set(&transaction->t_handle_count, 0); INIT_LIST_HEAD(&transaction->t_inode_list); INIT_LIST_HEAD(&transaction->t_private_list); @@ -140,6 +141,112 @@ static inline void update_t_max_wait(transaction_t *transaction, #endif } +/* + * Wait until running transaction passes T_LOCKED state. Also starts the commit + * if needed. The function expects running transaction to exist and releases + * j_state_lock. + */ +static void wait_transaction_locked(journal_t *journal) + __releases(journal->j_state_lock) +{ + DEFINE_WAIT(wait); + int need_to_start; + tid_t tid = journal->j_running_transaction->t_tid; + + prepare_to_wait(&journal->j_wait_transaction_locked, &wait, + TASK_UNINTERRUPTIBLE); + need_to_start = !tid_geq(journal->j_commit_request, tid); + read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); + schedule(); + finish_wait(&journal->j_wait_transaction_locked, &wait); +} + +static void sub_reserved_credits(journal_t *journal, int blocks) +{ + atomic_sub(blocks, &journal->j_reserved_credits); + wake_up(&journal->j_wait_reserved); +} + +/* + * Wait until we can add credits for handle to the running transaction. Called + * with j_state_lock held for reading. Returns 0 if handle joined the running + * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and + * caller must retry. + */ +static int add_transaction_credits(journal_t *journal, int blocks, + int rsv_blocks) +{ + transaction_t *t = journal->j_running_transaction; + int needed; + int total = blocks + rsv_blocks; + + /* + * If the current transaction is locked down for commit, wait + * for the lock to be released. + */ + if (t->t_state == T_LOCKED) { + wait_transaction_locked(journal); + return 1; + } + + /* + * If there is not enough space left in the log to write all + * potential buffers requested by this operation, we need to + * stall pending a log checkpoint to free some more log space. + */ + needed = atomic_add_return(total, &t->t_outstanding_credits); + if (needed > journal->j_max_transaction_buffers) { + /* + * If the current transaction is already too large, + * then start to commit it: we can then go back and + * attach this handle to a new transaction. + */ + atomic_sub(total, &t->t_outstanding_credits); + wait_transaction_locked(journal); + return 1; + } + + /* + * The commit code assumes that it can get enough log space + * without forcing a checkpoint. This is *critical* for + * correctness: a checkpoint of a buffer which is also + * associated with a committing transaction creates a deadlock, + * so commit simply cannot force through checkpoints. + * + * We must therefore ensure the necessary space in the journal + * *before* starting to dirty potentially checkpointed buffers + * in the new transaction. + */ + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { + atomic_sub(total, &t->t_outstanding_credits); + read_unlock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) + __jbd2_log_wait_for_space(journal); + write_unlock(&journal->j_state_lock); + return 1; + } + + /* No reservation? We are done... */ + if (!rsv_blocks) + return 0; + + needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits); + /* We allow at most half of a transaction to be reserved */ + if (needed > journal->j_max_transaction_buffers / 2) { + sub_reserved_credits(journal, rsv_blocks); + atomic_sub(total, &t->t_outstanding_credits); + read_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_reserved, + atomic_read(&journal->j_reserved_credits) + rsv_blocks + <= journal->j_max_transaction_buffers / 2); + return 1; + } + return 0; +} + /* * start_this_handle: Given a handle, deal with any locking or stalling * needed to make sure that there is enough journal space for the handle @@ -151,18 +258,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle, gfp_t gfp_mask) { transaction_t *transaction, *new_transaction = NULL; - tid_t tid; - int needed, need_to_start; - int nblocks = handle->h_buffer_credits; + int blocks = handle->h_buffer_credits; + int rsv_blocks = 0; unsigned long ts = jiffies; - if (nblocks > journal->j_max_transaction_buffers) { + /* + * 1/2 of transaction can be reserved so we can practically handle + * only 1/2 of maximum transaction size per operation + */ + if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) { printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", - current->comm, nblocks, - journal->j_max_transaction_buffers); + current->comm, blocks, + journal->j_max_transaction_buffers / 2); return -ENOSPC; } + if (handle->h_rsv_handle) + rsv_blocks = handle->h_rsv_handle->h_buffer_credits; + alloc_transaction: if (!journal->j_running_transaction) { new_transaction = kmem_cache_zalloc(transaction_cache, @@ -199,8 +312,12 @@ repeat: return -EROFS; } - /* Wait on the journal's transaction barrier if necessary */ - if (journal->j_barrier_count) { + /* + * Wait on the journal's transaction barrier if necessary. Specifically + * we allow reserved handles to proceed because otherwise commit could + * deadlock on page writeback not being able to complete. + */ + if (!handle->h_reserved && journal->j_barrier_count) { read_unlock(&journal->j_state_lock); wait_event(journal->j_wait_transaction_locked, journal->j_barrier_count == 0); @@ -213,7 +330,7 @@ repeat: goto alloc_transaction; write_lock(&journal->j_state_lock); if (!journal->j_running_transaction && - !journal->j_barrier_count) { + (handle->h_reserved || !journal->j_barrier_count)) { jbd2_get_transaction(journal, new_transaction); new_transaction = NULL; } @@ -223,75 +340,18 @@ repeat: transaction = journal->j_running_transaction; - /* - * If the current transaction is locked down for commit, wait for the - * lock to be released. - */ - if (transaction->t_state == T_LOCKED) { - DEFINE_WAIT(wait); - - prepare_to_wait(&journal->j_wait_transaction_locked, - &wait, TASK_UNINTERRUPTIBLE); - read_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * If there is not enough space left in the log to write all potential - * buffers requested by this operation, we need to stall pending a log - * checkpoint to free some more log space. - */ - needed = atomic_add_return(nblocks, - &transaction->t_outstanding_credits); - - if (needed > journal->j_max_transaction_buffers) { + if (!handle->h_reserved) { + /* We may have dropped j_state_lock - restart in that case */ + if (add_transaction_credits(journal, blocks, rsv_blocks)) + goto repeat; + } else { /* - * If the current transaction is already too large, then start - * to commit it: we can then go back and attach this handle to - * a new transaction. + * We have handle reserved so we are allowed to join T_LOCKED + * transaction and we don't have to check for transaction size + * and journal space. */ - DEFINE_WAIT(wait); - - jbd_debug(2, "Handle %p starting new commit...\n", handle); - atomic_sub(nblocks, &transaction->t_outstanding_credits); - prepare_to_wait(&journal->j_wait_transaction_locked, &wait, - TASK_UNINTERRUPTIBLE); - tid = transaction->t_tid; - need_to_start = !tid_geq(journal->j_commit_request, tid); - read_unlock(&journal->j_state_lock); - if (need_to_start) - jbd2_log_start_commit(journal, tid); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * The commit code assumes that it can get enough log space - * without forcing a checkpoint. This is *critical* for - * correctness: a checkpoint of a buffer which is also - * associated with a committing transaction creates a deadlock, - * so commit simply cannot force through checkpoints. - * - * We must therefore ensure the necessary space in the journal - * *before* starting to dirty potentially checkpointed buffers - * in the new transaction. - * - * The worst part is, any transaction currently committing can - * reduce the free space arbitrarily. Be careful to account for - * those buffers when checkpointing. - */ - if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { - jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); - atomic_sub(nblocks, &transaction->t_outstanding_credits); - read_unlock(&journal->j_state_lock); - write_lock(&journal->j_state_lock); - if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) - __jbd2_log_wait_for_space(journal); - write_unlock(&journal->j_state_lock); - goto repeat; + sub_reserved_credits(journal, blocks); + handle->h_reserved = 0; } /* OK, account for the buffers that this operation expects to @@ -299,12 +359,12 @@ repeat: */ update_t_max_wait(transaction, ts); handle->h_transaction = transaction; - handle->h_requested_credits = nblocks; + handle->h_requested_credits = blocks; handle->h_start_jiffies = jiffies; atomic_inc(&transaction->t_updates); atomic_inc(&transaction->t_handle_count); - jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", - handle, nblocks, + jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n", + handle, blocks, atomic_read(&transaction->t_outstanding_credits), jbd2_log_space_left(journal)); read_unlock(&journal->j_state_lock); @@ -338,16 +398,21 @@ static handle_t *new_handle(int nblocks) * * We make sure that the transaction can guarantee at least nblocks of * modified buffers in the log. We block until the log can guarantee - * that much space. - * - * This function is visible to journal users (like ext3fs), so is not - * called with the journal already locked. + * that much space. Additionally, if rsv_blocks > 0, we also create another + * handle with rsv_blocks reserved blocks in the journal. This handle is + * is stored in h_rsv_handle. It is not attached to any particular transaction + * and thus doesn't block transaction commit. If the caller uses this reserved + * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() + * on the parent handle will dispose the reserved one. Reserved handle has to + * be converted to a normal handle using jbd2_journal_start_reserved() before + * it can be used. * * Return a pointer to a newly allocated handle, or an ERR_PTR() value * on failure. */ -handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, - unsigned int type, unsigned int line_no) +handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, + gfp_t gfp_mask, unsigned int type, + unsigned int line_no) { handle_t *handle = journal_current_handle(); int err; @@ -364,11 +429,25 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, handle = new_handle(nblocks); if (!handle) return ERR_PTR(-ENOMEM); + if (rsv_blocks) { + handle_t *rsv_handle; + + rsv_handle = new_handle(rsv_blocks); + if (!rsv_handle) { + jbd2_free_handle(handle); + return ERR_PTR(-ENOMEM); + } + rsv_handle->h_reserved = 1; + rsv_handle->h_journal = journal; + handle->h_rsv_handle = rsv_handle; + } current->journal_info = handle; err = start_this_handle(journal, handle, gfp_mask); if (err < 0) { + if (handle->h_rsv_handle) + jbd2_free_handle(handle->h_rsv_handle); jbd2_free_handle(handle); current->journal_info = NULL; return ERR_PTR(err); @@ -385,10 +464,68 @@ EXPORT_SYMBOL(jbd2__journal_start); handle_t *jbd2_journal_start(journal_t *journal, int nblocks) { - return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0); + return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0); } EXPORT_SYMBOL(jbd2_journal_start); +void jbd2_journal_free_reserved(handle_t *handle) +{ + journal_t *journal = handle->h_journal; + + WARN_ON(!handle->h_reserved); + sub_reserved_credits(journal, handle->h_buffer_credits); + jbd2_free_handle(handle); +} +EXPORT_SYMBOL(jbd2_journal_free_reserved); + +/** + * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle + * @handle: handle to start + * + * Start handle that has been previously reserved with jbd2_journal_reserve(). + * This attaches @handle to the running transaction (or creates one if there's + * not transaction running). Unlike jbd2_journal_start() this function cannot + * block on journal commit, checkpointing, or similar stuff. It can block on + * memory allocation or frozen journal though. + * + * Return 0 on success, non-zero on error - handle is freed in that case. + */ +int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, + unsigned int line_no) +{ + journal_t *journal = handle->h_journal; + int ret = -EIO; + + if (WARN_ON(!handle->h_reserved)) { + /* Someone passed in normal handle? Just stop it. */ + jbd2_journal_stop(handle); + return ret; + } + /* + * Usefulness of mixing of reserved and unreserved handles is + * questionable. So far nobody seems to need it so just error out. + */ + if (WARN_ON(current->journal_info)) { + jbd2_journal_free_reserved(handle); + return ret; + } + + handle->h_journal = NULL; + current->journal_info = handle; + /* + * GFP_NOFS is here because callers are likely from writeback or + * similarly constrained call sites + */ + ret = start_this_handle(journal, handle, GFP_NOFS); + if (ret < 0) { + current->journal_info = NULL; + jbd2_journal_free_reserved(handle); + } + handle->h_type = type; + handle->h_line_no = line_no; + return ret; +} +EXPORT_SYMBOL(jbd2_journal_start_reserved); /** * int jbd2_journal_extend() - extend buffer credits. @@ -483,7 +620,8 @@ out: * to a running handle, a call to jbd2_journal_restart will commit the * handle's transaction so far and reattach the handle to a new * transaction capabable of guaranteeing the requested number of - * credits. + * credits. We preserve reserved handle if there's any attached to the + * passed in handle. */ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) { @@ -508,6 +646,10 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) spin_lock(&transaction->t_handle_lock); atomic_sub(handle->h_buffer_credits, &transaction->t_outstanding_credits); + if (handle->h_rsv_handle) { + sub_reserved_credits(journal, + handle->h_rsv_handle->h_buffer_credits); + } if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); spin_unlock(&transaction->t_handle_lock); @@ -550,6 +692,14 @@ void jbd2_journal_lock_updates(journal_t *journal) write_lock(&journal->j_state_lock); ++journal->j_barrier_count; + /* Wait until there are no reserved handles */ + if (atomic_read(&journal->j_reserved_credits)) { + write_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_reserved, + atomic_read(&journal->j_reserved_credits) == 0); + write_lock(&journal->j_state_lock); + } + /* Wait until there are no running updates */ while (1) { transaction_t *transaction = journal->j_running_transaction; @@ -1505,6 +1655,8 @@ int jbd2_journal_stop(handle_t *handle) lock_map_release(&handle->h_lockdep_map); + if (handle->h_rsv_handle) + jbd2_journal_free_reserved(handle->h_rsv_handle); jbd2_free_handle(handle); return err; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8028dd581cb0..fb91c8debe6a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -410,8 +410,15 @@ struct jbd2_revoke_table_s; struct jbd2_journal_handle { - /* Which compound transaction is this update a part of? */ - transaction_t *h_transaction; + union { + /* Which compound transaction is this update a part of? */ + transaction_t *h_transaction; + /* Which journal handle belongs to - used iff h_reserved set */ + journal_t *h_journal; + }; + + /* Handle reserved for finishing the logical operation */ + handle_t *h_rsv_handle; /* Number of remaining buffers we are allowed to dirty: */ int h_buffer_credits; @@ -426,6 +433,7 @@ struct jbd2_journal_handle /* Flags [no locking] */ unsigned int h_sync: 1; /* sync-on-close */ unsigned int h_jdata: 1; /* force data journaling */ + unsigned int h_reserved: 1; /* handle with reserved credits */ unsigned int h_aborted: 1; /* fatal error on handle */ unsigned int h_type: 8; /* for handle statistics */ unsigned int h_line_no: 16; /* for handle statistics */ @@ -690,6 +698,7 @@ jbd2_time_diff(unsigned long start, unsigned long end) * @j_wait_done_commit: Wait queue for waiting for commit to complete * @j_wait_commit: Wait queue to trigger commit * @j_wait_updates: Wait queue to wait for updates to complete + * @j_wait_reserved: Wait queue to wait for reserved buffer credits to drop * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints * @j_head: Journal head - identifies the first unused block in the journal * @j_tail: Journal tail - identifies the oldest still-used block in the @@ -703,6 +712,7 @@ jbd2_time_diff(unsigned long start, unsigned long end) * journal * @j_fs_dev: Device which holds the client fs. For internal journal this will * be equal to j_dev + * @j_reserved_credits: Number of buffers reserved from the running transaction * @j_maxlen: Total maximum capacity of the journal region on disk. * @j_list_lock: Protects the buffer lists and internal buffer state. * @j_inode: Optional inode where we store the journal. If present, all journal @@ -801,6 +811,9 @@ struct journal_s /* Wait queue to wait for updates to complete */ wait_queue_head_t j_wait_updates; + /* Wait queue to wait for reserved buffer credits to drop */ + wait_queue_head_t j_wait_reserved; + /* Semaphore for locking against concurrent checkpoints */ struct mutex j_checkpoint_mutex; @@ -855,6 +868,9 @@ struct journal_s /* Total maximum capacity of the journal region on disk. */ unsigned int j_maxlen; + /* Number of buffers reserved from the running transaction */ + atomic_t j_reserved_credits; + /* * Protects the buffer lists and internal buffer state. */ @@ -1091,10 +1107,14 @@ static inline handle_t *journal_current_handle(void) */ extern handle_t *jbd2_journal_start(journal_t *, int nblocks); -extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask, - unsigned int type, unsigned int line_no); +extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks, + gfp_t gfp_mask, unsigned int type, + unsigned int line_no); extern int jbd2_journal_restart(handle_t *, int nblocks); extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask); +extern int jbd2_journal_start_reserved(handle_t *handle, + unsigned int type, unsigned int line_no); +extern void jbd2_journal_free_reserved(handle_t *handle); extern int jbd2_journal_extend (handle_t *, int nblocks); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); -- cgit v1.2.3 From 9ff864462477206bc23b405a6ae506e92fb6dc9c Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 12 Jun 2013 22:24:07 -0400 Subject: jbd2: optimize jbd2_journal_force_commit Current implementation of jbd2_journal_force_commit() is suboptimal because result in empty and useless commits. But callers just want to force and wait any unfinished commits. We already have jbd2_journal_force_commit_nested() which does exactly what we want, except we are guaranteed that we do not hold journal transaction open. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 62 +++++++++++++++++++++++++++++++++++++++------------ fs/jbd2/transaction.c | 23 ------------------- include/linux/jbd2.h | 2 +- 3 files changed, 49 insertions(+), 38 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 70990d682a0c..0e6c13bdcc14 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -529,20 +529,17 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) } /* - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. - * - * We can only force the running transaction if we don't have an active handle; - * otherwise, we will deadlock. - * - * Returns true if a transaction was started. + * Force and wait any uncommitted transactions. We can only force the running + * transaction if we don't have an active handle, otherwise, we will deadlock. + * Returns: <0 in case of error, + * 0 if nothing to commit, + * 1 if transaction was successfully committed. */ -int jbd2_journal_force_commit_nested(journal_t *journal) +static int __jbd2_journal_force_commit(journal_t *journal) { transaction_t *transaction = NULL; tid_t tid; - int need_to_start = 0; + int need_to_start = 0, ret = 0; read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { @@ -553,16 +550,53 @@ int jbd2_journal_force_commit_nested(journal_t *journal) transaction = journal->j_committing_transaction; if (!transaction) { + /* Nothing to commit */ read_unlock(&journal->j_state_lock); - return 0; /* Nothing to retry */ + return 0; } - tid = transaction->t_tid; read_unlock(&journal->j_state_lock); if (need_to_start) jbd2_log_start_commit(journal, tid); - jbd2_log_wait_commit(journal, tid); - return 1; + ret = jbd2_log_wait_commit(journal, tid); + if (!ret) + ret = 1; + + return ret; +} + +/** + * Force and wait upon a commit if the calling process is not within + * transaction. This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. + * + * @journal: journal to force + * Returns true if progress was made. + */ +int jbd2_journal_force_commit_nested(journal_t *journal) +{ + int ret; + + ret = __jbd2_journal_force_commit(journal); + return ret > 0; +} + +/** + * int journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * Caller want unconditional commit. We can only force the running transaction + * if we don't have an active handle, otherwise, we will deadlock. + */ +int jbd2_journal_force_commit(journal_t *journal) +{ + int ret; + + J_ASSERT(!current->journal_info); + ret = __jbd2_journal_force_commit(journal); + if (ret > 0) + ret = 0; + return ret; } /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f33342a2a95e..dd422e680418 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1661,29 +1661,6 @@ int jbd2_journal_stop(handle_t *handle) return err; } -/** - * int jbd2_journal_force_commit() - force any uncommitted transactions - * @journal: journal to force - * - * For synchronous operations: force any uncommitted transactions - * to disk. May seem kludgy, but it reuses all the handle batching - * code in a very simple manner. - */ -int jbd2_journal_force_commit(journal_t *journal) -{ - handle_t *handle; - int ret; - - handle = jbd2_journal_start(journal, 1); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - } else { - handle->h_sync = 1; - ret = jbd2_journal_stop(handle); - } - return ret; -} - /* * * List management code snippets: various functions for manipulating the diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fb91c8debe6a..c3645b9475f1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1160,6 +1160,7 @@ extern void jbd2_journal_ack_err (journal_t *); extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); +extern int jbd2_journal_force_commit_nested(journal_t *); extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); @@ -1235,7 +1236,6 @@ extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); int jbd2_log_start_commit(journal_t *journal, tid_t tid); int __jbd2_log_start_commit(journal_t *journal, tid_t tid); int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); -int jbd2_journal_force_commit_nested(journal_t *journal); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_complete_transaction(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); -- cgit v1.2.3 From 06a407f13daf9e48f0ef7189c7e54082b53940c7 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 12 Jun 2013 22:25:07 -0400 Subject: ext4: fix data integrity for ext4_sync_fs Inode's data or non journaled quota may be written w/o jounral so we _must_ send a barrier at the end of ext4_sync_fs. But it can be skipped if journal commit will do it for us. Also fix data integrity for nojournal mode. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 36 +++++++++++++++++++++++++++++++++++- include/linux/jbd2.h | 13 +++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7c8e1713e203..0f77c2e4b888 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -69,6 +69,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, static void ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); +static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); @@ -1097,6 +1098,7 @@ static const struct super_operations ext4_nojournal_sops = { .dirty_inode = ext4_dirty_inode, .drop_inode = ext4_drop_inode, .evict_inode = ext4_evict_inode, + .sync_fs = ext4_sync_fs_nojournal, .put_super = ext4_put_super, .statfs = ext4_statfs, .remount_fs = ext4_remount, @@ -4553,6 +4555,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) { int ret = 0; tid_t target; + bool needs_barrier = false; struct ext4_sb_info *sbi = EXT4_SB(sb); trace_ext4_sync_fs(sb, wait); @@ -4563,10 +4566,41 @@ static int ext4_sync_fs(struct super_block *sb, int wait) * no dirty dquots */ dquot_writeback_dquots(sb, -1); + /* + * Data writeback is possible w/o journal transaction, so barrier must + * being sent at the end of the function. But we can skip it if + * transaction_commit will do it for us. + */ + target = jbd2_get_latest_transaction(sbi->s_journal); + if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && + !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) + needs_barrier = true; + if (jbd2_journal_start_commit(sbi->s_journal, &target)) { if (wait) - jbd2_log_wait_commit(sbi->s_journal, target); + ret = jbd2_log_wait_commit(sbi->s_journal, target); + } + if (needs_barrier) { + int err; + err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); + if (!ret) + ret = err; } + + return ret; +} + +static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) +{ + int ret = 0; + + trace_ext4_sync_fs(sb, wait); + flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); + flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq); + dquot_writeback_dquots(sb, -1); + if (wait && test_opt(sb, BARRIER)) + ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); + return ret; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index c3645b9475f1..a79783faecad 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1374,6 +1374,19 @@ static inline u32 jbd2_chksum(journal_t *journal, u32 crc, return *(u32 *)desc.ctx; } +/* Return most recent uncommitted transaction */ +static inline tid_t jbd2_get_latest_transaction(journal_t *journal) +{ + tid_t tid; + + read_lock(&journal->j_state_lock); + tid = journal->j_commit_request; + if (journal->j_running_transaction) + tid = journal->j_running_transaction->t_tid; + read_unlock(&journal->j_state_lock); + return tid; +} + #ifdef __KERNEL__ #define buffer_trace_init(bh) do {} while (0) -- cgit v1.2.3 From c9b3a8ccb77e063a28a9567eb72e0c1222c4ade4 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 12 Jun 2013 23:02:35 -0400 Subject: jbd/jbd2: relocate bit_spinlock header to jbd_common The bit_spinlock functions are only used for the jbd_lock_bh_state functions (and friends) in jbd_common.h and are not directly used by either of jbd.h or jbd2.h content. The jbd_common file is new as of commit 446066724c36 ("jdb/jbd2: factor out common functions from the jbd[2] header files") but common (and isolated) headers were not considered for factoring at that time. Signed-off-by: Paul Gortmaker Signed-off-by: "Theodore Ts'o" --- include/linux/jbd.h | 1 - include/linux/jbd2.h | 1 - include/linux/jbd_common.h | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 2439054a6c9a..8685d1be12c7 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a79783faecad..25d9c9ea7a14 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h index b1f708976ffd..3dc53432355f 100644 --- a/include/linux/jbd_common.h +++ b/include/linux/jbd_common.h @@ -1,6 +1,8 @@ #ifndef _LINUX_JBD_STATE_H #define _LINUX_JBD_STATE_H +#include + static inline struct buffer_head *jh2bh(struct journal_head *jh) { return jh->b_bh; -- cgit v1.2.3 From 169f1a2a87aae44034da4b9f81be1683d33de6d0 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 12 Jun 2013 23:04:04 -0400 Subject: jbd2: use a single printk for jbd_debug() Since the jbd_debug() is implemented with two separate printk() calls, it can lead to corrupted and misleading debug output like the following (see lines marked with "*"): [ 290.339362] (fs/jbd2/journal.c, 203): kjournald2: kjournald2 wakes [ 290.339365] (fs/jbd2/journal.c, 155): kjournald2: commit_sequence=42103, commit_request=42104 [ 290.339369] (fs/jbd2/journal.c, 158): kjournald2: OK, requests differ [* 290.339376] (fs/jbd2/journal.c, 648): jbd2_log_wait_commit: [* 290.339379] (fs/jbd2/commit.c, 370): jbd2_journal_commit_transaction: JBD2: want 42104, j_commit_sequence=42103 [* 290.339382] JBD2: starting commit of transaction 42104 [ 290.339410] (fs/jbd2/revoke.c, 566): jbd2_journal_write_revoke_records: Wrote 0 revoke records [ 290.376555] (fs/jbd2/commit.c, 1088): jbd2_journal_commit_transaction: JBD2: commit 42104 complete, head 42079 i.e. the debug output from log_wait_commit and journal_commit_transaction have become interleaved. The output should have been: (fs/jbd2/journal.c, 648): jbd2_log_wait_commit: JBD2: want 42104, j_commit_sequence=42103 (fs/jbd2/commit.c, 370): jbd2_journal_commit_transaction: JBD2: starting commit of transaction 42104 It is expected that this is not easy to replicate -- I was only able to cause it on preempt-rt kernels, and even then only under heavy I/O load. Reported-by: Paul Gortmaker Suggested-by: "Theodore Ts'o" Signed-off-by: Paul Gortmaker Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 18 ++++++++++++++++++ include/linux/jbd2.h | 14 +++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0e6c13bdcc14..915dd575cd46 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -103,6 +103,24 @@ EXPORT_SYMBOL(jbd2_inode_cache); static void __journal_abort_soft (journal_t *journal, int errno); static int jbd2_journal_create_slab(size_t slab_size); +#ifdef CONFIG_JBD2_DEBUG +void __jbd2_debug(int level, const char *file, const char *func, + unsigned int line, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (level > jbd2_journal_enable_debug) + return; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); + va_end(args); +} +EXPORT_SYMBOL(__jbd2_debug); +#endif + /* Checksumming functions */ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 25d9c9ea7a14..0302f3f14063 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -56,17 +56,13 @@ */ #define JBD2_EXPENSIVE_CHECKING extern ushort jbd2_journal_enable_debug; +void __jbd2_debug(int level, const char *file, const char *func, + unsigned int line, const char *fmt, ...); -#define jbd_debug(n, f, a...) \ - do { \ - if ((n) <= jbd2_journal_enable_debug) { \ - printk (KERN_DEBUG "(%s, %d): %s: ", \ - __FILE__, __LINE__, __func__); \ - printk (f, ## a); \ - } \ - } while (0) +#define jbd_debug(n, fmt, a...) \ + __jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a) #else -#define jbd_debug(f, a...) /**/ +#define jbd_debug(n, fmt, a...) /**/ #endif extern void *jbd2_alloc(size_t size, gfp_t flags); -- cgit v1.2.3 From 41a5b913197c3a25fddef1735dc9b3d1fdc57428 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Jul 2013 08:12:41 -0400 Subject: jbd2: invalidate handle if jbd2_journal_restart() fails If jbd2_journal_restart() fails the handle will have been disconnected from the current transaction. In this situation, the handle must not be used for for any jbd2 function other than jbd2_journal_stop(). Enforce this with by treating a handle which has a NULL transaction pointer as an aborted handle, and issue a kernel warning if jbd2_journal_extent(), jbd2_journal_get_write_access(), jbd2_journal_dirty_metadata(), etc. is called with an invalid handle. This commit also fixes a bug where jbd2_journal_stop() would trip over a kernel jbd2 assertion check when trying to free an invalid handle. Also move the responsibility of setting current->journal_info to start_this_handle(), simplifying the three users of this function. Signed-off-by: "Theodore Ts'o" Reported-by: Younger Liu Cc: Jan Kara --- fs/jbd2/transaction.c | 74 ++++++++++++++++++++++++++++++--------------------- include/linux/jbd2.h | 2 +- 2 files changed, 44 insertions(+), 32 deletions(-) (limited to 'include/linux/jbd2.h') diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 383b0fbc6e19..7aa9a32573bb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -368,6 +368,7 @@ repeat: atomic_read(&transaction->t_outstanding_credits), jbd2_log_space_left(journal)); read_unlock(&journal->j_state_lock); + current->journal_info = handle; lock_map_acquire(&handle->h_lockdep_map); jbd2_journal_free_transaction(new_transaction); @@ -442,14 +443,11 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, handle->h_rsv_handle = rsv_handle; } - current->journal_info = handle; - err = start_this_handle(journal, handle, gfp_mask); if (err < 0) { if (handle->h_rsv_handle) jbd2_free_handle(handle->h_rsv_handle); jbd2_free_handle(handle); - current->journal_info = NULL; return ERR_PTR(err); } handle->h_type = type; @@ -511,16 +509,13 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, } handle->h_journal = NULL; - current->journal_info = handle; /* * GFP_NOFS is here because callers are likely from writeback or * similarly constrained call sites */ ret = start_this_handle(journal, handle, GFP_NOFS); - if (ret < 0) { - current->journal_info = NULL; + if (ret < 0) jbd2_journal_free_reserved(handle); - } handle->h_type = type; handle->h_line_no = line_no; return ret; @@ -550,20 +545,21 @@ EXPORT_SYMBOL(jbd2_journal_start_reserved); int jbd2_journal_extend(handle_t *handle, int nblocks) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; int result; int wanted; - result = -EIO; + WARN_ON(!transaction); if (is_handle_aborted(handle)) - goto out; + return -EROFS; + journal = transaction->t_journal; result = 1; read_lock(&journal->j_state_lock); /* Don't extend a locked-down transaction! */ - if (handle->h_transaction->t_state != T_RUNNING) { + if (transaction->t_state != T_RUNNING) { jbd_debug(3, "denied handle %p %d blocks: " "transaction not running\n", handle, nblocks); goto error_out; @@ -589,7 +585,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) } trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, - handle->h_transaction->t_tid, + transaction->t_tid, handle->h_type, handle->h_line_no, handle->h_buffer_credits, nblocks); @@ -603,7 +599,6 @@ unlock: spin_unlock(&transaction->t_handle_lock); error_out: read_unlock(&journal->j_state_lock); -out: return result; } @@ -626,14 +621,16 @@ out: int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; tid_t tid; int need_to_start, ret; + WARN_ON(!transaction); /* If we've had an abort of any type, don't even think about * actually doing the restart! */ if (is_handle_aborted(handle)) return 0; + journal = transaction->t_journal; /* * First unlink the handle from its current transaction, and start the @@ -654,6 +651,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) wake_up(&journal->j_wait_updates); tid = transaction->t_tid; spin_unlock(&transaction->t_handle_lock); + handle->h_transaction = NULL; + current->journal_info = NULL; jbd_debug(2, "restarting handle %p\n", handle); need_to_start = !tid_geq(journal->j_commit_request, tid); @@ -783,17 +782,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy) { struct buffer_head *bh; - transaction_t *transaction; + transaction_t *transaction = handle->h_transaction; journal_t *journal; int error; char *frozen_buffer = NULL; int need_copy = 0; unsigned long start_lock, time_lock; + WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; - - transaction = handle->h_transaction; journal = transaction->t_journal; jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); @@ -1052,14 +1050,16 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; struct journal_head *jh = jbd2_journal_add_journal_head(bh); int err; jbd_debug(5, "journal_head %p\n", jh); + WARN_ON(!transaction); err = -EROFS; if (is_handle_aborted(handle)) goto out; + journal = transaction->t_journal; err = 0; JBUFFER_TRACE(jh, "entry"); @@ -1265,12 +1265,14 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; struct journal_head *jh; int ret = 0; + WARN_ON(!transaction); if (is_handle_aborted(handle)) - goto out; + return -EROFS; + journal = transaction->t_journal; jh = jbd2_journal_grab_journal_head(bh); if (!jh) { ret = -EUCLEAN; @@ -1364,7 +1366,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "file as BJ_Metadata"); spin_lock(&journal->j_list_lock); - __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); + __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); @@ -1395,12 +1397,17 @@ out: int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; struct journal_head *jh; int drop_reserve = 0; int err = 0; int was_modified = 0; + WARN_ON(!transaction); + if (is_handle_aborted(handle)) + return -EROFS; + journal = transaction->t_journal; + BUFFER_TRACE(bh, "entry"); jbd_lock_bh_state(bh); @@ -1427,7 +1434,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) */ jh->b_modified = 0; - if (jh->b_transaction == handle->h_transaction) { + if (jh->b_transaction == transaction) { J_ASSERT_JH(jh, !jh->b_frozen_data); /* If we are forgetting a buffer which is already part @@ -1522,19 +1529,21 @@ drop: int jbd2_journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int err, wait_for_commit = 0; + journal_t *journal; + int err = 0, wait_for_commit = 0; tid_t tid; pid_t pid; + if (!transaction) + goto free_and_exit; + journal = transaction->t_journal; + J_ASSERT(journal_current_handle() == handle); if (is_handle_aborted(handle)) err = -EIO; - else { + else J_ASSERT(atomic_read(&transaction->t_updates) > 0); - err = 0; - } if (--handle->h_ref > 0) { jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, @@ -1544,7 +1553,7 @@ int jbd2_journal_stop(handle_t *handle) jbd_debug(4, "Handle %p going down\n", handle); trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, - handle->h_transaction->t_tid, + transaction->t_tid, handle->h_type, handle->h_line_no, jiffies - handle->h_start_jiffies, handle->h_sync, handle->h_requested_credits, @@ -1657,6 +1666,7 @@ int jbd2_journal_stop(handle_t *handle) if (handle->h_rsv_handle) jbd2_journal_free_reserved(handle->h_rsv_handle); +free_and_exit: jbd2_free_handle(handle); return err; } @@ -2362,10 +2372,12 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; + WARN_ON(!transaction); if (is_handle_aborted(handle)) - return -EIO; + return -EROFS; + journal = transaction->t_journal; jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, transaction->t_tid); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0302f3f14063..d5b50a19463c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1266,7 +1266,7 @@ static inline int is_journal_aborted(journal_t *journal) static inline int is_handle_aborted(handle_t *handle) { - if (handle->h_aborted) + if (handle->h_aborted || !handle->h_transaction) return 1; return is_journal_aborted(handle->h_transaction->t_journal); } -- cgit v1.2.3