summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/ext4.h13
-rw-r--r--fs/ext4/extents.c49
-rw-r--r--fs/ext4/inode.c136
-rw-r--r--fs/ext4/page-io.c110
-rw-r--r--fs/ext4/super.c10
-rw-r--r--fs/jbd2/commit.c13
-rw-r--r--fs/jbd2/journal.c30
-rw-r--r--fs/jbd2/transaction.c144
-rw-r--r--fs/ocfs2/suballoc.c19
-rw-r--r--include/linux/jbd2.h27
-rw-r--r--include/linux/journal-head.h21
11 files changed, 332 insertions, 240 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a606d17a80b0..c3ea2c818542 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -198,6 +198,12 @@ struct ext4_system_blocks {
*/
#define EXT4_IO_END_UNWRITTEN 0x0001
+struct ext4_io_end_vec {
+ struct list_head list; /* list of io_end_vec */
+ loff_t offset; /* offset in the file */
+ ssize_t size; /* size of the extent */
+};
+
/*
* For converting unwritten extents on a work queue. 'handle' is used for
* buffered writeback.
@@ -211,8 +217,7 @@ typedef struct ext4_io_end {
* bios covering the extent */
unsigned int flag; /* unwritten or not */
atomic_t count; /* reference counter */
- loff_t offset; /* offset in the file */
- ssize_t size; /* size of the extent */
+ struct list_head list_vec; /* list of ext4_io_end_vec */
} ext4_io_end_t;
struct ext4_io_submit {
@@ -3263,6 +3268,8 @@ extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
loff_t offset, ssize_t len);
+extern int ext4_convert_unwritten_io_end_vec(handle_t *handle,
+ ext4_io_end_t *io_end);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
@@ -3325,6 +3332,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
int len,
struct writeback_control *wbc,
bool keep_towrite);
+extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
+extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ed28b21b826d..59f741bedf2f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4990,23 +4990,13 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
int ret = 0;
int ret2 = 0;
struct ext4_map_blocks map;
- unsigned int credits, blkbits = inode->i_blkbits;
+ unsigned int blkbits = inode->i_blkbits;
+ unsigned int credits = 0;
map.m_lblk = offset >> blkbits;
max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
- /*
- * This is somewhat ugly but the idea is clear: When transaction is
- * reserved, everything goes into it. Otherwise we rather start several
- * smaller transactions for conversion of each extent separately.
- */
- if (handle) {
- handle = ext4_journal_start_reserved(handle,
- EXT4_HT_EXT_CONVERT);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- credits = 0;
- } else {
+ if (!handle) {
/*
* credits to insert 1 extent into extent tree
*/
@@ -5037,11 +5027,40 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
if (ret <= 0 || ret2)
break;
}
- if (!credits)
- ret2 = ext4_journal_stop(handle);
return ret > 0 ? ret2 : ret;
}
+int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
+{
+ int ret, err = 0;
+ struct ext4_io_end_vec *io_end_vec;
+
+ /*
+ * This is somewhat ugly but the idea is clear: When transaction is
+ * reserved, everything goes into it. Otherwise we rather start several
+ * smaller transactions for conversion of each extent separately.
+ */
+ if (handle) {
+ handle = ext4_journal_start_reserved(handle,
+ EXT4_HT_EXT_CONVERT);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ }
+
+ list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
+ ret = ext4_convert_unwritten_extents(handle, io_end->inode,
+ io_end_vec->offset,
+ io_end_vec->size);
+ if (ret)
+ break;
+ }
+
+ if (handle)
+ err = ext4_journal_stop(handle);
+
+ return ret < 0 ? ret : err;
+}
+
/*
* If newes is not existing extent (newes->ec_pblk equals zero) find
* delayed extent at start of newes and update newes accordingly and
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ed8d9c593d60..61052c67952e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2324,6 +2324,75 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
}
/*
+ * mpage_process_page - update page buffers corresponding to changed extent and
+ * may submit fully mapped page for IO
+ *
+ * @mpd - description of extent to map, on return next extent to map
+ * @m_lblk - logical block mapping.
+ * @m_pblk - corresponding physical mapping.
+ * @map_bh - determines on return whether this page requires any further
+ * mapping or not.
+ * Scan given page buffers corresponding to changed extent and update buffer
+ * state according to new extent state.
+ * We map delalloc buffers to their physical location, clear unwritten bits.
+ * If the given page is not fully mapped, we update @map to the next extent in
+ * the given page that needs mapping & return @map_bh as true.
+ */
+static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
+ ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk,
+ bool *map_bh)
+{
+ struct buffer_head *head, *bh;
+ ext4_io_end_t *io_end = mpd->io_submit.io_end;
+ ext4_lblk_t lblk = *m_lblk;
+ ext4_fsblk_t pblock = *m_pblk;
+ int err = 0;
+ int blkbits = mpd->inode->i_blkbits;
+ ssize_t io_end_size = 0;
+ struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
+
+ bh = head = page_buffers(page);
+ do {
+ if (lblk < mpd->map.m_lblk)
+ continue;
+ if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
+ /*
+ * Buffer after end of mapped extent.
+ * Find next buffer in the page to map.
+ */
+ mpd->map.m_len = 0;
+ mpd->map.m_flags = 0;
+ io_end_vec->size += io_end_size;
+ io_end_size = 0;
+
+ err = mpage_process_page_bufs(mpd, head, bh, lblk);
+ if (err > 0)
+ err = 0;
+ if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) {
+ io_end_vec = ext4_alloc_io_end_vec(io_end);
+ io_end_vec->offset = mpd->map.m_lblk << blkbits;
+ }
+ *map_bh = true;
+ goto out;
+ }
+ if (buffer_delay(bh)) {
+ clear_buffer_delay(bh);
+ bh->b_blocknr = pblock++;
+ }
+ clear_buffer_unwritten(bh);
+ io_end_size += (1 << blkbits);
+ } while (lblk++, (bh = bh->b_this_page) != head);
+
+ io_end_vec->size += io_end_size;
+ io_end_size = 0;
+ *map_bh = false;
+out:
+ *m_lblk = lblk;
+ *m_pblk = pblock;
+ return err;
+}
+
+/*
* mpage_map_buffers - update buffers corresponding to changed extent and
* submit fully mapped pages for IO
*
@@ -2342,12 +2411,12 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
struct pagevec pvec;
int nr_pages, i;
struct inode *inode = mpd->inode;
- struct buffer_head *head, *bh;
int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
pgoff_t start, end;
ext4_lblk_t lblk;
- sector_t pblock;
+ ext4_fsblk_t pblock;
int err;
+ bool map_bh = false;
start = mpd->map.m_lblk >> bpp_bits;
end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
@@ -2363,50 +2432,19 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- bh = head = page_buffers(page);
- do {
- if (lblk < mpd->map.m_lblk)
- continue;
- if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
- /*
- * Buffer after end of mapped extent.
- * Find next buffer in the page to map.
- */
- mpd->map.m_len = 0;
- mpd->map.m_flags = 0;
- /*
- * FIXME: If dioread_nolock supports
- * blocksize < pagesize, we need to make
- * sure we add size mapped so far to
- * io_end->size as the following call
- * can submit the page for IO.
- */
- err = mpage_process_page_bufs(mpd, head,
- bh, lblk);
- pagevec_release(&pvec);
- if (err > 0)
- err = 0;
- return err;
- }
- if (buffer_delay(bh)) {
- clear_buffer_delay(bh);
- bh->b_blocknr = pblock++;
- }
- clear_buffer_unwritten(bh);
- } while (lblk++, (bh = bh->b_this_page) != head);
-
+ err = mpage_process_page(mpd, page, &lblk, &pblock,
+ &map_bh);
/*
- * FIXME: This is going to break if dioread_nolock
- * supports blocksize < pagesize as we will try to
- * convert potentially unmapped parts of inode.
+ * If map_bh is true, means page may require further bh
+ * mapping, or maybe the page was submitted for IO.
+ * So we return to call further extent mapping.
*/
- mpd->io_submit.io_end->size += PAGE_SIZE;
+ if (err < 0 || map_bh == true)
+ goto out;
/* Page fully mapped - let IO run! */
err = mpage_submit_page(mpd, page);
- if (err < 0) {
- pagevec_release(&pvec);
- return err;
- }
+ if (err < 0)
+ goto out;
}
pagevec_release(&pvec);
}
@@ -2414,6 +2452,9 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
mpd->map.m_len = 0;
mpd->map.m_flags = 0;
return 0;
+out:
+ pagevec_release(&pvec);
+ return err;
}
static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
@@ -2493,9 +2534,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
int err;
loff_t disksize;
int progress = 0;
+ ext4_io_end_t *io_end = mpd->io_submit.io_end;
+ struct ext4_io_end_vec *io_end_vec = ext4_alloc_io_end_vec(io_end);
- mpd->io_submit.io_end->offset =
- ((loff_t)map->m_lblk) << inode->i_blkbits;
+ io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
do {
err = mpage_map_one_extent(handle, mpd);
if (err < 0) {
@@ -3596,6 +3638,7 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
ext4_io_end_t *io_end = private;
+ struct ext4_io_end_vec *io_end_vec;
/* if not async direct IO just return */
if (!io_end)
@@ -3613,8 +3656,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ext4_clear_io_unwritten_flag(io_end);
size = 0;
}
- io_end->offset = offset;
- io_end->size = size;
+ io_end_vec = ext4_alloc_io_end_vec(io_end);
+ io_end_vec->offset = offset;
+ io_end_vec->size = size;
ext4_put_io_end(io_end);
return 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 12ceadef32c5..893913d1c2fe 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -31,18 +31,56 @@
#include "acl.h"
static struct kmem_cache *io_end_cachep;
+static struct kmem_cache *io_end_vec_cachep;
int __init ext4_init_pageio(void)
{
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
if (io_end_cachep == NULL)
return -ENOMEM;
+
+ io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
+ if (io_end_vec_cachep == NULL) {
+ kmem_cache_destroy(io_end_cachep);
+ return -ENOMEM;
+ }
return 0;
}
void ext4_exit_pageio(void)
{
kmem_cache_destroy(io_end_cachep);
+ kmem_cache_destroy(io_end_vec_cachep);
+}
+
+struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
+{
+ struct ext4_io_end_vec *io_end_vec;
+
+ io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
+ if (!io_end_vec)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&io_end_vec->list);
+ list_add_tail(&io_end_vec->list, &io_end->list_vec);
+ return io_end_vec;
+}
+
+static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
+{
+ struct ext4_io_end_vec *io_end_vec, *tmp;
+
+ if (list_empty(&io_end->list_vec))
+ return;
+ list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
+ list_del(&io_end_vec->list);
+ kmem_cache_free(io_end_vec_cachep, io_end_vec);
+ }
+}
+
+struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
+{
+ BUG_ON(list_empty(&io_end->list_vec));
+ return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
}
/*
@@ -125,6 +163,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
ext4_finish_bio(bio);
bio_put(bio);
}
+ ext4_free_io_end_vec(io_end);
kmem_cache_free(io_end_cachep, io_end);
}
@@ -136,29 +175,26 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
* cannot get to ext4_ext_truncate() before all IOs overlapping that range are
* completed (happens from ext4_free_ioend()).
*/
-static int ext4_end_io(ext4_io_end_t *io)
+static int ext4_end_io_end(ext4_io_end_t *io_end)
{
- struct inode *inode = io->inode;
- loff_t offset = io->offset;
- ssize_t size = io->size;
- handle_t *handle = io->handle;
+ struct inode *inode = io_end->inode;
+ handle_t *handle = io_end->handle;
int ret = 0;
- ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+ ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n",
- io, inode->i_ino, io->list.next, io->list.prev);
+ io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
- io->handle = NULL; /* Following call will use up the handle */
- ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
+ io_end->handle = NULL; /* Following call will use up the handle */
+ ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "
- "(inode %lu, offset %llu, size %zd, error %d)",
- inode->i_ino, offset, size, ret);
+ "(inode %lu, error %d)", inode->i_ino, ret);
}
- ext4_clear_io_unwritten_flag(io);
- ext4_release_io_end(io);
+ ext4_clear_io_unwritten_flag(io_end);
+ ext4_release_io_end(io_end);
return ret;
}
@@ -166,21 +202,21 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
{
#ifdef EXT4FS_DEBUG
struct list_head *cur, *before, *after;
- ext4_io_end_t *io, *io0, *io1;
+ ext4_io_end_t *io_end, *io_end0, *io_end1;
if (list_empty(head))
return;
ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
- list_for_each_entry(io, head, list) {
- cur = &io->list;
+ list_for_each_entry(io_end, head, list) {
+ cur = &io_end->list;
before = cur->prev;
- io0 = container_of(before, ext4_io_end_t, list);
+ io_end0 = container_of(before, ext4_io_end_t, list);
after = cur->next;
- io1 = container_of(after, ext4_io_end_t, list);
+ io_end1 = container_of(after, ext4_io_end_t, list);
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
- io, inode->i_ino, io0, io1);
+ io_end, inode->i_ino, io_end0, io_end1);
}
#endif
}
@@ -207,7 +243,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
static int ext4_do_flush_completed_IO(struct inode *inode,
struct list_head *head)
{
- ext4_io_end_t *io;
+ ext4_io_end_t *io_end;
struct list_head unwritten;
unsigned long flags;
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -219,11 +255,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
while (!list_empty(&unwritten)) {
- io = list_entry(unwritten.next, ext4_io_end_t, list);
- BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
- list_del_init(&io->list);
+ io_end = list_entry(unwritten.next, ext4_io_end_t, list);
+ BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+ list_del_init(&io_end->list);
- err = ext4_end_io(io);
+ err = ext4_end_io_end(io_end);
if (unlikely(!ret && err))
ret = err;
}
@@ -242,19 +278,22 @@ void ext4_end_io_rsv_work(struct work_struct *work)
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
- ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
- if (io) {
- io->inode = inode;
- INIT_LIST_HEAD(&io->list);
- atomic_set(&io->count, 1);
+ ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);
+
+ if (io_end) {
+ io_end->inode = inode;
+ INIT_LIST_HEAD(&io_end->list);
+ INIT_LIST_HEAD(&io_end->list_vec);
+ atomic_set(&io_end->count, 1);
}
- return io;
+ return io_end;
}
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
if (atomic_dec_and_test(&io_end->count)) {
- if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
+ list_empty(&io_end->list_vec)) {
ext4_release_io_end(io_end);
return;
}
@@ -268,9 +307,8 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
if (atomic_dec_and_test(&io_end->count)) {
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
- err = ext4_convert_unwritten_extents(io_end->handle,
- io_end->inode, io_end->offset,
- io_end->size);
+ err = ext4_convert_unwritten_io_end_vec(io_end->handle,
+ io_end);
io_end->handle = NULL;
ext4_clear_io_unwritten_flag(io_end);
}
@@ -307,10 +345,8 @@ static void ext4_end_bio(struct bio *bio)
struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
- "(offset %llu size %ld starting block %llu)",
+ "starting block %llu)",
bio->bi_status, inode->i_ino,
- (unsigned long long) io_end->offset,
- (long) io_end->size,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
mapping_set_error(inode->i_mapping,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dd654e53ba3d..7796e2ffc294 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2105,16 +2105,6 @@ static int parse_options(char *options, struct super_block *sb,
}
}
#endif
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- int blocksize =
- BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
-
- if (blocksize < PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
- return 0;
- }
- }
return 1;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 9047f8e269d0..7f0b362b3842 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (jh->b_committed_data) {
struct buffer_head *bh = jh2bh(jh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
jbd2_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
jbd2_journal_refile_buffer(journal, jh);
}
@@ -921,6 +921,7 @@ restart_loop:
transaction_t *cp_transaction;
struct buffer_head *bh;
int try_to_free = 0;
+ bool drop_ref;
jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
@@ -930,7 +931,7 @@ restart_loop:
* done with it.
*/
get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
/*
@@ -1025,8 +1026,10 @@ restart_loop:
try_to_free = 1;
}
JBUFFER_TRACE(jh, "refile or unfile buffer");
- __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
+ drop_ref = __jbd2_journal_refile_buffer(jh);
+ spin_unlock(&jh->b_state_lock);
+ if (drop_ref)
+ jbd2_journal_put_journal_head(jh);
if (try_to_free)
release_buffer_page(bh); /* Drops bh reference */
else
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eef809f61722..5e408ee24a1a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
/* keep subsequent assertions sane */
atomic_set(&new_bh->b_count, 1);
- jbd_lock_bh_state(bh_in);
+ spin_lock(&jh_in->b_state_lock);
repeat:
/*
* If a new transaction has already done a buffer copy-out, then
@@ -405,13 +405,13 @@ repeat:
if (need_copy_out && !done_copy_out) {
char *tmp;
- jbd_unlock_bh_state(bh_in);
+ spin_unlock(&jh_in->b_state_lock);
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
if (!tmp) {
brelse(new_bh);
return -ENOMEM;
}
- jbd_lock_bh_state(bh_in);
+ spin_lock(&jh_in->b_state_lock);
if (jh_in->b_frozen_data) {
jbd2_free(tmp, bh_in->b_size);
goto repeat;
@@ -464,7 +464,7 @@ repeat:
__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
spin_unlock(&journal->j_list_lock);
set_buffer_shadow(bh_in);
- jbd_unlock_bh_state(bh_in);
+ spin_unlock(&jh_in->b_state_lock);
return do_escape | (done_copy_out << 1);
}
@@ -2443,6 +2443,8 @@ static struct journal_head *journal_alloc_journal_head(void)
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
}
+ if (ret)
+ spin_lock_init(&ret->b_state_lock);
return ret;
}
@@ -2562,17 +2564,23 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
J_ASSERT_BH(bh, buffer_jbd(bh));
J_ASSERT_BH(bh, jh2bh(jh) == bh);
BUFFER_TRACE(bh, "remove journal_head");
+
+ /* Unlink before dropping the lock */
+ bh->b_private = NULL;
+ jh->b_bh = NULL; /* debug, really */
+ clear_buffer_jbd(bh);
+}
+
+static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
+{
if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
- jbd2_free(jh->b_frozen_data, bh->b_size);
+ jbd2_free(jh->b_frozen_data, b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
- jbd2_free(jh->b_committed_data, bh->b_size);
+ jbd2_free(jh->b_committed_data, b_size);
}
- bh->b_private = NULL;
- jh->b_bh = NULL; /* debug, really */
- clear_buffer_jbd(bh);
journal_free_journal_head(jh);
}
@@ -2590,9 +2598,11 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
+ journal_release_journal_head(jh, bh->b_size);
__brelse(bh);
- } else
+ } else {
jbd_unlock_bh_journal_head(bh);
+ }
}
/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a9d3a2208506..c068912408dd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -950,7 +950,7 @@ repeat:
start_lock = jiffies;
lock_buffer(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
/* If it takes too long to lock the buffer, trace it */
time_lock = jbd2_time_diff(start_lock, jiffies);
@@ -1000,7 +1000,7 @@ repeat:
error = -EROFS;
if (is_handle_aborted(handle)) {
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
goto out;
}
error = 0;
@@ -1064,7 +1064,7 @@ repeat:
*/
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
goto repeat;
}
@@ -1085,7 +1085,7 @@ repeat:
JBUFFER_TRACE(jh, "generate frozen data");
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS | __GFP_NOFAIL);
goto repeat;
@@ -1104,7 +1104,7 @@ attach_next:
jh->b_next_transaction = transaction;
done:
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
/*
* If we are about to journal a buffer, then any revoke pending on it is
@@ -1243,7 +1243,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
* that case: the transaction must have deleted the buffer for it to be
* reused here.
*/
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
jh->b_transaction == NULL ||
(jh->b_transaction == journal->j_committing_transaction &&
@@ -1278,7 +1278,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
}
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
/*
* akpm: I added this. ext3_alloc_branch can pick up new indirect
@@ -1346,13 +1346,13 @@ repeat:
committed_data = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS|__GFP_NOFAIL);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
if (!jh->b_committed_data) {
/* Copy out the current buffer contents into the
* preserved, committed copy. */
JBUFFER_TRACE(jh, "generate b_committed data");
if (!committed_data) {
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
goto repeat;
}
@@ -1360,7 +1360,7 @@ repeat:
committed_data = NULL;
memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
}
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
out:
jbd2_journal_put_journal_head(jh);
if (unlikely(committed_data))
@@ -1461,16 +1461,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction != transaction &&
jh->b_next_transaction != transaction) {
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
if (jh->b_modified == 1) {
/* If it's in our transaction it must be in BJ_Metadata list. */
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) {
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)
pr_err("JBD2: assertion failure: h_type=%u "
@@ -1480,13 +1480,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
goto out;
}
journal = transaction->t_journal;
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
if (jh->b_modified == 0) {
/*
@@ -1572,7 +1572,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
spin_unlock(&journal->j_list_lock);
out_unlock_bh:
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
out:
JBUFFER_TRACE(jh, "exit");
return ret;
@@ -1610,18 +1610,20 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
BUFFER_TRACE(bh, "entry");
- jbd_lock_bh_state(bh);
+ jh = jbd2_journal_grab_journal_head(bh);
+ if (!jh) {
+ __bforget(bh);
+ return 0;
+ }
- if (!buffer_jbd(bh))
- goto not_jbd;
- jh = bh2jh(bh);
+ spin_lock(&jh->b_state_lock);
/* Critical error: attempting to delete a bitmap buffer, maybe?
* Don't do any jbd operations, and return an error. */
if (!J_EXPECT_JH(jh, !jh->b_committed_data,
"inconsistent data on disk")) {
err = -EIO;
- goto not_jbd;
+ goto drop;
}
/* keep track of whether or not this transaction modified us */
@@ -1669,10 +1671,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__jbd2_journal_unfile_buffer(jh);
- if (!buffer_jbd(bh)) {
- spin_unlock(&journal->j_list_lock);
- goto not_jbd;
- }
+ jbd2_journal_put_journal_head(jh);
}
spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction) {
@@ -1714,7 +1713,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
if (!jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "belongs to none transaction");
spin_unlock(&journal->j_list_lock);
- goto not_jbd;
+ goto drop;
}
/*
@@ -1724,7 +1723,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
if (!buffer_dirty(bh)) {
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
- goto not_jbd;
+ goto drop;
}
/*
@@ -1737,20 +1736,15 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
spin_unlock(&journal->j_list_lock);
}
-
- jbd_unlock_bh_state(bh);
- __brelse(bh);
drop:
+ __brelse(bh);
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
if (drop_reserve) {
/* no need to reserve log space for this block -bzzz */
handle->h_total_credits++;
}
return err;
-
-not_jbd:
- jbd_unlock_bh_state(bh);
- __bforget(bh);
- goto drop;
}
/**
@@ -1920,7 +1914,7 @@ free_and_exit:
*
* j_list_lock is held.
*
- * jbd_lock_bh_state(jh2bh(jh)) is held.
+ * jh->b_state_lock is held.
*/
static inline void
@@ -1944,7 +1938,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
*
* Called with j_list_lock held, and the journal may not be locked.
*
- * jbd_lock_bh_state(jh2bh(jh)) is held.
+ * jh->b_state_lock is held.
*/
static inline void
@@ -1976,7 +1970,7 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
transaction_t *transaction;
struct buffer_head *bh = jh2bh(jh);
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
transaction = jh->b_transaction;
if (transaction)
assert_spin_locked(&transaction->t_journal->j_list_lock);
@@ -2013,17 +2007,15 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
}
/*
- * Remove buffer from all transactions.
+ * Remove buffer from all transactions. The caller is responsible for dropping
+ * the jh reference that belonged to the transaction.
*
* Called with bh_state lock and j_list_lock
- *
- * jh and bh may be already freed when this function returns.
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
- jbd2_journal_put_journal_head(jh);
}
void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
@@ -2032,18 +2024,19 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
__jbd2_journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
__brelse(bh);
}
/*
* Called from jbd2_journal_try_to_free_buffers().
*
- * Called under jbd_lock_bh_state(bh)
+ * Called under jh->b_state_lock
*/
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
@@ -2130,10 +2123,10 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
if (!jh)
continue;
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
__journal_try_to_free_buffer(journal, bh);
+ spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
- jbd_unlock_bh_state(bh);
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
@@ -2154,7 +2147,7 @@ busy:
*
* Called under j_list_lock.
*
- * Called under jbd_lock_bh_state(bh).
+ * Called under jh->b_state_lock.
*/
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
@@ -2175,6 +2168,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
} else {
JBUFFER_TRACE(jh, "on running transaction");
__jbd2_journal_unfile_buffer(jh);
+ jbd2_journal_put_journal_head(jh);
}
return may_free;
}
@@ -2241,18 +2235,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
* holding the page lock. --sct
*/
- if (!buffer_jbd(bh))
+ jh = jbd2_journal_grab_journal_head(bh);
+ if (!jh)
goto zap_buffer_unlocked;
/* OK, we have data buffer in journaled mode */
write_lock(&journal->j_state_lock);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
- jh = jbd2_journal_grab_journal_head(bh);
- if (!jh)
- goto zap_buffer_no_jh;
-
/*
* We cannot remove the buffer from checkpoint lists until the
* transaction adding inode to orphan list (let's call it T)
@@ -2331,10 +2322,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
* for commit and try again.
*/
if (partial_page) {
- jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
return -EBUSY;
}
/*
@@ -2346,10 +2337,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
set_buffer_freed(bh);
if (journal->j_running_transaction && buffer_jbddirty(bh))
jh->b_next_transaction = journal->j_running_transaction;
- jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
return 0;
} else {
/* Good, the buffer belongs to the running transaction.
@@ -2373,11 +2364,10 @@ zap_buffer:
* here.
*/
jh->b_modified = 0;
- jbd2_journal_put_journal_head(jh);
-zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
zap_buffer_unlocked:
clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2464,7 +2454,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
int was_dirty = 0;
struct buffer_head *bh = jh2bh(jh);
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
assert_spin_locked(&transaction->t_journal->j_list_lock);
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
@@ -2526,11 +2516,11 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
void jbd2_journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist)
{
- jbd_lock_bh_state(jh2bh(jh));
+ spin_lock(&jh->b_state_lock);
spin_lock(&transaction->t_journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, jlist);
spin_unlock(&transaction->t_journal->j_list_lock);
- jbd_unlock_bh_state(jh2bh(jh));
+ spin_unlock(&jh->b_state_lock);
}
/*
@@ -2540,23 +2530,25 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
* buffer on that transaction's metadata list.
*
* Called under j_list_lock
- * Called under jbd_lock_bh_state(jh2bh(jh))
+ * Called under jh->b_state_lock
*
- * jh and bh may be already free when this function returns
+ * When this function returns true, there's no next transaction to refile to
+ * and the caller has to drop jh reference through
+ * jbd2_journal_put_journal_head().
*/
-void __jbd2_journal_refile_buffer(struct journal_head *jh)
+bool __jbd2_journal_refile_buffer(struct journal_head *jh)
{
int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh);
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
if (jh->b_transaction)
assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
/* If the buffer is now unused, just drop it. */
if (jh->b_next_transaction == NULL) {
__jbd2_journal_unfile_buffer(jh);
- return;
+ return true;
}
/*
@@ -2584,6 +2576,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
if (was_dirty)
set_buffer_jbddirty(bh);
+ return false;
}
/*
@@ -2594,16 +2587,15 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
*/
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
- struct buffer_head *bh = jh2bh(jh);
+ bool drop;
- /* Get reference so that buffer cannot be freed before we unlock it */
- get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
- __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
+ drop = __jbd2_journal_refile_buffer(jh);
+ spin_unlock(&jh->b_state_lock);
spin_unlock(&journal->j_list_lock);
- __brelse(bh);
+ if (drop)
+ jbd2_journal_put_journal_head(jh);
}
/*
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 69c21a3843af..4180c3ef0a68 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
int nr)
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+ struct journal_head *jh;
int ret;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
@@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
if (!buffer_jbd(bg_bh))
return 1;
- jbd_lock_bh_state(bg_bh);
- bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
+ jh = bh2jh(bg_bh);
+ spin_lock(&jh->b_state_lock);
+ bg = (struct ocfs2_group_desc *) jh->b_committed_data;
if (bg)
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
else
ret = 1;
- jbd_unlock_bh_state(bg_bh);
+ spin_unlock(&jh->b_state_lock);
return ret;
}
@@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
int status;
unsigned int tmp;
struct ocfs2_group_desc *undo_bg = NULL;
+ struct journal_head *jh;
/* The caller got this descriptor from
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
@@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
goto bail;
}
+ jh = bh2jh(group_bh);
if (undo_fn) {
- jbd_lock_bh_state(group_bh);
- undo_bg = (struct ocfs2_group_desc *)
- bh2jh(group_bh)->b_committed_data;
+ spin_lock(&jh->b_state_lock);
+ undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
BUG_ON(!undo_bg);
}
@@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
le16_add_cpu(&bg->bg_free_bits_count, num_bits);
if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
if (undo_fn)
- jbd_unlock_bh_state(group_bh);
+ spin_unlock(&jh->b_state_lock);
return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
(unsigned long long)le64_to_cpu(bg->bg_blkno),
le16_to_cpu(bg->bg_bits),
@@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
}
if (undo_fn)
- jbd_unlock_bh_state(group_bh);
+ spin_unlock(&jh->b_state_lock);
ocfs2_journal_dirty(handle, group_bh);
bail:
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 3115eeb44039..587c146d3987 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -313,7 +313,6 @@ enum jbd_state_bits {
BH_Revoked, /* Has been revoked from the log */
BH_RevokeValid, /* Revoked flag is valid */
BH_JBDDirty, /* Is dirty but journaled */
- BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Shadow, /* IO on shadow buffer is running */
BH_Verified, /* Metadata block has been verified ok */
@@ -342,26 +341,6 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
return bh->b_private;
}
-static inline void jbd_lock_bh_state(struct buffer_head *bh)
-{
- bit_spin_lock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_trylock_bh_state(struct buffer_head *bh)
-{
- return bit_spin_trylock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
-{
- return bit_spin_is_locked(BH_State, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_state(struct buffer_head *bh)
-{
- bit_spin_unlock(BH_State, &bh->b_state);
-}
-
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_lock(BH_JournalHead, &bh->b_state);
@@ -561,9 +540,9 @@ struct transaction_chp_stats_s {
* ->jbd_lock_bh_journal_head() (This is "innermost")
*
* j_state_lock
- * ->jbd_lock_bh_state()
+ * ->b_state_lock
*
- * jbd_lock_bh_state()
+ * b_state_lock
* ->j_list_lock
*
* j_state_lock
@@ -1277,7 +1256,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
/* Filing buffers */
extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
-extern void __jbd2_journal_refile_buffer(struct journal_head *);
+extern bool __jbd2_journal_refile_buffer(struct journal_head *);
extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
extern void __journal_free_buffer(struct journal_head *bh);
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 9fb870524314..75bc56109031 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -11,6 +11,8 @@
#ifndef JOURNAL_HEAD_H_INCLUDED
#define JOURNAL_HEAD_H_INCLUDED
+#include <linux/spinlock.h>
+
typedef unsigned int tid_t; /* Unique transaction ID */
typedef struct transaction_s transaction_t; /* Compound transaction type */
@@ -24,13 +26,18 @@ struct journal_head {
struct buffer_head *b_bh;
/*
+ * Protect the buffer head state
+ */
+ spinlock_t b_state_lock;
+
+ /*
* Reference count - see description in journal.c
* [jbd_lock_bh_journal_head()]
*/
int b_jcount;
/*
- * Journalling list for this buffer [jbd_lock_bh_state()]
+ * Journalling list for this buffer [b_state_lock]
* NOTE: We *cannot* combine this with b_modified into a bitfield
* as gcc would then (which the C standard allows but which is
* very unuseful) make 64-bit accesses to the bitfield and clobber
@@ -41,20 +48,20 @@ struct journal_head {
/*
* This flag signals the buffer has been modified by
* the currently running transaction
- * [jbd_lock_bh_state()]
+ * [b_state_lock]
*/
unsigned b_modified;
/*
* Copy of the buffer data frozen for writing to the log.
- * [jbd_lock_bh_state()]
+ * [b_state_lock]
*/
char *b_frozen_data;
/*
* Pointer to a saved copy of the buffer containing no uncommitted
* deallocation references, so that allocations can avoid overwriting
- * uncommitted deletes. [jbd_lock_bh_state()]
+ * uncommitted deletes. [b_state_lock]
*/
char *b_committed_data;
@@ -63,7 +70,7 @@ struct journal_head {
* metadata: either the running transaction or the committing
* transaction (if there is one). Only applies to buffers on a
* transaction's data or metadata journaling list.
- * [j_list_lock] [jbd_lock_bh_state()]
+ * [j_list_lock] [b_state_lock]
* Either of these locks is enough for reading, both are needed for
* changes.
*/
@@ -73,13 +80,13 @@ struct journal_head {
* Pointer to the running compound transaction which is currently
* modifying the buffer's metadata, if there was already a transaction
* committing it when the new transaction touched it.
- * [t_list_lock] [jbd_lock_bh_state()]
+ * [t_list_lock] [b_state_lock]
*/
transaction_t *b_next_transaction;
/*
* Doubly-linked list of buffers on a transaction's data, metadata or
- * forget queue. [t_list_lock] [jbd_lock_bh_state()]
+ * forget queue. [t_list_lock] [b_state_lock]
*/
struct journal_head *b_tnext, *b_tprev;