diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-11-29 22:21:43 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-11-29 22:47:20 +0400 |
commit | 45bce8f3e3436bbe2e03dd2b076abdce79ffabb7 (patch) | |
tree | 554d8cf7254eb58210e98f2a1053f531f68f024b | |
parent | 9489e9dcae718d5fde988e4a684a0f55b5f94d17 (diff) | |
download | linux-45bce8f3e3436bbe2e03dd2b076abdce79ffabb7.tar.xz |
fs/buffer.c: make block-size be per-page and protected by the page lock
This makes the buffer size handling be a per-page thing, which allows us
to not have to worry about locking too much when changing the buffer
size. If a page doesn't have buffers, we still need to read the block
size from the inode, but we can do that with ACCESS_ONCE(), so that even
if the size is changing, we get a consistent value.
This doesn't convert all functions - many of the buffer functions are
used purely by filesystems, which in turn results in the buffer size
being fixed at mount-time. So they don't have the same consistency
issues that the raw device access can have.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/buffer.c | 79 |
1 files changed, 48 insertions, 31 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index b5f044283edb..28a74ff5324b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1553,6 +1553,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) EXPORT_SYMBOL(unmap_underlying_metadata); /* + * Size is a power-of-two in the range 512..PAGE_SIZE, + * and the case we care about most is PAGE_SIZE. + * + * So this *could* possibly be written with those + * constraints in mind (relevant mostly if some + * architecture has a slow bit-scan instruction) + */ +static inline int block_size_bits(unsigned int blocksize) +{ + return ilog2(blocksize); +} + +static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state) +{ + BUG_ON(!PageLocked(page)); + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); + return page_buffers(page); +} + +/* * NOTE! All mapped/uptodate combinations are valid: * * Mapped Uptodate Meaning @@ -1589,19 +1611,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, sector_t block; sector_t last_block; struct buffer_head *bh, *head; - const unsigned blocksize = 1 << inode->i_blkbits; + unsigned int blocksize, bbits; int nr_underway = 0; int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); - BUG_ON(!PageLocked(page)); - - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, blocksize, + head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); - } /* * Be very careful. We have no exclusion from __set_page_dirty_buffers @@ -1613,9 +1629,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page, * handle that here by just cleaning them. */ - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - head = page_buffers(page); bh = head; + blocksize = bh->b_size; + bbits = block_size_bits(blocksize); + + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + last_block = (i_size_read(inode) - 1) >> bbits; /* * Get all the dirty buffers mapped to disk addresses and @@ -1806,12 +1825,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, BUG_ON(to > PAGE_CACHE_SIZE); BUG_ON(from > to); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); + head = create_page_buffers(page, inode, 0); + blocksize = head->b_size; + bbits = block_size_bits(blocksize); - bbits = inode->i_blkbits; block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); for(bh = head, block_start = 0; bh != head || !block_start; @@ -1881,11 +1898,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, unsigned blocksize; struct buffer_head *bh, *head; - blocksize = 1 << inode->i_blkbits; + bh = head = page_buffers(page); + blocksize = bh->b_size; - for(bh = head = page_buffers(page), block_start = 0; - bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) { + block_start = 0; + do { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (!buffer_uptodate(bh)) @@ -1895,7 +1912,10 @@ static int __block_commit_write(struct inode *inode, struct page *page, mark_buffer_dirty(bh); } clear_buffer_new(bh); - } + + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); /* * If this is a partial write which happened to make all buffers @@ -2020,7 +2040,6 @@ EXPORT_SYMBOL(generic_write_end); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, unsigned long from) { - struct inode *inode = page->mapping->host; unsigned block_start, block_end, blocksize; unsigned to; struct buffer_head *bh, *head; @@ -2029,13 +2048,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, if (!page_has_buffers(page)) return 0; - blocksize = 1 << inode->i_blkbits; + head = page_buffers(page); + blocksize = head->b_size; to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); to = from + to; if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) return 0; - head = page_buffers(page); bh = head; block_start = 0; do { @@ -2068,18 +2087,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block) struct inode *inode = page->mapping->host; sector_t iblock, lblock; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; - unsigned int blocksize; + unsigned int blocksize, bbits; int nr, i; int fully_mapped = 1; - BUG_ON(!PageLocked(page)); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); + head = create_page_buffers(page, inode, 0); + blocksize = head->b_size; + bbits = block_size_bits(blocksize); - iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; + iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + lblock = (i_size_read(inode)+blocksize-1) >> bbits; bh = head; nr = 0; i = 0; |