summaryrefslogtreecommitdiff
path: root/fs/gfs2/bmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2/bmap.c')
-rw-r--r--fs/gfs2/bmap.c414
1 files changed, 335 insertions, 79 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index ed6699705c13..03128ed1f34e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -28,6 +28,7 @@
#include "trans.h"
#include "dir.h"
#include "util.h"
+#include "aops.h"
#include "trace_gfs2.h"
/* This doesn't need to be that large as max 64 bit pointers in a 4k
@@ -41,6 +42,8 @@ struct metapath {
int mp_aheight; /* actual height (lookup height) */
};
+static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
+
/**
* gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
* @ip: the inode
@@ -389,7 +392,7 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
return mp->mp_aheight - x - 1;
}
-static inline void release_metapath(struct metapath *mp)
+static void release_metapath(struct metapath *mp)
{
int i;
@@ -397,27 +400,23 @@ static inline void release_metapath(struct metapath *mp)
if (mp->mp_bh[i] == NULL)
break;
brelse(mp->mp_bh[i]);
+ mp->mp_bh[i] = NULL;
}
}
/**
* gfs2_extent_length - Returns length of an extent of blocks
- * @start: Start of the buffer
- * @len: Length of the buffer in bytes
- * @ptr: Current position in the buffer
- * @limit: Max extent length to return (0 = unlimited)
+ * @bh: The metadata block
+ * @ptr: Current position in @bh
+ * @limit: Max extent length to return
* @eob: Set to 1 if we hit "end of block"
*
- * If the first block is zero (unallocated) it will return the number of
- * unallocated blocks in the extent, otherwise it will return the number
- * of contiguous blocks in the extent.
- *
* Returns: The length of the extent (minimum of one block)
*/
-static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
+static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
- const __be64 *end = (start + len);
+ const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
const __be64 *first = ptr;
u64 d = be64_to_cpu(*ptr);
@@ -426,14 +425,11 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b
ptr++;
if (ptr >= end)
break;
- if (limit && --limit == 0)
- break;
- if (d)
- d++;
+ d++;
} while(be64_to_cpu(*ptr) == d);
if (ptr >= end)
*eob = 1;
- return (ptr - first);
+ return ptr - first;
}
typedef const __be64 *(*gfs2_metadata_walker)(
@@ -609,11 +605,13 @@ enum alloc_state {
* ii) Indirect blocks to fill in lower part of the metadata tree
* iii) Data blocks
*
- * The function is in two parts. The first part works out the total
- * number of blocks which we need. The second part does the actual
- * allocation asking for an extent at a time (if enough contiguous free
- * blocks are available, there will only be one request per bmap call)
- * and uses the state machine to initialise the blocks in order.
+ * This function is called after gfs2_iomap_get, which works out the
+ * total number of blocks which we need via gfs2_alloc_size.
+ *
+ * We then do the actual allocation asking for an extent at a time (if
+ * enough contiguous free blocks are available, there will only be one
+ * allocation request per call) and uses the state machine to initialise
+ * the blocks in order.
*
* Right now, this function will allocate at most one indirect block
* worth of data -- with a default block size of 4K, that's slightly
@@ -633,39 +631,26 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
struct buffer_head *dibh = mp->mp_bh[0];
u64 bn;
unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
- unsigned dblks = 0;
- unsigned ptrs_per_blk;
+ size_t dblks = iomap->length >> inode->i_blkbits;
const unsigned end_of_metadata = mp->mp_fheight - 1;
int ret;
enum alloc_state state;
__be64 *ptr;
__be64 zero_bn = 0;
- size_t maxlen = iomap->length >> inode->i_blkbits;
BUG_ON(mp->mp_aheight < 1);
BUG_ON(dibh == NULL);
+ BUG_ON(dblks < 1);
gfs2_trans_add_meta(ip->i_gl, dibh);
down_write(&ip->i_rw_mutex);
if (mp->mp_fheight == mp->mp_aheight) {
- struct buffer_head *bh;
- int eob;
-
- /* Bottom indirect block exists, find unalloced extent size */
- ptr = metapointer(end_of_metadata, mp);
- bh = mp->mp_bh[end_of_metadata];
- dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
- maxlen, &eob);
- BUG_ON(dblks < 1);
+ /* Bottom indirect block exists */
state = ALLOC_DATA;
} else {
/* Need to allocate indirect blocks */
- ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
- sdp->sd_diptrs;
- dblks = min(maxlen, (size_t)(ptrs_per_blk -
- mp->mp_list[end_of_metadata]));
if (mp->mp_fheight == ip->i_height) {
/* Writing into existing tree, extend tree down */
iblks = mp->mp_fheight - mp->mp_aheight;
@@ -750,6 +735,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
}
} while (iomap->addr == IOMAP_NULL_ADDR);
+ iomap->type = IOMAP_MAPPED;
iomap->length = (u64)dblks << inode->i_blkbits;
ip->i_height = mp->mp_fheight;
gfs2_add_inode_blocks(&ip->i_inode, alloced);
@@ -759,18 +745,51 @@ out:
return ret;
}
-static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
+#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
+
+/**
+ * gfs2_alloc_size - Compute the maximum allocation size
+ * @inode: The inode
+ * @mp: The metapath
+ * @size: Requested size in blocks
+ *
+ * Compute the maximum size of the next allocation at @mp.
+ *
+ * Returns: size in blocks
+ */
+static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ const __be64 *first, *ptr, *end;
- iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
- sizeof(struct gfs2_dinode);
- iomap->offset = 0;
- iomap->length = i_size_read(inode);
- iomap->type = IOMAP_INLINE;
-}
+ /*
+ * For writes to stuffed files, this function is called twice via
+ * gfs2_iomap_get, before and after unstuffing. The size we return the
+ * first time needs to be large enough to get the reservation and
+ * allocation sizes right. The size we return the second time must
+ * be exact or else gfs2_iomap_alloc won't do the right thing.
+ */
-#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
+ if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
+ unsigned int maxsize = mp->mp_fheight > 1 ?
+ sdp->sd_inptrs : sdp->sd_diptrs;
+ maxsize -= mp->mp_list[mp->mp_fheight - 1];
+ if (size > maxsize)
+ size = maxsize;
+ return size;
+ }
+
+ first = metapointer(ip->i_height - 1, mp);
+ end = metaend(ip->i_height - 1, mp);
+ if (end - first > size)
+ end = first + size;
+ for (ptr = first; ptr < end; ptr++) {
+ if (*ptr)
+ break;
+ }
+ return ptr - first;
+}
/**
* gfs2_iomap_get - Map blocks from an inode to disk blocks
@@ -789,37 +808,63 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
+ loff_t size = i_size_read(inode);
__be64 *ptr;
sector_t lblock;
sector_t lblock_stop;
int ret;
int eob;
u64 len;
- struct buffer_head *bh;
+ struct buffer_head *dibh = NULL, *bh;
u8 height;
if (!length)
return -EINVAL;
+ down_read(&ip->i_rw_mutex);
+
+ ret = gfs2_meta_inode_buffer(ip, &dibh);
+ if (ret)
+ goto unlock;
+ iomap->private = dibh;
+
if (gfs2_is_stuffed(ip)) {
- if (flags & IOMAP_REPORT) {
- if (pos >= i_size_read(inode))
- return -ENOENT;
- gfs2_stuffed_iomap(inode, iomap);
- return 0;
+ if (flags & IOMAP_WRITE) {
+ loff_t max_size = gfs2_max_stuffed_size(ip);
+
+ if (pos + length > max_size)
+ goto unstuff;
+ iomap->length = max_size;
+ } else {
+ if (pos >= size) {
+ if (flags & IOMAP_REPORT) {
+ ret = -ENOENT;
+ goto unlock;
+ } else {
+ /* report a hole */
+ iomap->offset = pos;
+ iomap->length = length;
+ goto do_alloc;
+ }
+ }
+ iomap->length = size;
}
- BUG_ON(!(flags & IOMAP_WRITE));
+ iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
+ sizeof(struct gfs2_dinode);
+ iomap->type = IOMAP_INLINE;
+ iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
+ goto out;
}
+
+unstuff:
lblock = pos >> inode->i_blkbits;
iomap->offset = lblock << inode->i_blkbits;
lblock_stop = (pos + length - 1) >> inode->i_blkbits;
len = lblock_stop - lblock + 1;
+ iomap->length = len << inode->i_blkbits;
- down_read(&ip->i_rw_mutex);
-
- ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
- if (ret)
- goto unlock;
+ get_bh(dibh);
+ mp->mp_bh[0] = dibh;
height = ip->i_height;
while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
@@ -840,12 +885,12 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
goto do_alloc;
bh = mp->mp_bh[ip->i_height - 1];
- len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
+ len = gfs2_extent_length(bh, ptr, len, &eob);
iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
iomap->length = len << inode->i_blkbits;
iomap->type = IOMAP_MAPPED;
- iomap->flags = IOMAP_F_MERGED;
+ iomap->flags |= IOMAP_F_MERGED;
if (eob)
iomap->flags |= IOMAP_F_GFS2_BOUNDARY;
@@ -853,25 +898,185 @@ out:
iomap->bdev = inode->i_sb->s_bdev;
unlock:
up_read(&ip->i_rw_mutex);
+ if (ret && dibh)
+ brelse(dibh);
return ret;
do_alloc:
iomap->addr = IOMAP_NULL_ADDR;
- iomap->length = len << inode->i_blkbits;
iomap->type = IOMAP_HOLE;
- iomap->flags = 0;
if (flags & IOMAP_REPORT) {
- loff_t size = i_size_read(inode);
if (pos >= size)
ret = -ENOENT;
else if (height == ip->i_height)
ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
else
iomap->length = size - pos;
+ } else if (flags & IOMAP_WRITE) {
+ u64 alloc_size;
+
+ if (flags & IOMAP_DIRECT)
+ goto out; /* (see gfs2_file_direct_write) */
+
+ len = gfs2_alloc_size(inode, mp, len);
+ alloc_size = len << inode->i_blkbits;
+ if (alloc_size < iomap->length)
+ iomap->length = alloc_size;
+ } else {
+ if (pos < size && height == ip->i_height)
+ ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
}
goto out;
}
+static int gfs2_write_lock(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ int error;
+
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+ error = gfs2_glock_nq(&ip->i_gh);
+ if (error)
+ goto out_uninit;
+ if (&ip->i_inode == sdp->sd_rindex) {
+ struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+ error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
+ GL_NOCACHE, &m_ip->i_gh);
+ if (error)
+ goto out_unlock;
+ }
+ return 0;
+
+out_unlock:
+ gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+ gfs2_holder_uninit(&ip->i_gh);
+ return error;
+}
+
+static void gfs2_write_unlock(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+ if (&ip->i_inode == sdp->sd_rindex) {
+ struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+ gfs2_glock_dq_uninit(&m_ip->i_gh);
+ }
+ gfs2_glock_dq_uninit(&ip->i_gh);
+}
+
+static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
+ unsigned copied, struct page *page,
+ struct iomap *iomap)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+
+ gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
+}
+
+static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
+ loff_t length, unsigned flags,
+ struct iomap *iomap)
+{
+ struct metapath mp = { .mp_aheight = 1, };
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+ bool unstuff, alloc_required;
+ int ret;
+
+ ret = gfs2_write_lock(inode);
+ if (ret)
+ return ret;
+
+ unstuff = gfs2_is_stuffed(ip) &&
+ pos + length > gfs2_max_stuffed_size(ip);
+
+ ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ if (ret)
+ goto out_release;
+
+ alloc_required = unstuff || iomap->type == IOMAP_HOLE;
+
+ if (alloc_required || gfs2_is_jdata(ip))
+ gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
+ &ind_blocks);
+
+ if (alloc_required) {
+ struct gfs2_alloc_parms ap = {
+ .target = data_blocks + ind_blocks
+ };
+
+ ret = gfs2_quota_lock_check(ip, &ap);
+ if (ret)
+ goto out_release;
+
+ ret = gfs2_inplace_reserve(ip, &ap);
+ if (ret)
+ goto out_qunlock;
+ }
+
+ rblocks = RES_DINODE + ind_blocks;
+ if (gfs2_is_jdata(ip))
+ rblocks += data_blocks;
+ if (ind_blocks || data_blocks)
+ rblocks += RES_STATFS + RES_QUOTA;
+ if (inode == sdp->sd_rindex)
+ rblocks += 2 * RES_STATFS;
+ if (alloc_required)
+ rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
+
+ ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits);
+ if (ret)
+ goto out_trans_fail;
+
+ if (unstuff) {
+ ret = gfs2_unstuff_dinode(ip, NULL);
+ if (ret)
+ goto out_trans_end;
+ release_metapath(&mp);
+ brelse(iomap->private);
+ iomap->private = NULL;
+ ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
+ flags, iomap, &mp);
+ if (ret)
+ goto out_trans_end;
+ }
+
+ if (iomap->type == IOMAP_HOLE) {
+ ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+ if (ret) {
+ gfs2_trans_end(sdp);
+ gfs2_inplace_release(ip);
+ punch_hole(ip, iomap->offset, iomap->length);
+ goto out_qunlock;
+ }
+ }
+ release_metapath(&mp);
+ if (gfs2_is_jdata(ip))
+ iomap->page_done = gfs2_iomap_journaled_page_done;
+ return 0;
+
+out_trans_end:
+ gfs2_trans_end(sdp);
+out_trans_fail:
+ if (alloc_required)
+ gfs2_inplace_release(ip);
+out_qunlock:
+ if (alloc_required)
+ gfs2_quota_unlock(ip);
+out_release:
+ if (iomap->private)
+ brelse(iomap->private);
+ release_metapath(&mp);
+ gfs2_write_unlock(inode);
+ return ret;
+}
+
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap)
{
@@ -879,22 +1084,79 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
struct metapath mp = { .mp_aheight = 1, };
int ret;
+ iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
trace_gfs2_iomap_start(ip, pos, length, flags);
- if (flags & IOMAP_WRITE) {
- ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
- if (!ret && iomap->type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
- release_metapath(&mp);
+ if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
+ ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
} else {
ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
release_metapath(&mp);
+ /*
+ * Silently fall back to buffered I/O for stuffed files or if
+ * we've hot a hole (see gfs2_file_direct_write).
+ */
+ if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
+ iomap->type != IOMAP_MAPPED)
+ ret = -ENOTBLK;
}
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
}
+static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+ ssize_t written, unsigned flags, struct iomap *iomap)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_trans *tr = current->journal_info;
+ struct buffer_head *dibh = iomap->private;
+
+ if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
+ goto out;
+
+ if (iomap->type != IOMAP_INLINE) {
+ gfs2_ordered_add_inode(ip);
+
+ if (tr->tr_num_buf_new)
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ else
+ gfs2_trans_add_meta(ip->i_gl, dibh);
+ }
+
+ if (inode == sdp->sd_rindex) {
+ adjust_fs_space(inode);
+ sdp->sd_rindex_uptodate = 0;
+ }
+
+ gfs2_trans_end(sdp);
+ gfs2_inplace_release(ip);
+
+ if (length != written && (iomap->flags & IOMAP_F_NEW)) {
+ /* Deallocate blocks that were just allocated. */
+ loff_t blockmask = i_blocksize(inode) - 1;
+ loff_t end = (pos + length) & ~blockmask;
+
+ pos = (pos + written + blockmask) & ~blockmask;
+ if (pos < end) {
+ truncate_pagecache_range(inode, pos, end - 1);
+ punch_hole(ip, pos, end - pos);
+ }
+ }
+
+ if (ip->i_qadata && ip->i_qadata->qa_qd_num)
+ gfs2_quota_unlock(ip);
+ gfs2_write_unlock(inode);
+
+out:
+ if (dibh)
+ brelse(dibh);
+ return 0;
+}
+
const struct iomap_ops gfs2_iomap_ops = {
.iomap_begin = gfs2_iomap_begin,
+ .iomap_end = gfs2_iomap_end,
};
/**
@@ -941,12 +1203,6 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
} else {
ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
release_metapath(&mp);
-
- /* Return unmapped buffer beyond the end of file. */
- if (ret == -ENOENT) {
- ret = 0;
- goto out;
- }
}
if (ret)
goto out;
@@ -2060,7 +2316,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
lblock = offset >> shift;
lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
- if (lblock_stop > end_of_file)
+ if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
return 1;
size = (lblock_stop - lblock) << shift;
@@ -2154,11 +2410,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
if (error)
goto out;
} else {
- unsigned int start_off, end_off, blocksize;
+ unsigned int start_off, end_len, blocksize;
blocksize = i_blocksize(inode);
start_off = offset & (blocksize - 1);
- end_off = (offset + length) & (blocksize - 1);
+ end_len = (offset + length) & (blocksize - 1);
if (start_off) {
unsigned int len = length;
if (length > blocksize - start_off)
@@ -2167,11 +2423,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
if (error)
goto out;
if (start_off + length < blocksize)
- end_off = 0;
+ end_len = 0;
}
- if (end_off) {
+ if (end_len) {
error = gfs2_block_zero_range(inode,
- offset + length - end_off, end_off);
+ offset + length - end_len, end_len);
if (error)
goto out;
}