From c969f58ca43fc403c75f5d3da4cf1e21de7afaa0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Apr 2009 14:13:01 +0100 Subject: GFS2: Update the rw flags After Jens recent updates: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=a1f242524c3c1f5d40f1c9c343427e34d1aadd6e et al. this is a patch to bring gfs2 uptodate with the core code. Also I've managed to squash another call to ll_rw_block() along the way. There is still one part of the GFS2 I/O paths which are not correctly annotated and that is due to the sharing of the writeback code between the data and metadata address spaces. I would like to change that too, but this patch is still worth doing on its own, I think. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 6 +++--- fs/gfs2/lops.c | 14 ++++++++------ fs/gfs2/meta_io.c | 38 +++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 20 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 98918a756410..aa62cf5976e8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -120,7 +120,7 @@ __acquires(&sdp->sd_log_lock) lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } else { unlock_buffer(bh); brelse(bh); @@ -604,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) goto skip_barrier; get_bh(bh); - submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); + submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -664,7 +664,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) lock_buffer(bh); if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } else { unlock_buffer(bh); brelse(bh); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 80e4f5f898bb..00315f50fa46 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -189,7 +191,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_log_unlock(sdp); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); n = 0; @@ -199,7 +201,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); lock_buffer(bd2->bd_bh); bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); if (++n >= num) break; @@ -341,7 +343,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); bh = gfs2_log_get_buf(sdp); mh = (struct gfs2_meta_header *)bh->b_data; @@ -358,7 +360,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, @@ -560,7 +562,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, ptr = bh_log_ptr(bh); get_bh(bh); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); while(!list_empty(list)) { bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); @@ -586,7 +588,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, } else { bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); } - submit_bh(WRITE, bh1); + submit_bh(WRITE_SYNC_PLUG, bh1); gfs2_log_lock(sdp); ptr += 2; } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 8d6f13256b26..75b2aec06f85 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -201,16 +201,32 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { - *bhp = gfs2_getbuf(gl, blkno, CREATE); - if (!buffer_uptodate(*bhp)) { - ll_rw_block(READ_META, 1, bhp); - if (flags & DIO_WAIT) { - int error = gfs2_meta_wait(gl->gl_sbd, *bhp); - if (error) { - brelse(*bhp); - return error; - } - } + struct gfs2_sbd *sdp = gl->gl_sbd; + struct buffer_head *bh; + + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + + *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); + + lock_buffer(bh); + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + return 0; + } + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); + if (!(flags & DIO_WAIT)) + return 0; + + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) { + struct gfs2_trans *tr = current->journal_info; + if (tr && tr->tr_touched) + gfs2_io_error_bh(sdp, bh); + brelse(bh); + return -EIO; } return 0; @@ -404,7 +420,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_META, 1, &first_bh); + ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); dblock++; extlen--; -- cgit v1.2.3 From 4a0f9a321a113392b448e477018311d14fba2b34 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 20 Apr 2009 08:16:26 +0100 Subject: GFS2: Optimise writepage for metadata This adds a GFS2 specific writepage for metadata, rather than continuing to use the VFS function. As a result we now tag all our metadata I/O with the correct flag so that blktraces will now be less confusing. Also, the generic function was checking for a number of corner cases which cannot happen on the metadata address spaces so that this should be faster too. Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 9 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 75b2aec06f85..78a5f4312667 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -33,17 +33,65 @@ #include "util.h" #include "ops_address.h" -static int aspace_get_block(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) +static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { - gfs2_assert_warn(inode->i_sb->s_fs_info, 0); - return -EOPNOTSUPP; -} + int err; + struct buffer_head *bh, *head; + int nr_underway = 0; + int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC_PLUG : WRITE)); + + BUG_ON(!PageLocked(page)); + BUG_ON(!page_has_buffers(page)); + + head = page_buffers(page); + bh = head; + + do { + if (!buffer_mapped(bh)) + continue; + /* + * If it's a fully non-blocking write attempt and we cannot + * lock the buffer then redirty the page. Note that this can + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else if (!trylock_buffer(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + if (test_clear_buffer_dirty(bh)) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + /* + * The page and its buffers are protected by PageWriteback(), so we can + * drop the bh refcounts early. + */ + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(write_op, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); -static int gfs2_aspace_writepage(struct page *page, - struct writeback_control *wbc) -{ - return block_write_full_page(page, aspace_get_block, wbc); + err = 0; + if (nr_underway == 0) + end_page_writeback(page); + + return err; } static const struct address_space_operations aspace_aops = { -- cgit v1.2.3 From 48bf2b1711dc498494e77705c415ee46bb508fd9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 29 Apr 2009 13:59:35 +0100 Subject: GFS2: Something nonlinear this way comes! For some reason GFS2 has been missing support for non-linear mappings. This patch fixes that, and also avoids taking any locks for mmap in the O_NOATIME case. In fact we don't actually need to take the lock here at all - just doing file_accessed() would be enough, but we have to take the lock eventually and this helps it hit disk (and thus be seen by other nodes) faster. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_file.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 5d82e91887e3..0ee7bd287c5a 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -425,33 +425,36 @@ static struct vm_operations_struct gfs2_vm_ops = { .page_mkwrite = gfs2_page_mkwrite, }; - /** * gfs2_mmap - * @file: The file to map * @vma: The VMA which described the mapping * - * Returns: 0 or error code + * There is no need to get a lock here unless we should be updating + * atime. We ignore any locking errors since the only consequence is + * a missed atime update (which will just be deferred until later). + * + * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_holder i_gh; - int error; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); - error = gfs2_glock_nq(&i_gh); - if (error) { - gfs2_holder_uninit(&i_gh); - return error; - } + if (!(file->f_flags & O_NOATIME)) { + struct gfs2_holder i_gh; + int error; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); + file_accessed(file); + if (error == 0) + gfs2_glock_dq_uninit(&i_gh); + } vma->vm_ops = &gfs2_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; - gfs2_glock_dq_uninit(&i_gh); - - return error; + return 0; } /** -- cgit v1.2.3 From 7537d81aa7b7cd31b0caeac8091456e93d96fa8d Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Tue, 12 May 2009 11:16:20 -0500 Subject: GFS2: Fix timestamps on write This patch copies the timestamps from the vfs inode into gfs2 and syncs it to the disk inode during writes. Signed-off-by: Abhijith Das Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index a6dde1751e17..e5664210f0d8 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -781,10 +781,12 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, unlock_page(page); page_cache_release(page); - if (inode->i_size < to) { - i_size_write(inode, to); - ip->i_disksize = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); + if (copied) { + if (inode->i_size < to) { + i_size_write(inode, to); + ip->i_disksize = inode->i_size; + } + gfs2_dinode_out(ip, di); mark_inode_dirty(inode); } @@ -824,7 +826,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; struct gfs2_alloc *al = ip->i_alloc; - struct gfs2_dinode *di; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; @@ -847,11 +848,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - - if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) { - di = (struct gfs2_dinode *)dibh->b_data; - ip->i_disksize = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); + if (ret > 0) { + if (inode->i_size > ip->i_disksize) + ip->i_disksize = inode->i_size; + gfs2_dinode_out(ip, dibh->b_data); mark_inode_dirty(inode); } -- cgit v1.2.3 From a1c0643ff9f360a30644f6e3cd643ca2a5083aea Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 10:56:52 +0100 Subject: GFS2: Move journal live test at transaction start There seems little point grabbing the transaction glock only to have to release it again if the journal isn't live. This moves the test earlier to avoid grabbing the lock when we don't need it in the first place. Signed-off-by: Steven Whitehouse --- fs/gfs2/trans.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 053752d4b27f..4ef0e9fa3549 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -33,6 +33,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, BUG_ON(current->journal_info); BUG_ON(blocks == 0 && revokes == 0); + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); if (!tr) return -ENOMEM; @@ -54,12 +57,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (error) goto fail_holder_uninit; - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - tr->tr_t_gh.gh_flags |= GL_NOCACHE; - error = -EROFS; - goto fail_gunlock; - } - error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) goto fail_gunlock; -- cgit v1.2.3 From 48c2b613616235d7c97fda5982f50100a6c79166 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 14:49:48 +0100 Subject: GFS2: Add commit= mount option It has always been possible to adjust the gfs2 log commit interval, but only from the sysfs interface. This adds a mount option, commit=, which will be familar to ext3 users. The sysfs interface continues to be available as well, although this might be removed in the future. Also this patch cleans up some duplicated structures in the GFS2 sysfs code. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/mount.c | 10 ++++++ fs/gfs2/ops_fstype.c | 4 ++- fs/gfs2/ops_super.c | 13 +++++++- fs/gfs2/sys.c | 92 +++++++++++++++++++--------------------------------- 5 files changed, 60 insertions(+), 60 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 399d1b978049..65f438e9537a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -418,6 +418,7 @@ struct gfs2_args { unsigned int ar_data:2; /* ordered/writeback */ unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ + int ar_commit; /* Commit interval */ }; struct gfs2_tune { diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index f7e8527a21e0..947af151fa24 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -45,6 +45,7 @@ enum { Opt_meta, Opt_discard, Opt_nodiscard, + Opt_commit, Opt_err, }; @@ -73,6 +74,7 @@ static const match_table_t tokens = { {Opt_meta, "meta"}, {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, + {Opt_commit, "commit=%d"}, {Opt_err, NULL} }; @@ -89,6 +91,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) char *o; int token; substring_t tmp[MAX_OPT_ARGS]; + int rv; /* Split the options into tokens with the "," character and process them */ @@ -173,6 +176,13 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) case Opt_nodiscard: args->ar_discard = 0; break; + case Opt_commit: + rv = match_int(&tmp[0], &args->ar_commit); + if (rv || args->ar_commit <= 0) { + fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; case Opt_err: default: fs_info(sdp, "invalid mount option: %s\n", o); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ff9473ea753..7981fbc9fc3b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -55,7 +55,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) spin_lock_init(>->gt_spin); gt->gt_incore_log_blocks = 1024; - gt->gt_log_flush_secs = 60; gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quota_simul_sync = 64; @@ -1165,6 +1164,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; + sdp->sd_args.ar_commit = 60; error = gfs2_mount_args(sdp, &sdp->sd_args, data); if (error) { @@ -1191,6 +1191,8 @@ static int fill_super(struct super_block *sb, void *data, int silent) GFS2_BASIC_BLOCK_SHIFT; sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; + error = init_names(sdp, silent); if (error) goto fail; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 458019569dcb..0677a8378560 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -436,8 +436,12 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_args args = sdp->sd_args; /* Default to current settings */ + struct gfs2_tune *gt = &sdp->sd_tune; int error; + spin_lock(>->gt_spin); + args.ar_commit = gt->gt_log_flush_secs; + spin_unlock(>->gt_spin); error = gfs2_mount_args(sdp, &args, data); if (error) return error; @@ -473,6 +477,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags |= MS_POSIXACL; else sb->s_flags &= ~MS_POSIXACL; + spin_lock(>->gt_spin); + gt->gt_log_flush_secs = args.ar_commit; + spin_unlock(>->gt_spin); + return 0; } @@ -550,6 +558,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) { struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; + int lfsecs; if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) seq_printf(s, ",meta"); @@ -610,7 +619,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } if (args->ar_discard) seq_printf(s, ",discard"); - + lfsecs = sdp->sd_tune.gt_log_flush_secs; + if (lfsecs != 60) + seq_printf(s, ",commit=%d", lfsecs); return 0; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 7655f5025fec..d53b22edc980 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -26,6 +26,36 @@ #include "util.h" #include "glops.h" +struct gfs2_attr { + struct attribute attr; + ssize_t (*show)(struct gfs2_sbd *, char *); + ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); +}; + +static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); + return a->show ? a->show(sdp, buf) : 0; +} + +static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); + return a->store ? a->store(sdp, buf, len) : len; +} + +static struct sysfs_ops gfs2_attr_ops = { + .show = gfs2_attr_show, + .store = gfs2_attr_store, +}; + + +static struct kset *gfs2_kset; + static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) { return snprintf(buf, PAGE_SIZE, "%u:%u\n", @@ -212,11 +242,6 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len return len; } -struct gfs2_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); - ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); -}; #define GFS2_ATTR(name, mode, show, store) \ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) @@ -246,49 +271,21 @@ static struct attribute *gfs2_attrs[] = { NULL, }; -static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); - return a->show ? a->show(sdp, buf) : 0; -} - -static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); - return a->store ? a->store(sdp, buf, len) : len; -} - -static struct sysfs_ops gfs2_attr_ops = { - .show = gfs2_attr_show, - .store = gfs2_attr_store, -}; - static struct kobj_type gfs2_ktype = { .default_attrs = gfs2_attrs, .sysfs_ops = &gfs2_attr_ops, }; -static struct kset *gfs2_kset; - /* * display struct lm_lockstruct fields */ -struct lockstruct_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); -}; - #define LOCKSTRUCT_ATTR(name, fmt) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \ } \ -static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name) +static struct gfs2_attr lockstruct_attr_##name = __ATTR_RO(name) LOCKSTRUCT_ATTR(jid, "%u\n"); LOCKSTRUCT_ATTR(first, "%u\n"); @@ -401,14 +398,8 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_recover_jid_status); } -struct gdlm_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *sdp, char *); - ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t); -}; - #define GDLM_ATTR(_name,_mode,_show,_store) \ -static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) +static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); @@ -434,21 +425,12 @@ static struct attribute *lock_module_attrs[] = { NULL, }; -/* - * display struct gfs2_args fields - */ - -struct args_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); -}; - #define ARGS_ATTR(name, fmt) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \ } \ -static struct args_attr args_attr_##name = __ATTR_RO(name) +static struct gfs2_attr args_attr_##name = __ATTR_RO(name) ARGS_ATTR(lockproto, "%s\n"); ARGS_ATTR(locktable, "%s\n"); @@ -531,14 +513,8 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, return len; } -struct tune_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); - ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); -}; - #define TUNE_ATTR_3(name, show, store) \ -static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store) +static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store) #define TUNE_ATTR_2(name, store) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ -- cgit v1.2.3 From 9582d41135c0d362f04ed6bf3dc8d693a7eafee2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 15:06:25 +0100 Subject: GFS2: Remove a couple of unused sysfs entries These two tunables are pointless and would never need to be changed anyway. There is also a race between them and umount as the deamons which they refer to might have gone away. The easiest way to fix the race is to remove the interface. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d53b22edc980..894bf773ec93 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -530,15 +530,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ } \ TUNE_ATTR_2(name, name##_store) -#define TUNE_ATTR_DAEMON(name, process) \ -static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ -{ \ - ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \ - wake_up_process(sdp->sd_##process); \ - return r; \ -} \ -TUNE_ATTR_2(name, name##_store) - TUNE_ATTR(incore_log_blocks, 0); TUNE_ATTR(log_flush_secs, 0); TUNE_ATTR(quota_warn_period, 0); @@ -550,8 +541,6 @@ TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(stall_secs, 1); TUNE_ATTR(statfs_quantum, 1); -TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); -TUNE_ATTR_DAEMON(logd_secs, logd_process); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); static struct attribute *tune_attrs[] = { @@ -565,8 +554,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_quota_simul_sync.attr, &tune_attr_stall_secs.attr, &tune_attr_statfs_quantum.attr, - &tune_attr_recoverd_secs.attr, - &tune_attr_logd_secs.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, NULL, -- cgit v1.2.3 From fe64d517df0970a68417184a12fcd4ba0589cc28 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 10:01:18 +0100 Subject: GFS2: Umount recovery race fix This patch fixes a race condition where we can receive recovery requests part way through processing a umount. This was causing problems since the recovery thread had already gone away. Looking in more detail at the recovery code, it was really trying to implement a slight variation on a work queue, and that happens to align nicely with the recently introduced slow-work subsystem. As a result I've updated the code to use slow-work, rather than its own home grown variety of work queue. When using the wait_on_bit() function, I noticed that the wait function that was supplied as an argument was appearing in the WCHAN field, so I've updated the function names in order to produce more meaningful output. Signed-off-by: Steven Whitehouse --- fs/gfs2/Kconfig | 1 + fs/gfs2/glock.c | 21 +++++++++-- fs/gfs2/incore.h | 14 +++---- fs/gfs2/main.c | 8 ++++ fs/gfs2/ops_fstype.c | 20 +++------- fs/gfs2/ops_super.c | 25 ++++++++++++- fs/gfs2/recovery.c | 102 +++++++++++++++++---------------------------------- fs/gfs2/recovery.h | 2 +- fs/gfs2/sys.c | 53 +++++++++++++------------- 9 files changed, 122 insertions(+), 124 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 3a981b7f64ca..cad957cdb1e5 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -7,6 +7,7 @@ config GFS2_FS select IP_SCTP if DLM_SCTP select FS_POSIX_ACL select CRC32 + select SLOW_WORK help A cluster filesystem. diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ff4981090489..2bf62bcc5181 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -796,22 +796,37 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -static int just_schedule(void *word) +/** + * gfs2_glock_holder_wait + * @word: unused + * + * This function and gfs2_glock_demote_wait both show up in the WCHAN + * field. Thus I've separated these otherwise identical functions in + * order to be more informative to the user. + */ + +static int gfs2_glock_holder_wait(void *word) { schedule(); return 0; } +static int gfs2_glock_demote_wait(void *word) +{ + schedule(); + return 0; +} + static void wait_on_holder(struct gfs2_holder *gh) { might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); } static void wait_on_demote(struct gfs2_glock *gl) { might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 65f438e9537a..0060e9564bb9 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -376,11 +377,11 @@ struct gfs2_journal_extent { struct gfs2_jdesc { struct list_head jd_list; struct list_head extent_list; - + struct slow_work jd_work; struct inode *jd_inode; + unsigned long jd_flags; +#define JDF_RECOVERY 1 unsigned int jd_jid; - int jd_dirty; - unsigned int jd_blocks; }; @@ -390,9 +391,6 @@ struct gfs2_statfs_change_host { s64 sc_dinodes; }; -#define GFS2_GLOCKD_DEFAULT 1 -#define GFS2_GLOCKD_MAX 16 - #define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF #define GFS2_QUOTA_OFF 0 #define GFS2_QUOTA_ACCOUNT 1 @@ -427,7 +425,6 @@ struct gfs2_tune { unsigned int gt_incore_log_blocks; unsigned int gt_log_flush_secs; - unsigned int gt_recoverd_secs; unsigned int gt_logd_secs; unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ @@ -448,6 +445,7 @@ enum { SDF_JOURNAL_LIVE = 1, SDF_SHUTDOWN = 2, SDF_NOBARRIERS = 3, + SDF_NORECOVERY = 4, }; #define GFS2_FSNAME_LEN 256 @@ -494,7 +492,6 @@ struct lm_lockstruct { unsigned long ls_flags; dlm_lockspace_t *ls_dlm; - int ls_recover_jid; int ls_recover_jid_done; int ls_recover_jid_status; }; @@ -583,7 +580,6 @@ struct gfs2_sbd { /* Daemon stuff */ - struct task_struct *sd_recoverd_process; struct task_struct *sd_logd_process; struct task_struct *sd_quotad_process; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a6892ed0840a..eacd78a5d082 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -113,12 +114,18 @@ static int __init init_gfs2_fs(void) if (error) goto fail_unregister; + error = slow_work_register_user(); + if (error) + goto fail_slow; + gfs2_register_debugfs(); printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); return 0; +fail_slow: + unregister_filesystem(&gfs2meta_fs_type); fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: @@ -156,6 +163,7 @@ static void __exit exit_gfs2_fs(void) gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); + slow_work_unregister_user(); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 7981fbc9fc3b..2cd1164c88d7 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -55,7 +56,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) spin_lock_init(>->gt_spin); gt->gt_incore_log_blocks = 1024; - gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; @@ -675,6 +675,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); + slow_work_init(&jd->jd_work, &gfs2_recover_ops); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) @@ -700,14 +701,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = sdp->sd_master_dir->d_inode; struct gfs2_holder ji_gh; - struct task_struct *p; struct gfs2_inode *ip; int jindex = 1; int error = 0; if (undo) { jindex = 0; - goto fail_recoverd; + goto fail_jinode_gh; } sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); @@ -800,18 +800,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd"); - error = IS_ERR(p); - if (error) { - fs_err(sdp, "can't start recoverd thread: %d\n", error); - goto fail_jinode_gh; - } - sdp->sd_recoverd_process = p; - return 0; -fail_recoverd: - kthread_stop(sdp->sd_recoverd_process); fail_jinode_gh: if (!sdp->sd_args.ar_spectator) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); @@ -1172,8 +1162,10 @@ static int fill_super(struct super_block *sb, void *data, int silent) goto fail; } - if (sdp->sd_args.ar_spectator) + if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 0677a8378560..a3c2272e7cad 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -121,6 +121,12 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) return error; } +static int gfs2_umount_recovery_wait(void *word) +{ + schedule(); + return 0; +} + /** * gfs2_put_super - Unmount the filesystem * @sb: The VFS superblock @@ -131,6 +137,7 @@ static void gfs2_put_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; + struct gfs2_jdesc *jd; /* Unfreeze the filesystem, if we need to */ @@ -139,9 +146,25 @@ static void gfs2_put_super(struct super_block *sb) gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); mutex_unlock(&sdp->sd_freeze_lock); + /* No more recovery requests */ + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + smp_mb(); + + /* Wait on outstanding recovery */ +restart: + spin_lock(&sdp->sd_jindex_spin); + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) + continue; + spin_unlock(&sdp->sd_jindex_spin); + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, + gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + spin_unlock(&sdp->sd_jindex_spin); + kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); - kthread_stop(sdp->sd_recoverd_process); if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_ro(sdp); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 247e8f7d6b3d..59d2695509d3 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -13,8 +13,7 @@ #include #include #include -#include -#include +#include #include "gfs2.h" #include "incore.h" @@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } -/** - * gfs2_recover_journal - recover a given journal - * @jd: the struct gfs2_jdesc describing the journal - * - * Acquire the journal's lock, check to see if the journal is clean, and - * do recovery if necessary. - * - * Returns: errno - */ +static int gfs2_recover_get_ref(struct slow_work *work) +{ + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); + if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) + return -EBUSY; + return 0; +} -int gfs2_recover_journal(struct gfs2_jdesc *jd) +static void gfs2_recover_put_ref(struct slow_work *work) +{ + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); + clear_bit(JDF_RECOVERY, &jd->jd_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&jd->jd_flags, JDF_RECOVERY); +} + +static void gfs2_recover_work(struct slow_work *work) { + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; @@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) gfs2_glock_dq_uninit(&j_gh); fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); - return 0; + return; fail_gunlock_tr: gfs2_glock_dq_uninit(&t_gh); @@ -584,70 +590,28 @@ fail_gunlock_j: fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); - return error; } -static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) -{ - struct gfs2_jdesc *jd; - int found = 0; - - spin_lock(&sdp->sd_jindex_spin); +struct slow_work_ops gfs2_recover_ops = { + .get_ref = gfs2_recover_get_ref, + .put_ref = gfs2_recover_put_ref, + .execute = gfs2_recover_work, +}; - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (jd->jd_dirty) { - jd->jd_dirty = 0; - found = 1; - break; - } - } - spin_unlock(&sdp->sd_jindex_spin); - - if (!found) - jd = NULL; - return jd; -} - -/** - * gfs2_check_journals - Recover any dirty journals - * @sdp: the filesystem - * - */ - -static void gfs2_check_journals(struct gfs2_sbd *sdp) +static int gfs2_recovery_wait(void *word) { - struct gfs2_jdesc *jd; - - for (;;) { - jd = gfs2_jdesc_find_dirty(sdp); - if (!jd) - break; - - if (jd != sdp->sd_jdesc) - gfs2_recover_journal(jd); - } + schedule(); + return 0; } -/** - * gfs2_recoverd - Recover dead machine's journals - * @sdp: Pointer to GFS2 superblock - * - */ - -int gfs2_recoverd(void *data) +int gfs2_recover_journal(struct gfs2_jdesc *jd) { - struct gfs2_sbd *sdp = data; - unsigned long t; - - while (!kthread_should_stop()) { - gfs2_check_journals(sdp); - t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; - if (freezing(current)) - refrigerator(); - schedule_timeout_interruptible(t); - } - + int rv; + rv = slow_work_enqueue(&jd->jd_work); + if (rv) + return rv; + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); return 0; } diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index a8218ea15b57..1616ac22569a 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -28,7 +28,7 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head); extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); -extern int gfs2_recoverd(void *data); +extern struct slow_work_ops gfs2_recover_ops; #endif /* __RECOVERY_DOT_H__ */ diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 894bf773ec93..9f6d48b75fd2 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -356,34 +356,33 @@ static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_first_done); } -static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf) -{ - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_recover_jid); -} - -static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid) +static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + unsigned jid; struct gfs2_jdesc *jd; + int rv; + + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + rv = -ESHUTDOWN; spin_lock(&sdp->sd_jindex_spin); + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) + goto out; + rv = -EBUSY; + if (sdp->sd_jdesc->jd_jid == jid) + goto out; + rv = -ENOENT; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { if (jd->jd_jid != jid) continue; - jd->jd_dirty = 1; + rv = slow_work_enqueue(&jd->jd_work); break; } +out: spin_unlock(&sdp->sd_jindex_spin); -} - -static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) -{ - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - ls->ls_recover_jid = simple_strtol(buf, NULL, 0); - gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid); - if (sdp->sd_recoverd_process) - wake_up_process(sdp->sd_recoverd_process); - return len; + return rv ? rv : len; } static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) @@ -401,15 +400,15 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) -GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); -GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); -GDLM_ATTR(id, 0444, lkid_show, NULL); -GDLM_ATTR(first, 0444, lkfirst_show, NULL); -GDLM_ATTR(first_done, 0444, first_done_show, NULL); -GDLM_ATTR(recover, 0644, recover_show, recover_store); -GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); -GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); +GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); +GDLM_ATTR(block, 0644, block_show, block_store); +GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); +GDLM_ATTR(id, 0444, lkid_show, NULL); +GDLM_ATTR(first, 0444, lkfirst_show, NULL); +GDLM_ATTR(first_done, 0444, first_done_show, NULL); +GDLM_ATTR(recover, 0200, NULL, recover_store); +GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); +GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); static struct attribute *lock_module_attrs[] = { &gdlm_attr_proto_name.attr, -- cgit v1.2.3 From ef9e8b14a5c1d0afbaf12b4c3b271188ddfc52a4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 14:25:16 +0100 Subject: GFS2: Don't warn when delete inode fails on ro filesystem If the filesystem is read-only, then we expect that delete inode will fail, so there is no need to warn about it. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index a3c2272e7cad..2fd1dcbcc5b7 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -714,7 +714,7 @@ out_unlock: gfs2_glock_dq(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); - if (error && error != GLR_TRYFAILED) + if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_delete_inode: %d\n", error); out: truncate_inode_pages(&inode->i_data, 0); -- cgit v1.2.3 From 09010978345e8883003bf411bb99753710eb5a3a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 20 May 2009 10:48:47 +0100 Subject: GFS2: Improve resource group error handling This patch improves the error handling in the case where we discover that the summary information in the resource group doesn't match the bitmap information while in the process of allocating blocks. Originally this resulted in a kernel bug, but this patch changes that so that we return -EIO and print some messages explaining what went wrong, and how to fix it. We also remember locally not to try and allocate from the same rgrp again, so that a subsequent allocation in a different rgrp should succeed. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 9 +++++++-- fs/gfs2/dir.c | 11 +++++++++-- fs/gfs2/eattr.c | 14 +++++++++++--- fs/gfs2/glops.c | 20 +------------------ fs/gfs2/incore.h | 7 ++++--- fs/gfs2/rgrp.c | 58 +++++++++++++++++++++++++++++++++++++++++--------------- fs/gfs2/rgrp.h | 47 +++++++++++++++++++++++---------------------- 7 files changed, 99 insertions(+), 67 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3a5d3f883e10..253e1a39f841 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -136,7 +136,9 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) and write it out to disk */ unsigned int n = 1; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + goto out_brelse; if (isdir) { gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1); error = gfs2_dir_get_new_buffer(ip, block, &bh); @@ -476,8 +478,11 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, blks = dblks + iblks; i = sheight; do { + int error; n = blks - alloced; - bn = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &bn, &n); + if (error) + return error; alloced += n; if (state != ALLOC_DATA || gfs2_is_jdata(ip)) gfs2_trans_add_unrevoke(sdp, bn, n); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index aef4d0c06748..297d7e5cebad 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -803,13 +803,20 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, { struct gfs2_inode *ip = GFS2_I(inode); unsigned int n = 1; - u64 bn = gfs2_alloc_block(ip, &n); - struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); + u64 bn; + int error; + struct buffer_head *bh; struct gfs2_leaf *leaf; struct gfs2_dirent *dent; struct qstr name = { .name = "", .len = 0, .hash = 0 }; + + error = gfs2_alloc_block(ip, &bn, &n); + if (error) + return NULL; + bh = gfs2_meta_new(ip->i_gl, bn); if (!bh) return NULL; + gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); gfs2_trans_add_bh(ip->i_gl, bh, 1); gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 899763aed217..07ea9529adda 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -582,8 +582,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) struct gfs2_ea_header *ea; unsigned int n = 1; u64 block; + int error; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, block, 1); *bhp = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_bh(ip->i_gl, *bhp, 1); @@ -617,6 +620,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, struct gfs2_ea_request *er) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + int error; ea->ea_data_len = cpu_to_be32(er->er_data_len); ea->ea_name_len = er->er_name_len; @@ -642,7 +646,9 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, block, 1); bh = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_bh(ip->i_gl, bh, 1); @@ -963,7 +969,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, } else { u64 blk; unsigned int n = 1; - blk = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &blk, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, blk, 1); indbh = gfs2_meta_new(ip->i_gl, blk); gfs2_trans_add_bh(ip->i_gl, indbh, 1); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 70f87f43afa2..d5e4ab155ca0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -309,24 +309,6 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) gfs2_rgrp_bh_put(gh->gh_gl->gl_object); } -/** - * rgrp_go_dump - print out an rgrp - * @seq: The iterator - * @gl: The glock in question - * - */ - -static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) -{ - const struct gfs2_rgrpd *rgd = gl->gl_object; - if (rgd == NULL) - return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", - (unsigned long long)rgd->rd_addr, rgd->rd_flags, - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); - return 0; -} - /** * trans_go_sync - promote/demote the transaction glock * @gl: the glock @@ -410,7 +392,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, - .go_dump = rgrp_go_dump, + .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, .go_min_hold_time = HZ / 5, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0060e9564bb9..de50d86fec12 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -92,9 +92,10 @@ struct gfs2_rgrpd { unsigned int rd_bh_count; u32 rd_last_alloc; unsigned char rd_flags; -#define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */ -#define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */ -#define GFS2_RDF_UPTODATE 0x04 /* rg is up to date */ +#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ +#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ +#define GFS2_RDF_ERROR 0x40000000 /* error in rg */ +#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ }; enum gfs2_state_bits { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa2..fbacf09ee34e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -701,10 +701,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) u32 rg_flags; rg_flags = be32_to_cpu(str->rg_flags); - if (rg_flags & GFS2_RGF_NOALLOC) - rgd->rd_flags |= GFS2_RDF_NOALLOC; - else - rgd->rd_flags &= ~GFS2_RDF_NOALLOC; + rg_flags &= ~GFS2_RDF_MASK; rgd->rd_free = be32_to_cpu(str->rg_free); rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); @@ -713,11 +710,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) { struct gfs2_rgrp *str = buf; - u32 rg_flags = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) - rg_flags |= GFS2_RGF_NOALLOC; - str->rg_flags = cpu_to_be32(rg_flags); + str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); str->rg_free = cpu_to_be32(rgd->rd_free); str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); str->__pad = cpu_to_be32(0); @@ -942,7 +936,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) struct gfs2_sbd *sdp = rgd->rd_sbd; int ret = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) + if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; spin_lock(&sdp->sd_rindex_spin); @@ -1435,13 +1429,33 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, } /** - * gfs2_alloc_block - Allocate a block + * gfs2_rgrp_dump - print out an rgrp + * @seq: The iterator + * @gl: The glock in question + * + */ + +int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) +{ + const struct gfs2_rgrpd *rgd = gl->gl_object; + if (rgd == NULL) + return 0; + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + (unsigned long long)rgd->rd_addr, rgd->rd_flags, + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + return 0; +} + +/** + * gfs2_alloc_block - Allocate one or more blocks * @ip: the inode to allocate the block for + * @bn: Used to return the starting block number + * @n: requested number of blocks/extent length (value/result) * - * Returns: the allocated block + * Returns: 0 or error */ -u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) +int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; @@ -1457,7 +1471,10 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) goal = rgd->rd_last_alloc; blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); - BUG_ON(blk == BFITNOENT); + + /* Since all blocks are reserved in advance, this shouldn't happen */ + if (blk == BFITNOENT) + goto rgrp_error; rgd->rd_last_alloc = blk; block = rgd->rd_data0 + blk; @@ -1469,7 +1486,9 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); brelse(dibh); } - gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); + if (rgd->rd_free < *n) + goto rgrp_error; + rgd->rd_free -= *n; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1484,7 +1503,16 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) rgd->rd_free_clone -= *n; spin_unlock(&sdp->sd_rindex_spin); - return block; + *bn = block; + return 0; + +rgrp_error: + fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", + (unsigned long long)rgd->rd_addr); + fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); + gfs2_rgrp_dump(NULL, rgd->rd_gl); + rgd->rd_flags |= GFS2_RDF_ERROR; + return -EIO; } /** diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3181c7e624bf..1e76ff0f3e00 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -14,22 +14,22 @@ struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; -void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); -void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); -int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); +extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); +extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); -int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); +extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); -struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); +extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); static inline void gfs2_alloc_put(struct gfs2_inode *ip) { BUG_ON(ip->i_alloc == NULL); @@ -37,22 +37,22 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) ip->i_alloc = NULL; } -int gfs2_inplace_reserve_i(struct gfs2_inode *ip, - char *file, unsigned int line); +extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, + unsigned int line); #define gfs2_inplace_reserve(ip) \ gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) -void gfs2_inplace_release(struct gfs2_inode *ip); +extern void gfs2_inplace_release(struct gfs2_inode *ip); -unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); +extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); -u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n); -u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); +extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); +extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); -void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); -void gfs2_unlink_di(struct inode *inode); +extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); +extern void gfs2_unlink_di(struct inode *inode); struct gfs2_rgrp_list { unsigned int rl_rgrps; @@ -61,10 +61,11 @@ struct gfs2_rgrp_list { struct gfs2_holder *rl_ghs; }; -void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, - u64 block); -void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); -void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); -u64 gfs2_ri_total(struct gfs2_sbd *sdp); +extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, + u64 block); +extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); +extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); +extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); +extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); #endif /* __RGRP_DOT_H__ */ -- cgit v1.2.3 From 60a0b8f93664621a07b93273fc8ebc29590c62f5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 21 May 2009 12:23:12 +0100 Subject: GFS2: Add a rgrp bitmap full flag During block allocation, it is useful to know if sections of disk are full on a finer grained basis than a single resource group. This can make a performance difference when resource groups have larger numbers of bitmap blocks, since we no longer have to search them all block by block in each individual bitmap. The full flag is set on a per-bitmap basis when it has been searched and found to have no free space. It is then skipped in subsequent searches until the flag is reset. The resetting occurs if we have to drop the glock on the resource group for any reason, or if we deallocate some blocks within that resource group and thus free up some space. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 3 +++ fs/gfs2/rgrp.c | 77 ++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 50 insertions(+), 30 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index de50d86fec12..dd87379b61e6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -64,9 +64,12 @@ struct gfs2_log_element { const struct gfs2_log_operations *le_ops; }; +#define GBF_FULL 1 + struct gfs2_bitmap { struct buffer_head *bi_bh; char *bi_clone; + unsigned long bi_flags; u32 bi_offset; u32 bi_start; u32 bi_len; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index fbacf09ee34e..23637b9d1c73 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -442,6 +442,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; + bi->bi_flags = 0; /* small rgrp; bitmap stored completely in header block */ if (length == 1) { bytes = bytes_left; @@ -769,6 +770,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { + for (x = 0; x < length; x++) + clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); rgd->rd_flags |= GFS2_RDF_UPTODATE; } @@ -897,6 +900,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) continue; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); + clear_bit(GBF_FULL, &bi->bi_flags); memcpy(bi->bi_clone + bi->bi_offset, bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); } @@ -1309,30 +1313,37 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, { struct gfs2_bitmap *bi = NULL; const u32 length = rgd->rd_length; - u32 blk = 0; + u32 blk = BFITNOENT; unsigned int buf, x; const unsigned int elen = *n; - const u8 *buffer; + const u8 *buffer = NULL; *n = 0; /* Find bitmap block that contains bits for goal block */ for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + /* Convert scope of "goal" from rgrp-wide to within found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } } + buf = 0; + goal = 0; - gfs2_assert(rgd->rd_sbd, buf < length); - - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - goal -= bi->bi_start * GFS2_NBBY; - +do_search: /* Search (up to entire) bitmap in this rgrp for allocatable block. "x <= length", instead of "x < length", because we typically start the search in the middle of a bit block, but if we can't find an allocatable block anywhere else, we want to be able wrap around and search in the first part of our first-searched bit block. */ for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags) && + (old_state == GFS2_BLKST_FREE)) + goto skip; + /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone bitmaps, so we must search the originals for that. */ buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1343,33 +1354,39 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, if (blk != BFITNOENT) break; + if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) + set_bit(GBF_FULL, &bi->bi_flags); + /* Try next bitmap block (wrap back to rgrp header if at end) */ - buf = (buf + 1) % length; - bi = rgd->rd_bits + buf; +skip: + buf++; + buf %= length; goal = 0; } - if (blk != BFITNOENT && old_state != new_state) { - *n = 1; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + if (blk == BFITNOENT) + return blk; + *n = 1; + if (old_state == new_state) + goto out; + + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, + bi->bi_len, blk, new_state); + goal = blk; + while (*n < elen) { + goal++; + if (goal >= (bi->bi_len * GFS2_NBBY)) + break; + if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != + GFS2_BLKST_FREE) + break; gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi->bi_len, blk, new_state); - goal = blk; - while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) - break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, - bi->bi_offset, bi->bi_len, goal, - new_state); - (*n)++; - } + bi->bi_len, goal, new_state); + (*n)++; } - - return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; +out: + return (bi->bi_start * GFS2_NBBY) + blk; } /** -- cgit v1.2.3 From 1ce97e564b628bee30b8dbb64e5e653a484308f6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 21 May 2009 15:18:19 +0100 Subject: GFS2: Be more aggressive in reclaiming unlinked inodes This patch increases the frequency with which gfs2 looks for unlinked, but still allocated inodes. Its the equivalent operation to ext3's orphan list, but done with bitmaps in the resource groups. This also fixes a bug where a field in the rgrp was too small. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 2 +- fs/gfs2/rgrp.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index dd87379b61e6..225347fbff3c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -94,7 +94,7 @@ struct gfs2_rgrpd { struct gfs2_sbd *rd_sbd; unsigned int rd_bh_count; u32 rd_last_alloc; - unsigned char rd_flags; + u32 rd_flags; #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 23637b9d1c73..ee3d5c1876a3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -581,7 +581,6 @@ static int read_rindex_entry(struct gfs2_inode *ip, rgd->rd_gl->gl_object = rgd; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; - rgd->rd_flags |= GFS2_RDF_CHECK; return error; } @@ -703,6 +702,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) rg_flags = be32_to_cpu(str->rg_flags); rg_flags &= ~GFS2_RDF_MASK; + rgd->rd_flags &= GFS2_RDF_MASK; + rgd->rd_flags |= rg_flags; rgd->rd_free = be32_to_cpu(str->rg_free); rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); @@ -773,7 +774,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); - rgd->rd_flags |= GFS2_RDF_UPTODATE; + rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); } spin_lock(&sdp->sd_rindex_spin); -- cgit v1.2.3 From b1e71b0622974953e46a284aa986504a90869a9b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:01:55 +0100 Subject: GFS2: Clean up some file names This patch renames the ops_*.c files which have no counterpart without the ops_ prefix in order to shorten the name and make it more readable. In addition, ops_address.h (which was very small) is moved into inode.h and inode.h is cleaned up by adding extern where required. Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 2 +- fs/gfs2/aops.c | 1145 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/bmap.c | 1 - fs/gfs2/dentry.c | 114 +++++ fs/gfs2/export.c | 285 ++++++++++++ fs/gfs2/file.c | 765 +++++++++++++++++++++++++++++++++ fs/gfs2/inode.c | 1 - fs/gfs2/inode.h | 57 +-- fs/gfs2/meta_io.c | 1 - fs/gfs2/ops_address.c | 1146 ------------------------------------------------- fs/gfs2/ops_address.h | 23 - fs/gfs2/ops_dentry.c | 114 ----- fs/gfs2/ops_export.c | 285 ------------ fs/gfs2/ops_file.c | 766 --------------------------------- fs/gfs2/quota.c | 1 - fs/gfs2/rgrp.c | 1 - 16 files changed, 2343 insertions(+), 2364 deletions(-) create mode 100644 fs/gfs2/aops.c create mode 100644 fs/gfs2/dentry.c create mode 100644 fs/gfs2/export.c create mode 100644 fs/gfs2/file.c delete mode 100644 fs/gfs2/ops_address.c delete mode 100644 fs/gfs2/ops_address.h delete mode 100644 fs/gfs2/ops_dentry.c delete mode 100644 fs/gfs2/ops_export.c delete mode 100644 fs/gfs2/ops_file.c (limited to 'fs/gfs2') diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index a851ea4bdf70..4f7332c7682f 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o log.o lops.o main.o meta_io.o \ - mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ + mount.o aops.o dentry.o export.o file.o \ ops_fstype.o ops_inode.o ops_super.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c new file mode 100644 index 000000000000..03ebb439ace0 --- /dev/null +++ b/fs/gfs2/aops.c @@ -0,0 +1,1145 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "incore.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "quota.h" +#include "trans.h" +#include "rgrp.h" +#include "super.h" +#include "util.h" +#include "glops.h" + + +static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int to) +{ + struct buffer_head *head = page_buffers(page); + unsigned int bsize = head->b_size; + struct buffer_head *bh; + unsigned int start, end; + + for (bh = head, start = 0; bh != head || !start; + bh = bh->b_this_page, start = end) { + end = start + bsize; + if (end <= from || start >= to) + continue; + if (gfs2_is_jdata(ip)) + set_buffer_uptodate(bh); + gfs2_trans_add_bh(ip->i_gl, bh, 0); + } +} + +/** + * gfs2_get_block_noalloc - Fills in a buffer head with details about a block + * @inode: The inode + * @lblock: The block number to look up + * @bh_result: The buffer head to return the result in + * @create: Non-zero if we may add block to the file + * + * Returns: errno + */ + +static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + int error; + + error = gfs2_block_map(inode, lblock, bh_result, 0); + if (error) + return error; + if (!buffer_mapped(bh_result)) + return -EIO; + return 0; +} + +static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + return gfs2_block_map(inode, lblock, bh_result, 0); +} + +/** + * gfs2_writepage_common - Common bits of writepage + * @page: The page to be written + * @wbc: The writeback control + * + * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. + */ + +static int gfs2_writepage_common(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) + goto out; + if (current->journal_info) + goto redirty; + /* Is the page fully outside i_size? (truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index > end_index || (page->index == end_index && !offset)) { + page->mapping->a_ops->invalidatepage(page, 0); + goto out; + } + return 1; +redirty: + redirty_page_for_writepage(wbc, page); +out: + unlock_page(page); + return 0; +} + +/** + * gfs2_writeback_writepage - Write page for writeback mappings + * @page: The page + * @wbc: The writeback control + * + */ + +static int gfs2_writeback_writepage(struct page *page, + struct writeback_control *wbc) +{ + int ret; + + ret = gfs2_writepage_common(page, wbc); + if (ret <= 0) + return ret; + + ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); + if (ret == -EAGAIN) + ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); + return ret; +} + +/** + * gfs2_ordered_writepage - Write page for ordered data files + * @page: The page to write + * @wbc: The writeback control + * + */ + +static int gfs2_ordered_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + int ret; + + ret = gfs2_writepage_common(page, wbc); + if (ret <= 0) + return ret; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); + return block_write_full_page(page, gfs2_get_block_noalloc, wbc); +} + +/** + * __gfs2_jdata_writepage - The core of jdata writepage + * @page: The page to write + * @wbc: The writeback control + * + * This is shared between writepage and writepages and implements the + * core of the writepage operation. If a transaction is required then + * PageChecked will have been set and the transaction will have + * already been started before this is called. + */ + +static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (PageChecked(page)) { + ClearPageChecked(page); + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); + } + return block_write_full_page(page, gfs2_get_block_noalloc, wbc); +} + +/** + * gfs2_jdata_writepage - Write complete page + * @page: Page to write + * + * Returns: errno + * + */ + +static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_sbd *sdp = GFS2_SB(inode); + int ret; + int done_trans = 0; + + if (PageChecked(page)) { + if (wbc->sync_mode != WB_SYNC_ALL) + goto out_ignore; + ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); + if (ret) + goto out_ignore; + done_trans = 1; + } + ret = gfs2_writepage_common(page, wbc); + if (ret > 0) + ret = __gfs2_jdata_writepage(page, wbc); + if (done_trans) + gfs2_trans_end(sdp); + return ret; + +out_ignore: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; +} + +/** + * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk + * @mapping: The mapping to write + * @wbc: Write-back control + * + * For the data=writeback case we can already ignore buffer heads + * and write whole extents at once. This is a big reduction in the + * number of I/O requests we send and the bmap calls we make in this case. + */ +static int gfs2_writeback_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); +} + +/** + * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages + * @mapping: The mapping + * @wbc: The writeback control + * @writepage: The writepage function to call for each page + * @pvec: The vector of pages + * @nr_pages: The number of pages to write + * + * Returns: non-zero if loop should terminate, zero otherwise + */ + +static int gfs2_write_jdata_pagevec(struct address_space *mapping, + struct writeback_control *wbc, + struct pagevec *pvec, + int nr_pages, pgoff_t end) +{ + struct inode *inode = mapping->host; + struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset = i_size & (PAGE_CACHE_SIZE-1); + unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); + struct backing_dev_info *bdi = mapping->backing_dev_info; + int i; + int ret; + + ret = gfs2_trans_begin(sdp, nrblocks, nrblocks); + if (ret < 0) + return ret; + + for(i = 0; i < nr_pages; i++) { + struct page *page = pvec->pages[i]; + + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + ret = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + /* Is the page fully outside i_size? (truncate in progress) */ + if (page->index > end_index || (page->index == end_index && !offset)) { + page->mapping->a_ops->invalidatepage(page, 0); + unlock_page(page); + continue; + } + + ret = __gfs2_jdata_writepage(page, wbc); + + if (ret || (--(wbc->nr_to_write) <= 0)) + ret = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + ret = 1; + } + + } + gfs2_trans_end(sdp); + return ret; +} + +/** + * gfs2_write_cache_jdata - Like write_cache_pages but different + * @mapping: The mapping to write + * @wbc: The writeback control + * @writepage: The writepage function to call + * @data: The data to pass to writepage + * + * The reason that we use our own function here is that we need to + * start transactions before we grab page locks. This allows us + * to get the ordering right. + */ + +static int gfs2_write_cache_jdata(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } + +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + scanned = 1; + ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); + if (ret) + done = 1; + if (ret > 0) + ret = 0; + + pagevec_release(&pvec); + cond_resched(); + } + + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + + +/** + * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk + * @mapping: The mapping to write + * @wbc: The writeback control + * + */ + +static int gfs2_jdata_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + int ret; + + ret = gfs2_write_cache_jdata(mapping, wbc); + if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { + gfs2_log_flush(sdp, ip->i_gl); + ret = gfs2_write_cache_jdata(mapping, wbc); + } + return ret; +} + +/** + * stuffed_readpage - Fill in a Linux page with stuffed file data + * @ip: the inode + * @page: the page + * + * Returns: errno + */ + +static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +{ + struct buffer_head *dibh; + void *kaddr; + int error; + + /* + * Due to the order of unstuffing files and ->fault(), we can be + * asked for a zero page in the case of a stuffed file being extended, + * so we need to supply one here. It doesn't happen often. + */ + if (unlikely(page->index)) { + zero_user(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return 0; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + kaddr = kmap_atomic(page, KM_USER0); + memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), + ip->i_disksize); + memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); + brelse(dibh); + SetPageUptodate(page); + + return 0; +} + + +/** + * __gfs2_readpage - readpage + * @file: The file to read a page for + * @page: The page to read + * + * This is the core of gfs2's readpage. Its used by the internal file + * reading code as in that case we already hold the glock. Also its + * called by gfs2_readpage() once the required lock has been granted. + * + */ + +static int __gfs2_readpage(void *file, struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(page->mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + int error; + + if (gfs2_is_stuffed(ip)) { + error = stuffed_readpage(ip, page); + unlock_page(page); + } else { + error = mpage_readpage(page, gfs2_block_map); + } + + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + + return error; +} + +/** + * gfs2_readpage - read a page of a file + * @file: The file to read + * @page: The page of the file + * + * This deals with the locking required. We have to unlock and + * relock the page in order to get the locking in the right + * order. + */ + +static int gfs2_readpage(struct file *file, struct page *page) +{ + struct address_space *mapping = page->mapping; + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_holder gh; + int error; + + unlock_page(page); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + error = gfs2_glock_nq(&gh); + if (unlikely(error)) + goto out; + error = AOP_TRUNCATED_PAGE; + lock_page(page); + if (page->mapping == mapping && !PageUptodate(page)) + error = __gfs2_readpage(file, page); + else + unlock_page(page); + gfs2_glock_dq(&gh); +out: + gfs2_holder_uninit(&gh); + if (error && error != AOP_TRUNCATED_PAGE) + lock_page(page); + return error; +} + +/** + * gfs2_internal_read - read an internal file + * @ip: The gfs2 inode + * @ra_state: The readahead state (or NULL for no readahead) + * @buf: The buffer to fill + * @pos: The file position + * @size: The amount to read + * + */ + +int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size) +{ + struct address_space *mapping = ip->i_inode.i_mapping; + unsigned long index = *pos / PAGE_CACHE_SIZE; + unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); + unsigned copied = 0; + unsigned amt; + struct page *page; + void *p; + + do { + amt = size - copied; + if (offset + size > PAGE_CACHE_SIZE) + amt = PAGE_CACHE_SIZE - offset; + page = read_cache_page(mapping, index, __gfs2_readpage, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + p = kmap_atomic(page, KM_USER0); + memcpy(buf + copied, p + offset, amt); + kunmap_atomic(p, KM_USER0); + mark_page_accessed(page); + page_cache_release(page); + copied += amt; + index++; + offset = 0; + } while(copied < size); + (*pos) += size; + return size; +} + +/** + * gfs2_readpages - Read a bunch of pages at once + * + * Some notes: + * 1. This is only for readahead, so we can simply ignore any things + * which are slightly inconvenient (such as locking conflicts between + * the page lock and the glock) and return having done no I/O. Its + * obviously not something we'd want to do on too regular a basis. + * Any I/O we ignore at this time will be done via readpage later. + * 2. We don't handle stuffed files here we let readpage do the honours. + * 3. mpage_readpages() does most of the heavy lifting in the common case. + * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. + */ + +static int gfs2_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + struct inode *inode = mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_holder gh; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (unlikely(ret)) + goto out_uninit; + if (!gfs2_is_stuffed(ip)) + ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + ret = -EIO; + return ret; +} + +/** + * gfs2_write_begin - Begin to write to a file + * @file: The file to write to + * @mapping: The mapping in which to write + * @pos: The file offset at which to start writing + * @len: Length of the write + * @flags: Various flags + * @pagep: Pointer to return the page + * @fsdata: Pointer to return fs data (unused by GFS2) + * + * Returns: errno + */ + +static int gfs2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + int alloc_required; + int error = 0; + struct gfs2_alloc *al; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned to = from + len; + struct page *page; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (unlikely(error)) + goto out_uninit; + + error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); + if (error) + goto out_unlock; + + if (alloc_required || gfs2_is_jdata(ip)) + gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); + + if (alloc_required) { + al = gfs2_alloc_get(ip); + if (!al) { + error = -ENOMEM; + goto out_unlock; + } + + error = gfs2_quota_lock_check(ip); + if (error) + goto out_alloc_put; + + al->al_requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) + goto out_qunlock; + } + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + + error = gfs2_trans_begin(sdp, rblocks, + PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); + if (error) + goto out_trans_fail; + + error = -ENOMEM; + flags |= AOP_FLAG_NOFS; + page = grab_cache_page_write_begin(mapping, index, flags); + *pagep = page; + if (unlikely(!page)) + goto out_endtrans; + + if (gfs2_is_stuffed(ip)) { + error = 0; + if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { + error = gfs2_unstuff_dinode(ip, page); + if (error == 0) + goto prepare_write; + } else if (!PageUptodate(page)) { + error = stuffed_readpage(ip, page); + } + goto out; + } + +prepare_write: + error = block_prepare_write(page, from, to, gfs2_block_map); +out: + if (error == 0) + return 0; + + page_cache_release(page); + if (pos + len > ip->i_inode.i_size) + vmtruncate(&ip->i_inode, ip->i_inode.i_size); +out_endtrans: + gfs2_trans_end(sdp); +out_trans_fail: + if (alloc_required) { + gfs2_inplace_release(ip); +out_qunlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); + } +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + +/** + * adjust_fs_space - Adjusts the free space available due to gfs2_grow + * @inode: the rindex inode + */ +static void adjust_fs_space(struct inode *inode) +{ + struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + u64 fs_total, new_free; + + /* Total up the file system space, according to the latest rindex. */ + fs_total = gfs2_ri_total(sdp); + + spin_lock(&sdp->sd_statfs_spin); + if (fs_total > (m_sc->sc_total + l_sc->sc_total)) + new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); + else + new_free = 0; + spin_unlock(&sdp->sd_statfs_spin); + fs_warn(sdp, "File system extended by %llu blocks.\n", + (unsigned long long)new_free); + gfs2_statfs_change(sdp, new_free, new_free, 0); +} + +/** + * gfs2_stuffed_write_end - Write end for stuffed files + * @inode: The inode + * @dibh: The buffer_head containing the on-disk inode + * @pos: The file position + * @len: The length of the write + * @copied: How much was actually copied by the VFS + * @page: The page + * + * This copies the data from the page into the inode block after + * the inode data structure itself. + * + * Returns: errno + */ +static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned len, unsigned copied, + struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + u64 to = pos + copied; + void *kaddr; + unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); + struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; + + BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); + kaddr = kmap_atomic(page, KM_USER0); + memcpy(buf + pos, kaddr + pos, copied); + memset(kaddr + pos + copied, 0, len - copied); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + + if (!PageUptodate(page)) + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + + if (copied) { + if (inode->i_size < to) { + i_size_write(inode, to); + ip->i_disksize = inode->i_size; + } + gfs2_dinode_out(ip, di); + mark_inode_dirty(inode); + } + + if (inode == sdp->sd_rindex) + adjust_fs_space(inode); + + brelse(dibh); + gfs2_trans_end(sdp); + gfs2_glock_dq(&ip->i_gh); + gfs2_holder_uninit(&ip->i_gh); + return copied; +} + +/** + * gfs2_write_end + * @file: The file to write to + * @mapping: The address space to write to + * @pos: The file position + * @len: The length of the data + * @copied: + * @page: The page that has been written + * @fsdata: The fsdata (unused in GFS2) + * + * The main write_end function for GFS2. We have a separate one for + * stuffed files as they are slightly different, otherwise we just + * put our locking around the VFS provided functions. + * + * Returns: errno + */ + +static int gfs2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct buffer_head *dibh; + struct gfs2_alloc *al = ip->i_alloc; + unsigned int from = pos & (PAGE_CACHE_SIZE - 1); + unsigned int to = from + len; + int ret; + + BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); + + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (unlikely(ret)) { + unlock_page(page); + page_cache_release(page); + goto failed; + } + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + + if (gfs2_is_stuffed(ip)) + return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); + + if (!gfs2_is_writeback(ip)) + gfs2_page_add_databufs(ip, page, from, to); + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (ret > 0) { + if (inode->i_size > ip->i_disksize) + ip->i_disksize = inode->i_size; + gfs2_dinode_out(ip, dibh->b_data); + mark_inode_dirty(inode); + } + + if (inode == sdp->sd_rindex) + adjust_fs_space(inode); + + brelse(dibh); + gfs2_trans_end(sdp); +failed: + if (al) { + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + gfs2_glock_dq(&ip->i_gh); + gfs2_holder_uninit(&ip->i_gh); + return ret; +} + +/** + * gfs2_set_page_dirty - Page dirtying function + * @page: The page to dirty + * + * Returns: 1 if it dirtyed the page, or 0 otherwise + */ + +static int gfs2_set_page_dirty(struct page *page) +{ + SetPageChecked(page); + return __set_page_dirty_buffers(page); +} + +/** + * gfs2_bmap - Block map function + * @mapping: Address space info + * @lblock: The block to map + * + * Returns: The disk address for the block or 0 on hole or error + */ + +static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) +{ + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_holder i_gh; + sector_t dblock = 0; + int error; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return 0; + + if (!gfs2_is_stuffed(ip)) + dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); + + gfs2_glock_dq_uninit(&i_gh); + + return dblock; +} + +static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_bufdata *bd; + + lock_buffer(bh); + gfs2_log_lock(sdp); + clear_buffer_dirty(bh); + bd = bh->b_private; + if (bd) { + if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh)) + list_del_init(&bd->bd_le.le_list); + else + gfs2_remove_from_journal(bh, current->journal_info, 0); + } + bh->b_bdev = NULL; + clear_buffer_mapped(bh); + clear_buffer_req(bh); + clear_buffer_new(bh); + gfs2_log_unlock(sdp); + unlock_buffer(bh); +} + +static void gfs2_invalidatepage(struct page *page, unsigned long offset) +{ + struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + struct buffer_head *bh, *head; + unsigned long pos = 0; + + BUG_ON(!PageLocked(page)); + if (offset == 0) + ClearPageChecked(page); + if (!page_has_buffers(page)) + goto out; + + bh = head = page_buffers(page); + do { + if (offset <= pos) + gfs2_discard(sdp, bh); + pos += bh->b_size; + bh = bh->b_this_page; + } while (bh != head); +out: + if (offset == 0) + try_to_release_page(page, 0); +} + +/** + * gfs2_ok_for_dio - check that dio is valid on this file + * @ip: The inode + * @rw: READ or WRITE + * @offset: The offset at which we are reading or writing + * + * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) + * 1 (to accept the i/o request) + */ +static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) +{ + /* + * Should we return an error here? I can't see that O_DIRECT for + * a stuffed file makes any sense. For now we'll silently fall + * back to buffered I/O + */ + if (gfs2_is_stuffed(ip)) + return 0; + + if (offset >= i_size_read(&ip->i_inode)) + return 0; + return 1; +} + + + +static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int rv; + + /* + * Deferred lock, even if its a write, since we do no allocation + * on this path. All we need change is atime, and this lock mode + * ensures that other nodes have flushed their buffered read caches + * (i.e. their page cache entries for this inode). We do not, + * unfortunately have the option of only flushing a range like + * the VFS does. + */ + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + rv = gfs2_glock_nq(&gh); + if (rv) + return rv; + rv = gfs2_ok_for_dio(ip, rw, offset); + if (rv != 1) + goto out; /* dio not valid, fall back to buffered i/o */ + + rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, + gfs2_get_block_direct, NULL); +out: + gfs2_glock_dq_m(1, &gh); + gfs2_holder_uninit(&gh); + return rv; +} + +/** + * gfs2_releasepage - free the metadata associated with a page + * @page: the page that's being released + * @gfp_mask: passed from Linux VFS, ignored by us + * + * Call try_to_free_buffers() if the buffers in this page can be + * released. + * + * Returns: 0 + */ + +int gfs2_releasepage(struct page *page, gfp_t gfp_mask) +{ + struct inode *aspace = page->mapping->host; + struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; + struct buffer_head *bh, *head; + struct gfs2_bufdata *bd; + + if (!page_has_buffers(page)) + return 0; + + gfs2_log_lock(sdp); + head = bh = page_buffers(page); + do { + if (atomic_read(&bh->b_count)) + goto cannot_release; + bd = bh->b_private; + if (bd && bd->bd_ail) + goto cannot_release; + gfs2_assert_warn(sdp, !buffer_pinned(bh)); + gfs2_assert_warn(sdp, !buffer_dirty(bh)); + bh = bh->b_this_page; + } while(bh != head); + gfs2_log_unlock(sdp); + + head = bh = page_buffers(page); + do { + gfs2_log_lock(sdp); + bd = bh->b_private; + if (bd) { + gfs2_assert_warn(sdp, bd->bd_bh == bh); + gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); + if (!list_empty(&bd->bd_le.le_list)) { + if (!buffer_pinned(bh)) + list_del_init(&bd->bd_le.le_list); + else + bd = NULL; + } + if (bd) + bd->bd_bh = NULL; + bh->b_private = NULL; + } + gfs2_log_unlock(sdp); + if (bd) + kmem_cache_free(gfs2_bufdata_cachep, bd); + + bh = bh->b_this_page; + } while (bh != head); + + return try_to_free_buffers(page); +cannot_release: + gfs2_log_unlock(sdp); + return 0; +} + +static const struct address_space_operations gfs2_writeback_aops = { + .writepage = gfs2_writeback_writepage, + .writepages = gfs2_writeback_writepages, + .readpage = gfs2_readpage, + .readpages = gfs2_readpages, + .sync_page = block_sync_page, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .releasepage = gfs2_releasepage, + .direct_IO = gfs2_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, +}; + +static const struct address_space_operations gfs2_ordered_aops = { + .writepage = gfs2_ordered_writepage, + .readpage = gfs2_readpage, + .readpages = gfs2_readpages, + .sync_page = block_sync_page, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, + .set_page_dirty = gfs2_set_page_dirty, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .releasepage = gfs2_releasepage, + .direct_IO = gfs2_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, +}; + +static const struct address_space_operations gfs2_jdata_aops = { + .writepage = gfs2_jdata_writepage, + .writepages = gfs2_jdata_writepages, + .readpage = gfs2_readpage, + .readpages = gfs2_readpages, + .sync_page = block_sync_page, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, + .set_page_dirty = gfs2_set_page_dirty, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .releasepage = gfs2_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, +}; + +void gfs2_set_aops(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + if (gfs2_is_writeback(ip)) + inode->i_mapping->a_ops = &gfs2_writeback_aops; + else if (gfs2_is_ordered(ip)) + inode->i_mapping->a_ops = &gfs2_ordered_aops; + else if (gfs2_is_jdata(ip)) + inode->i_mapping->a_ops = &gfs2_jdata_aops; + else + BUG(); +} + diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 253e1a39f841..1153a078920c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -25,7 +25,6 @@ #include "trans.h" #include "dir.h" #include "util.h" -#include "ops_address.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c new file mode 100644 index 000000000000..022c66cd5606 --- /dev/null +++ b/fs/gfs2/dentry.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "incore.h" +#include "dir.h" +#include "glock.h" +#include "super.h" +#include "util.h" +#include "inode.h" + +/** + * gfs2_drevalidate - Check directory lookup consistency + * @dentry: the mapping to check + * @nd: + * + * Check to make sure the lookup necessary to arrive at this inode from its + * parent is still good. + * + * Returns: 1 if the dentry is ok, 0 if it isn't + */ + +static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = dget_parent(dentry); + struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); + struct gfs2_inode *dip = GFS2_I(parent->d_inode); + struct inode *inode = dentry->d_inode; + struct gfs2_holder d_gh; + struct gfs2_inode *ip = NULL; + int error; + int had_lock = 0; + + if (inode) { + if (is_bad_inode(inode)) + goto invalid; + ip = GFS2_I(inode); + } + + if (sdp->sd_args.ar_localcaching) + goto valid; + + had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); + if (!had_lock) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + goto fail; + } + + error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip); + switch (error) { + case 0: + if (!inode) + goto invalid_gunlock; + break; + case -ENOENT: + if (!inode) + goto valid_gunlock; + goto invalid_gunlock; + default: + goto fail_gunlock; + } + +valid_gunlock: + if (!had_lock) + gfs2_glock_dq_uninit(&d_gh); +valid: + dput(parent); + return 1; + +invalid_gunlock: + if (!had_lock) + gfs2_glock_dq_uninit(&d_gh); +invalid: + if (inode && S_ISDIR(inode->i_mode)) { + if (have_submounts(dentry)) + goto valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); + dput(parent); + return 0; + +fail_gunlock: + gfs2_glock_dq_uninit(&d_gh); +fail: + dput(parent); + return 0; +} + +static int gfs2_dhash(struct dentry *dentry, struct qstr *str) +{ + str->hash = gfs2_disk_hash(str->name, str->len); + return 0; +} + +const struct dentry_operations gfs2_dops = { + .d_revalidate = gfs2_drevalidate, + .d_hash = gfs2_dhash, +}; + diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c new file mode 100644 index 000000000000..9200ef221716 --- /dev/null +++ b/fs/gfs2/export.c @@ -0,0 +1,285 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "incore.h" +#include "dir.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "super.h" +#include "rgrp.h" +#include "util.h" + +#define GFS2_SMALL_FH_SIZE 4 +#define GFS2_LARGE_FH_SIZE 8 +#define GFS2_OLD_FH_SIZE 10 + +static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, + int connectable) +{ + __be32 *fh = (__force __be32 *)p; + struct inode *inode = dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct gfs2_inode *ip = GFS2_I(inode); + + if (*len < GFS2_SMALL_FH_SIZE || + (connectable && *len < GFS2_LARGE_FH_SIZE)) + return 255; + + fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); + fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); + fh[2] = cpu_to_be32(ip->i_no_addr >> 32); + fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); + *len = GFS2_SMALL_FH_SIZE; + + if (!connectable || inode == sb->s_root->d_inode) + return *len; + + spin_lock(&dentry->d_lock); + inode = dentry->d_parent->d_inode; + ip = GFS2_I(inode); + igrab(inode); + spin_unlock(&dentry->d_lock); + + fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); + fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); + fh[6] = cpu_to_be32(ip->i_no_addr >> 32); + fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); + *len = GFS2_LARGE_FH_SIZE; + + iput(inode); + + return *len; +} + +struct get_name_filldir { + struct gfs2_inum_host inum; + char *name; +}; + +static int get_name_filldir(void *opaque, const char *name, int length, + loff_t offset, u64 inum, unsigned int type) +{ + struct get_name_filldir *gnfd = opaque; + + if (inum != gnfd->inum.no_addr) + return 0; + + memcpy(gnfd->name, name, length); + gnfd->name[length] = 0; + + return 1; +} + +static int gfs2_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct inode *dir = parent->d_inode; + struct inode *inode = child->d_inode; + struct gfs2_inode *dip, *ip; + struct get_name_filldir gnfd; + struct gfs2_holder gh; + u64 offset = 0; + int error; + + if (!dir) + return -EINVAL; + + if (!S_ISDIR(dir->i_mode) || !inode) + return -EINVAL; + + dip = GFS2_I(dir); + ip = GFS2_I(inode); + + *name = 0; + gnfd.inum.no_addr = ip->i_no_addr; + gnfd.inum.no_formal_ino = ip->i_no_formal_ino; + gnfd.name = name; + + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); + if (error) + return error; + + error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir); + + gfs2_glock_dq_uninit(&gh); + + if (!error && !*name) + error = -ENOENT; + + return error; +} + +static struct dentry *gfs2_get_parent(struct dentry *child) +{ + struct qstr dotdot; + struct dentry *dentry; + + /* + * XXX(hch): it would be a good idea to keep this around as a + * static variable. + */ + gfs2_str2qstr(&dotdot, ".."); + + dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); + if (!IS_ERR(dentry)) + dentry->d_op = &gfs2_dops; + return dentry; +} + +static struct dentry *gfs2_get_dentry(struct super_block *sb, + struct gfs2_inum_host *inum) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_holder i_gh, ri_gh, rgd_gh; + struct gfs2_rgrpd *rgd; + struct inode *inode; + struct dentry *dentry; + int error; + + /* System files? */ + + inode = gfs2_ilookup(sb, inum->no_addr); + if (inode) { + if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { + iput(inode); + return ERR_PTR(-ESTALE); + } + goto out_inode; + } + + error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return ERR_PTR(error); + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto fail; + + error = -EINVAL; + rgd = gfs2_blk2rgrpd(sdp, inum->no_addr); + if (!rgd) + goto fail_rindex; + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); + if (error) + goto fail_rindex; + + error = -ESTALE; + if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE) + goto fail_rgd; + + gfs2_glock_dq_uninit(&rgd_gh); + gfs2_glock_dq_uninit(&ri_gh); + + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, + inum->no_addr, + 0, 0); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto fail; + } + + error = gfs2_inode_refresh(GFS2_I(inode)); + if (error) { + iput(inode); + goto fail; + } + + /* Pick up the works we bypass in gfs2_inode_lookup */ + if (inode->i_state & I_NEW) + gfs2_set_iop(inode); + + if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { + iput(inode); + goto fail; + } + + error = -EIO; + if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) { + iput(inode); + goto fail; + } + + gfs2_glock_dq_uninit(&i_gh); + +out_inode: + dentry = d_obtain_alias(inode); + if (!IS_ERR(dentry)) + dentry->d_op = &gfs2_dops; + return dentry; + +fail_rgd: + gfs2_glock_dq_uninit(&rgd_gh); + +fail_rindex: + gfs2_glock_dq_uninit(&ri_gh); + +fail: + gfs2_glock_dq_uninit(&i_gh); + return ERR_PTR(error); +} + +static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct gfs2_inum_host this; + __be32 *fh = (__force __be32 *)fid->raw; + + switch (fh_type) { + case GFS2_SMALL_FH_SIZE: + case GFS2_LARGE_FH_SIZE: + case GFS2_OLD_FH_SIZE: + this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; + this.no_formal_ino |= be32_to_cpu(fh[1]); + this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; + this.no_addr |= be32_to_cpu(fh[3]); + return gfs2_get_dentry(sb, &this); + default: + return NULL; + } +} + +static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct gfs2_inum_host parent; + __be32 *fh = (__force __be32 *)fid->raw; + + switch (fh_type) { + case GFS2_LARGE_FH_SIZE: + case GFS2_OLD_FH_SIZE: + parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; + parent.no_formal_ino |= be32_to_cpu(fh[5]); + parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; + parent.no_addr |= be32_to_cpu(fh[7]); + return gfs2_get_dentry(sb, &parent); + default: + return NULL; + } +} + +const struct export_operations gfs2_export_ops = { + .encode_fh = gfs2_encode_fh, + .fh_to_dentry = gfs2_fh_to_dentry, + .fh_to_parent = gfs2_fh_to_parent, + .get_name = gfs2_get_name, + .get_parent = gfs2_get_parent, +}; + diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c new file mode 100644 index 000000000000..73b6f552f06d --- /dev/null +++ b/fs/gfs2/file.c @@ -0,0 +1,765 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "incore.h" +#include "bmap.h" +#include "dir.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" +#include "util.h" +#include "eaops.h" + +/** + * gfs2_llseek - seek to a location in a file + * @file: the file + * @offset: the offset + * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) + * + * SEEK_END requires the glock for the file because it references the + * file's size. + * + * Returns: The new offset, or errno + */ + +static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) +{ + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + struct gfs2_holder i_gh; + loff_t error; + + if (origin == 2) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (!error) { + error = generic_file_llseek_unlocked(file, offset, origin); + gfs2_glock_dq_uninit(&i_gh); + } + } else + error = generic_file_llseek_unlocked(file, offset, origin); + + return error; +} + +/** + * gfs2_readdir - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: errno + */ + +static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + struct inode *dir = file->f_mapping->host; + struct gfs2_inode *dip = GFS2_I(dir); + struct gfs2_holder d_gh; + u64 offset = file->f_pos; + int error; + + gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + error = gfs2_glock_nq(&d_gh); + if (error) { + gfs2_holder_uninit(&d_gh); + return error; + } + + error = gfs2_dir_read(dir, &offset, dirent, filldir); + + gfs2_glock_dq_uninit(&d_gh); + + file->f_pos = offset; + + return error; +} + +/** + * fsflags_cvt + * @table: A table of 32 u32 flags + * @val: a 32 bit value to convert + * + * This function can be used to convert between fsflags values and + * GFS2's own flags values. + * + * Returns: the converted flags + */ +static u32 fsflags_cvt(const u32 *table, u32 val) +{ + u32 res = 0; + while(val) { + if (val & 1) + res |= *table; + table++; + val >>= 1; + } + return res; +} + +static const u32 fsflags_to_gfs2[32] = { + [3] = GFS2_DIF_SYNC, + [4] = GFS2_DIF_IMMUTABLE, + [5] = GFS2_DIF_APPENDONLY, + [7] = GFS2_DIF_NOATIME, + [12] = GFS2_DIF_EXHASH, + [14] = GFS2_DIF_INHERIT_JDATA, +}; + +static const u32 gfs2_to_fsflags[32] = { + [gfs2fl_Sync] = FS_SYNC_FL, + [gfs2fl_Immutable] = FS_IMMUTABLE_FL, + [gfs2fl_AppendOnly] = FS_APPEND_FL, + [gfs2fl_NoAtime] = FS_NOATIME_FL, + [gfs2fl_ExHash] = FS_INDEX_FL, + [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, +}; + +static int gfs2_get_flags(struct file *filp, u32 __user *ptr) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int error; + u32 fsflags; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); + error = gfs2_glock_nq(&gh); + if (error) + return error; + + fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags); + if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA) + fsflags |= FS_JOURNAL_DATA_FL; + if (put_user(fsflags, ptr)) + error = -EFAULT; + + gfs2_glock_dq(&gh); + gfs2_holder_uninit(&gh); + return error; +} + +void gfs2_set_inode_flags(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + unsigned int flags = inode->i_flags; + + flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) + flags |= S_IMMUTABLE; + if (ip->i_diskflags & GFS2_DIF_APPENDONLY) + flags |= S_APPEND; + if (ip->i_diskflags & GFS2_DIF_NOATIME) + flags |= S_NOATIME; + if (ip->i_diskflags & GFS2_DIF_SYNC) + flags |= S_SYNC; + inode->i_flags = flags; +} + +/* Flags that can be set by user space */ +#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ + GFS2_DIF_IMMUTABLE| \ + GFS2_DIF_APPENDONLY| \ + GFS2_DIF_NOATIME| \ + GFS2_DIF_SYNC| \ + GFS2_DIF_SYSTEM| \ + GFS2_DIF_INHERIT_JDATA) + +/** + * gfs2_set_flags - set flags on an inode + * @inode: The inode + * @flags: The flags to set + * @mask: Indicates which flags are valid + * + */ +static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct buffer_head *bh; + struct gfs2_holder gh; + int error; + u32 new_flags, flags; + + error = mnt_want_write(filp->f_path.mnt); + if (error) + return error; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (error) + goto out_drop_write; + + flags = ip->i_diskflags; + new_flags = (flags & ~mask) | (reqflags & mask); + if ((new_flags ^ flags) == 0) + goto out; + + error = -EINVAL; + if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET) + goto out; + + error = -EPERM; + if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE)) + goto out; + if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) + goto out; + if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) && + !capable(CAP_LINUX_IMMUTABLE)) + goto out; + if (!IS_IMMUTABLE(inode)) { + error = gfs2_permission(inode, MAY_WRITE); + if (error) + goto out; + } + if ((flags ^ new_flags) & GFS2_DIF_JDATA) { + if (flags & GFS2_DIF_JDATA) + gfs2_log_flush(sdp, ip->i_gl); + error = filemap_fdatawrite(inode->i_mapping); + if (error) + goto out; + error = filemap_fdatawait(inode->i_mapping); + if (error) + goto out; + } + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + goto out; + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + goto out_trans_end; + gfs2_trans_add_bh(ip->i_gl, bh, 1); + ip->i_diskflags = new_flags; + gfs2_dinode_out(ip, bh->b_data); + brelse(bh); + gfs2_set_inode_flags(inode); + gfs2_set_aops(inode); +out_trans_end: + gfs2_trans_end(sdp); +out: + gfs2_glock_dq_uninit(&gh); +out_drop_write: + mnt_drop_write(filp->f_path.mnt); + return error; +} + +static int gfs2_set_flags(struct file *filp, u32 __user *ptr) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + u32 fsflags, gfsflags; + if (get_user(fsflags, ptr)) + return -EFAULT; + gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); + if (!S_ISDIR(inode->i_mode)) { + if (gfsflags & GFS2_DIF_INHERIT_JDATA) + gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); + return do_gfs2_set_flags(filp, gfsflags, ~0); + } + return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); +} + +static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + switch(cmd) { + case FS_IOC_GETFLAGS: + return gfs2_get_flags(filp, (u32 __user *)arg); + case FS_IOC_SETFLAGS: + return gfs2_set_flags(filp, (u32 __user *)arg); + } + return -ENOTTY; +} + +/** + * gfs2_allocate_page_backing - Use bmap to allocate blocks + * @page: The (locked) page to allocate backing for + * + * We try to allocate all the blocks required for the page in + * one go. This might fail for various reasons, so we keep + * trying until all the blocks to back this page are allocated. + * If some of the blocks are already allocated, thats ok too. + */ + +static int gfs2_allocate_page_backing(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct buffer_head bh; + unsigned long size = PAGE_CACHE_SIZE; + u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + do { + bh.b_state = 0; + bh.b_size = size; + gfs2_block_map(inode, lblock, &bh, 1); + if (!buffer_mapped(&bh)) + return -EIO; + size -= bh.b_size; + lblock += (bh.b_size >> inode->i_blkbits); + } while(size > 0); + return 0; +} + +/** + * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable + * @vma: The virtual memory area + * @page: The page which is about to become writable + * + * When the page becomes writable, we need to ensure that we have + * blocks allocated on disk to back that page. + */ + +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned long last_index; + u64 pos = page->index << PAGE_CACHE_SHIFT; + unsigned int data_blocks, ind_blocks, rblocks; + int alloc_required = 0; + struct gfs2_holder gh; + struct gfs2_alloc *al; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out; + + set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); + set_bit(GIF_SW_PAGED, &ip->i_flags); + + ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); + if (ret || !alloc_required) + goto out_unlock; + ret = -ENOMEM; + al = gfs2_alloc_get(ip); + if (al == NULL) + goto out_unlock; + + ret = gfs2_quota_lock_check(ip); + if (ret) + goto out_alloc_put; + gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); + al->al_requested = data_blocks + ind_blocks; + ret = gfs2_inplace_reserve(ip); + if (ret) + goto out_quota_unlock; + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + ret = gfs2_trans_begin(sdp, rblocks, 0); + if (ret) + goto out_trans_fail; + + lock_page(page); + ret = -EINVAL; + last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; + if (page->index > last_index) + goto out_unlock_page; + ret = 0; + if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) + goto out_unlock_page; + if (gfs2_is_stuffed(ip)) { + ret = gfs2_unstuff_dinode(ip, page); + if (ret) + goto out_unlock_page; + } + ret = gfs2_allocate_page_backing(page); + +out_unlock_page: + unlock_page(page); + gfs2_trans_end(sdp); +out_trans_fail: + gfs2_inplace_release(ip); +out_quota_unlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); +out_unlock: + gfs2_glock_dq(&gh); +out: + gfs2_holder_uninit(&gh); + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else if (ret) + ret = VM_FAULT_SIGBUS; + return ret; +} + +static struct vm_operations_struct gfs2_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = gfs2_page_mkwrite, +}; + +/** + * gfs2_mmap - + * @file: The file to map + * @vma: The VMA which described the mapping + * + * There is no need to get a lock here unless we should be updating + * atime. We ignore any locking errors since the only consequence is + * a missed atime update (which will just be deferred until later). + * + * Returns: 0 + */ + +static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + + if (!(file->f_flags & O_NOATIME)) { + struct gfs2_holder i_gh; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); + file_accessed(file); + if (error == 0) + gfs2_glock_dq_uninit(&i_gh); + } + vma->vm_ops = &gfs2_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + + return 0; +} + +/** + * gfs2_open - open a file + * @inode: the inode to open + * @file: the struct file for this opening + * + * Returns: errno + */ + +static int gfs2_open(struct inode *inode, struct file *file) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder i_gh; + struct gfs2_file *fp; + int error; + + fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); + if (!fp) + return -ENOMEM; + + mutex_init(&fp->f_fl_mutex); + + gfs2_assert_warn(GFS2_SB(inode), !file->private_data); + file->private_data = fp; + + if (S_ISREG(ip->i_inode.i_mode)) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (error) + goto fail; + + if (!(file->f_flags & O_LARGEFILE) && + ip->i_disksize > MAX_NON_LFS) { + error = -EOVERFLOW; + goto fail_gunlock; + } + + gfs2_glock_dq_uninit(&i_gh); + } + + return 0; + +fail_gunlock: + gfs2_glock_dq_uninit(&i_gh); +fail: + file->private_data = NULL; + kfree(fp); + return error; +} + +/** + * gfs2_close - called to close a struct file + * @inode: the inode the struct file belongs to + * @file: the struct file being closed + * + * Returns: errno + */ + +static int gfs2_close(struct inode *inode, struct file *file) +{ + struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; + struct gfs2_file *fp; + + fp = file->private_data; + file->private_data = NULL; + + if (gfs2_assert_warn(sdp, fp)) + return -EIO; + + kfree(fp); + + return 0; +} + +/** + * gfs2_fsync - sync the dirty data for a file (across the cluster) + * @file: the file that points to the dentry (we ignore this) + * @dentry: the dentry that points to the inode to sync + * + * The VFS will flush "normal" data for us. We only need to worry + * about metadata here. For journaled data, we just do a log flush + * as we can't avoid it. Otherwise we can just bale out if datasync + * is set. For stuffed inodes we must flush the log in order to + * ensure that all data is on disk. + * + * The call to write_inode_now() is there to write back metadata and + * the inode itself. It does also try and write the data, but thats + * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite() + * for us. + * + * Returns: errno + */ + +static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); + int ret = 0; + + if (gfs2_is_jdata(GFS2_I(inode))) { + gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); + return 0; + } + + if (sync_state != 0) { + if (!datasync) + ret = write_inode_now(inode, 0); + + if (gfs2_is_stuffed(GFS2_I(inode))) + gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); + } + + return ret; +} + +#ifdef CONFIG_GFS2_FS_LOCKING_DLM + +/** + * gfs2_setlease - acquire/release a file lease + * @file: the file pointer + * @arg: lease type + * @fl: file lock + * + * We don't currently have a way to enforce a lease across the whole + * cluster; until we do, disable leases (by just returning -EINVAL), + * unless the administrator has requested purely local locking. + * + * Returns: errno + */ + +static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) +{ + return -EINVAL; +} + +/** + * gfs2_lock - acquire/release a posix lock on a file + * @file: the file pointer + * @cmd: either modify or retrieve lock state, possibly wait + * @fl: type and range of lock + * + * Returns: errno + */ + +static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + + if (!(fl->fl_flags & FL_POSIX)) + return -ENOLCK; + if (__mandatory_lock(&ip->i_inode)) + return -ENOLCK; + + if (cmd == F_CANCELLK) { + /* Hack: */ + cmd = F_SETLK; + fl->fl_type = F_UNLCK; + } + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + if (IS_GETLK(cmd)) + return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); + else if (fl->fl_type == F_UNLCK) + return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); + else + return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); +} + +static int do_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_file *fp = file->private_data; + struct gfs2_holder *fl_gh = &fp->f_fl_gh; + struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode); + struct gfs2_glock *gl; + unsigned int state; + int flags; + int error = 0; + + state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + + mutex_lock(&fp->f_fl_mutex); + + gl = fl_gh->gh_gl; + if (gl) { + if (fl_gh->gh_state == state) + goto out; + flock_lock_file_wait(file, + &(struct file_lock){.fl_type = F_UNLCK}); + gfs2_glock_dq_wait(fl_gh); + gfs2_holder_reinit(state, flags, fl_gh); + } else { + error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, + &gfs2_flock_glops, CREATE, &gl); + if (error) + goto out; + gfs2_holder_init(gl, state, flags, fl_gh); + gfs2_glock_put(gl); + } + error = gfs2_glock_nq(fl_gh); + if (error) { + gfs2_holder_uninit(fl_gh); + if (error == GLR_TRYFAILED) + error = -EAGAIN; + } else { + error = flock_lock_file_wait(file, fl); + gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); + } + +out: + mutex_unlock(&fp->f_fl_mutex); + return error; +} + +static void do_unflock(struct file *file, struct file_lock *fl) +{ + struct gfs2_file *fp = file->private_data; + struct gfs2_holder *fl_gh = &fp->f_fl_gh; + + mutex_lock(&fp->f_fl_mutex); + flock_lock_file_wait(file, fl); + if (fl_gh->gh_gl) + gfs2_glock_dq_uninit(fl_gh); + mutex_unlock(&fp->f_fl_mutex); +} + +/** + * gfs2_flock - acquire/release a flock lock on a file + * @file: the file pointer + * @cmd: either modify or retrieve lock state, possibly wait + * @fl: type and range of lock + * + * Returns: errno + */ + +static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + + if (!(fl->fl_flags & FL_FLOCK)) + return -ENOLCK; + if (__mandatory_lock(&ip->i_inode)) + return -ENOLCK; + + if (fl->fl_type == F_UNLCK) { + do_unflock(file, fl); + return 0; + } else { + return do_flock(file, cmd, fl); + } +} + +const struct file_operations gfs2_file_fops = { + .llseek = gfs2_llseek, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, + .unlocked_ioctl = gfs2_ioctl, + .mmap = gfs2_mmap, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .lock = gfs2_lock, + .flock = gfs2_flock, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, + .setlease = gfs2_setlease, +}; + +const struct file_operations gfs2_dir_fops = { + .readdir = gfs2_readdir, + .unlocked_ioctl = gfs2_ioctl, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .lock = gfs2_lock, + .flock = gfs2_flock, +}; + +#endif /* CONFIG_GFS2_FS_LOCKING_DLM */ + +const struct file_operations gfs2_file_fops_nolock = { + .llseek = gfs2_llseek, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, + .unlocked_ioctl = gfs2_ioctl, + .mmap = gfs2_mmap, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, + .setlease = generic_setlease, +}; + +const struct file_operations gfs2_dir_fops_nolock = { + .readdir = gfs2_readdir, + .unlocked_ioctl = gfs2_ioctl, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, +}; + diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5a31d426116f..c03a1a384e72 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -30,7 +30,6 @@ #include "inode.h" #include "log.h" #include "meta_io.h" -#include "ops_address.h" #include "quota.h" #include "rgrp.h" #include "trans.h" diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c30be2b66580..2c3ec072d60e 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -11,8 +11,16 @@ #define __INODE_DOT_H__ #include +#include +#include #include "util.h" +extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); +extern int gfs2_internal_read(struct gfs2_inode *ip, + struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size); +extern void gfs2_set_aops(struct inode *inode); + static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { return !ip->i_height; @@ -73,30 +81,31 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, } -void gfs2_set_iop(struct inode *inode); -struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino, - int skip_freeing); -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); - -int gfs2_inode_refresh(struct gfs2_inode *ip); - -int gfs2_dinode_dealloc(struct gfs2_inode *inode); -int gfs2_change_nlink(struct gfs2_inode *ip, int diff); -struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root); -struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode, dev_t dev); -int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip); -int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - const struct gfs2_inode *ip); -int gfs2_permission(struct inode *inode, int mask); -int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); -int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); -struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); -void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); -void gfs2_dinode_print(const struct gfs2_inode *ip); +extern void gfs2_set_iop(struct inode *inode); +extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, + u64 no_addr, u64 no_formal_ino, + int skip_freeing); +extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); + +extern int gfs2_inode_refresh(struct gfs2_inode *ip); + +extern int gfs2_dinode_dealloc(struct gfs2_inode *inode); +extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff); +extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, + int is_root); +extern struct inode *gfs2_createi(struct gfs2_holder *ghs, + const struct qstr *name, + unsigned int mode, dev_t dev); +extern int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, + struct gfs2_inode *ip); +extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, + const struct gfs2_inode *ip); +extern int gfs2_permission(struct inode *inode, int mask); +extern int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); +extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); +extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); +extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); +extern void gfs2_dinode_print(const struct gfs2_inode *ip); extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 78a5f4312667..cb8d7a93d5ec 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -31,7 +31,6 @@ #include "rgrp.h" #include "trans.h" #include "util.h" -#include "ops_address.h" static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c deleted file mode 100644 index e5664210f0d8..000000000000 --- a/fs/gfs2/ops_address.c +++ /dev/null @@ -1,1146 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "bmap.h" -#include "glock.h" -#include "inode.h" -#include "log.h" -#include "meta_io.h" -#include "ops_address.h" -#include "quota.h" -#include "trans.h" -#include "rgrp.h" -#include "super.h" -#include "util.h" -#include "glops.h" - - -static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int to) -{ - struct buffer_head *head = page_buffers(page); - unsigned int bsize = head->b_size; - struct buffer_head *bh; - unsigned int start, end; - - for (bh = head, start = 0; bh != head || !start; - bh = bh->b_this_page, start = end) { - end = start + bsize; - if (end <= from || start >= to) - continue; - if (gfs2_is_jdata(ip)) - set_buffer_uptodate(bh); - gfs2_trans_add_bh(ip->i_gl, bh, 0); - } -} - -/** - * gfs2_get_block_noalloc - Fills in a buffer head with details about a block - * @inode: The inode - * @lblock: The block number to look up - * @bh_result: The buffer head to return the result in - * @create: Non-zero if we may add block to the file - * - * Returns: errno - */ - -static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - int error; - - error = gfs2_block_map(inode, lblock, bh_result, 0); - if (error) - return error; - if (!buffer_mapped(bh_result)) - return -EIO; - return 0; -} - -static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return gfs2_block_map(inode, lblock, bh_result, 0); -} - -/** - * gfs2_writepage_common - Common bits of writepage - * @page: The page to be written - * @wbc: The writeback control - * - * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. - */ - -static int gfs2_writepage_common(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset; - - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) - goto out; - if (current->journal_info) - goto redirty; - /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); - if (page->index > end_index || (page->index == end_index && !offset)) { - page->mapping->a_ops->invalidatepage(page, 0); - goto out; - } - return 1; -redirty: - redirty_page_for_writepage(wbc, page); -out: - unlock_page(page); - return 0; -} - -/** - * gfs2_writeback_writepage - Write page for writeback mappings - * @page: The page - * @wbc: The writeback control - * - */ - -static int gfs2_writeback_writepage(struct page *page, - struct writeback_control *wbc) -{ - int ret; - - ret = gfs2_writepage_common(page, wbc); - if (ret <= 0) - return ret; - - ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); - if (ret == -EAGAIN) - ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); - return ret; -} - -/** - * gfs2_ordered_writepage - Write page for ordered data files - * @page: The page to write - * @wbc: The writeback control - * - */ - -static int gfs2_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - int ret; - - ret = gfs2_writepage_common(page, wbc); - if (ret <= 0) - return ret; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); - return block_write_full_page(page, gfs2_get_block_noalloc, wbc); -} - -/** - * __gfs2_jdata_writepage - The core of jdata writepage - * @page: The page to write - * @wbc: The writeback control - * - * This is shared between writepage and writepages and implements the - * core of the writepage operation. If a transaction is required then - * PageChecked will have been set and the transaction will have - * already been started before this is called. - */ - -static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - - if (PageChecked(page)) { - ClearPageChecked(page); - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); - } - return block_write_full_page(page, gfs2_get_block_noalloc, wbc); -} - -/** - * gfs2_jdata_writepage - Write complete page - * @page: Page to write - * - * Returns: errno - * - */ - -static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_sbd *sdp = GFS2_SB(inode); - int ret; - int done_trans = 0; - - if (PageChecked(page)) { - if (wbc->sync_mode != WB_SYNC_ALL) - goto out_ignore; - ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); - if (ret) - goto out_ignore; - done_trans = 1; - } - ret = gfs2_writepage_common(page, wbc); - if (ret > 0) - ret = __gfs2_jdata_writepage(page, wbc); - if (done_trans) - gfs2_trans_end(sdp); - return ret; - -out_ignore: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - -/** - * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk - * @mapping: The mapping to write - * @wbc: Write-back control - * - * For the data=writeback case we can already ignore buffer heads - * and write whole extents at once. This is a big reduction in the - * number of I/O requests we send and the bmap calls we make in this case. - */ -static int gfs2_writeback_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); -} - -/** - * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages - * @mapping: The mapping - * @wbc: The writeback control - * @writepage: The writepage function to call for each page - * @pvec: The vector of pages - * @nr_pages: The number of pages to write - * - * Returns: non-zero if loop should terminate, zero otherwise - */ - -static int gfs2_write_jdata_pagevec(struct address_space *mapping, - struct writeback_control *wbc, - struct pagevec *pvec, - int nr_pages, pgoff_t end) -{ - struct inode *inode = mapping->host; - struct gfs2_sbd *sdp = GFS2_SB(inode); - loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset = i_size & (PAGE_CACHE_SIZE-1); - unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); - struct backing_dev_info *bdi = mapping->backing_dev_info; - int i; - int ret; - - ret = gfs2_trans_begin(sdp, nrblocks, nrblocks); - if (ret < 0) - return ret; - - for(i = 0; i < nr_pages; i++) { - struct page *page = pvec->pages[i]; - - lock_page(page); - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - continue; - } - - if (!wbc->range_cyclic && page->index > end) { - ret = 1; - unlock_page(page); - continue; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; - } - - /* Is the page fully outside i_size? (truncate in progress) */ - if (page->index > end_index || (page->index == end_index && !offset)) { - page->mapping->a_ops->invalidatepage(page, 0); - unlock_page(page); - continue; - } - - ret = __gfs2_jdata_writepage(page, wbc); - - if (ret || (--(wbc->nr_to_write) <= 0)) - ret = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - ret = 1; - } - - } - gfs2_trans_end(sdp); - return ret; -} - -/** - * gfs2_write_cache_jdata - Like write_cache_pages but different - * @mapping: The mapping to write - * @wbc: The writeback control - * @writepage: The writepage function to call - * @data: The data to pass to writepage - * - * The reason that we use our own function here is that we need to - * start transactions before we grab page locks. This allows us - * to get the ordering right. - */ - -static int gfs2_write_cache_jdata(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct backing_dev_info *bdi = mapping->backing_dev_info; - int ret = 0; - int done = 0; - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; - int scanned = 0; - int range_whole = 0; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; - } - -retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - scanned = 1; - ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); - if (ret) - done = 1; - if (ret > 0) - ret = 0; - - pagevec_release(&pvec); - cond_resched(); - } - - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - return ret; -} - - -/** - * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk - * @mapping: The mapping to write - * @wbc: The writeback control - * - */ - -static int gfs2_jdata_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(mapping->host); - int ret; - - ret = gfs2_write_cache_jdata(mapping, wbc); - if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { - gfs2_log_flush(sdp, ip->i_gl); - ret = gfs2_write_cache_jdata(mapping, wbc); - } - return ret; -} - -/** - * stuffed_readpage - Fill in a Linux page with stuffed file data - * @ip: the inode - * @page: the page - * - * Returns: errno - */ - -static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) -{ - struct buffer_head *dibh; - void *kaddr; - int error; - - /* - * Due to the order of unstuffing files and ->fault(), we can be - * asked for a zero page in the case of a stuffed file being extended, - * so we need to supply one here. It doesn't happen often. - */ - if (unlikely(page->index)) { - zero_user(page, 0, PAGE_CACHE_SIZE); - SetPageUptodate(page); - return 0; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - kaddr = kmap_atomic(page, KM_USER0); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), - ip->i_disksize); - memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); - brelse(dibh); - SetPageUptodate(page); - - return 0; -} - - -/** - * __gfs2_readpage - readpage - * @file: The file to read a page for - * @page: The page to read - * - * This is the core of gfs2's readpage. Its used by the internal file - * reading code as in that case we already hold the glock. Also its - * called by gfs2_readpage() once the required lock has been granted. - * - */ - -static int __gfs2_readpage(void *file, struct page *page) -{ - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - int error; - - if (gfs2_is_stuffed(ip)) { - error = stuffed_readpage(ip, page); - unlock_page(page); - } else { - error = mpage_readpage(page, gfs2_block_map); - } - - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - return -EIO; - - return error; -} - -/** - * gfs2_readpage - read a page of a file - * @file: The file to read - * @page: The page of the file - * - * This deals with the locking required. We have to unlock and - * relock the page in order to get the locking in the right - * order. - */ - -static int gfs2_readpage(struct file *file, struct page *page) -{ - struct address_space *mapping = page->mapping; - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_holder gh; - int error; - - unlock_page(page); - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - error = gfs2_glock_nq(&gh); - if (unlikely(error)) - goto out; - error = AOP_TRUNCATED_PAGE; - lock_page(page); - if (page->mapping == mapping && !PageUptodate(page)) - error = __gfs2_readpage(file, page); - else - unlock_page(page); - gfs2_glock_dq(&gh); -out: - gfs2_holder_uninit(&gh); - if (error && error != AOP_TRUNCATED_PAGE) - lock_page(page); - return error; -} - -/** - * gfs2_internal_read - read an internal file - * @ip: The gfs2 inode - * @ra_state: The readahead state (or NULL for no readahead) - * @buf: The buffer to fill - * @pos: The file position - * @size: The amount to read - * - */ - -int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size) -{ - struct address_space *mapping = ip->i_inode.i_mapping; - unsigned long index = *pos / PAGE_CACHE_SIZE; - unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); - unsigned copied = 0; - unsigned amt; - struct page *page; - void *p; - - do { - amt = size - copied; - if (offset + size > PAGE_CACHE_SIZE) - amt = PAGE_CACHE_SIZE - offset; - page = read_cache_page(mapping, index, __gfs2_readpage, NULL); - if (IS_ERR(page)) - return PTR_ERR(page); - p = kmap_atomic(page, KM_USER0); - memcpy(buf + copied, p + offset, amt); - kunmap_atomic(p, KM_USER0); - mark_page_accessed(page); - page_cache_release(page); - copied += amt; - index++; - offset = 0; - } while(copied < size); - (*pos) += size; - return size; -} - -/** - * gfs2_readpages - Read a bunch of pages at once - * - * Some notes: - * 1. This is only for readahead, so we can simply ignore any things - * which are slightly inconvenient (such as locking conflicts between - * the page lock and the glock) and return having done no I/O. Its - * obviously not something we'd want to do on too regular a basis. - * Any I/O we ignore at this time will be done via readpage later. - * 2. We don't handle stuffed files here we let readpage do the honours. - * 3. mpage_readpages() does most of the heavy lifting in the common case. - * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. - */ - -static int gfs2_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - struct inode *inode = mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_holder gh; - int ret; - - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - ret = gfs2_glock_nq(&gh); - if (unlikely(ret)) - goto out_uninit; - if (!gfs2_is_stuffed(ip)) - ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); - gfs2_glock_dq(&gh); -out_uninit: - gfs2_holder_uninit(&gh); - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - ret = -EIO; - return ret; -} - -/** - * gfs2_write_begin - Begin to write to a file - * @file: The file to write to - * @mapping: The mapping in which to write - * @pos: The file offset at which to start writing - * @len: Length of the write - * @flags: Various flags - * @pagep: Pointer to return the page - * @fsdata: Pointer to return fs data (unused by GFS2) - * - * Returns: errno - */ - -static int gfs2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(mapping->host); - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - int alloc_required; - int error = 0; - struct gfs2_alloc *al; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned from = pos & (PAGE_CACHE_SIZE - 1); - unsigned to = from + len; - struct page *page; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (unlikely(error)) - goto out_uninit; - - error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); - if (error) - goto out_unlock; - - if (alloc_required || gfs2_is_jdata(ip)) - gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); - - if (alloc_required) { - al = gfs2_alloc_get(ip); - if (!al) { - error = -ENOMEM; - goto out_unlock; - } - - error = gfs2_quota_lock_check(ip); - if (error) - goto out_alloc_put; - - al->al_requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); - if (error) - goto out_qunlock; - } - - rblocks = RES_DINODE + ind_blocks; - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - if (ind_blocks || data_blocks) - rblocks += RES_STATFS + RES_QUOTA; - - error = gfs2_trans_begin(sdp, rblocks, - PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); - if (error) - goto out_trans_fail; - - error = -ENOMEM; - flags |= AOP_FLAG_NOFS; - page = grab_cache_page_write_begin(mapping, index, flags); - *pagep = page; - if (unlikely(!page)) - goto out_endtrans; - - if (gfs2_is_stuffed(ip)) { - error = 0; - if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { - error = gfs2_unstuff_dinode(ip, page); - if (error == 0) - goto prepare_write; - } else if (!PageUptodate(page)) { - error = stuffed_readpage(ip, page); - } - goto out; - } - -prepare_write: - error = block_prepare_write(page, from, to, gfs2_block_map); -out: - if (error == 0) - return 0; - - page_cache_release(page); - if (pos + len > ip->i_inode.i_size) - vmtruncate(&ip->i_inode, ip->i_inode.i_size); -out_endtrans: - gfs2_trans_end(sdp); -out_trans_fail: - if (alloc_required) { - gfs2_inplace_release(ip); -out_qunlock: - gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_alloc_put(ip); - } -out_unlock: - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - -/** - * adjust_fs_space - Adjusts the free space available due to gfs2_grow - * @inode: the rindex inode - */ -static void adjust_fs_space(struct inode *inode) -{ - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; - struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; - struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; - u64 fs_total, new_free; - - /* Total up the file system space, according to the latest rindex. */ - fs_total = gfs2_ri_total(sdp); - - spin_lock(&sdp->sd_statfs_spin); - if (fs_total > (m_sc->sc_total + l_sc->sc_total)) - new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); - else - new_free = 0; - spin_unlock(&sdp->sd_statfs_spin); - fs_warn(sdp, "File system extended by %llu blocks.\n", - (unsigned long long)new_free); - gfs2_statfs_change(sdp, new_free, new_free, 0); -} - -/** - * gfs2_stuffed_write_end - Write end for stuffed files - * @inode: The inode - * @dibh: The buffer_head containing the on-disk inode - * @pos: The file position - * @len: The length of the write - * @copied: How much was actually copied by the VFS - * @page: The page - * - * This copies the data from the page into the inode block after - * the inode data structure itself. - * - * Returns: errno - */ -static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned len, unsigned copied, - struct page *page) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - u64 to = pos + copied; - void *kaddr; - unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); - struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; - - BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); - kaddr = kmap_atomic(page, KM_USER0); - memcpy(buf + pos, kaddr + pos, copied); - memset(kaddr + pos + copied, 0, len - copied); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - - if (!PageUptodate(page)) - SetPageUptodate(page); - unlock_page(page); - page_cache_release(page); - - if (copied) { - if (inode->i_size < to) { - i_size_write(inode, to); - ip->i_disksize = inode->i_size; - } - gfs2_dinode_out(ip, di); - mark_inode_dirty(inode); - } - - if (inode == sdp->sd_rindex) - adjust_fs_space(inode); - - brelse(dibh); - gfs2_trans_end(sdp); - gfs2_glock_dq(&ip->i_gh); - gfs2_holder_uninit(&ip->i_gh); - return copied; -} - -/** - * gfs2_write_end - * @file: The file to write to - * @mapping: The address space to write to - * @pos: The file position - * @len: The length of the data - * @copied: - * @page: The page that has been written - * @fsdata: The fsdata (unused in GFS2) - * - * The main write_end function for GFS2. We have a separate one for - * stuffed files as they are slightly different, otherwise we just - * put our locking around the VFS provided functions. - * - * Returns: errno - */ - -static int gfs2_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct buffer_head *dibh; - struct gfs2_alloc *al = ip->i_alloc; - unsigned int from = pos & (PAGE_CACHE_SIZE - 1); - unsigned int to = from + len; - int ret; - - BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); - - ret = gfs2_meta_inode_buffer(ip, &dibh); - if (unlikely(ret)) { - unlock_page(page); - page_cache_release(page); - goto failed; - } - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - if (gfs2_is_stuffed(ip)) - return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); - - if (!gfs2_is_writeback(ip)) - gfs2_page_add_databufs(ip, page, from, to); - - ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (ret > 0) { - if (inode->i_size > ip->i_disksize) - ip->i_disksize = inode->i_size; - gfs2_dinode_out(ip, dibh->b_data); - mark_inode_dirty(inode); - } - - if (inode == sdp->sd_rindex) - adjust_fs_space(inode); - - brelse(dibh); - gfs2_trans_end(sdp); -failed: - if (al) { - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - gfs2_glock_dq(&ip->i_gh); - gfs2_holder_uninit(&ip->i_gh); - return ret; -} - -/** - * gfs2_set_page_dirty - Page dirtying function - * @page: The page to dirty - * - * Returns: 1 if it dirtyed the page, or 0 otherwise - */ - -static int gfs2_set_page_dirty(struct page *page) -{ - SetPageChecked(page); - return __set_page_dirty_buffers(page); -} - -/** - * gfs2_bmap - Block map function - * @mapping: Address space info - * @lblock: The block to map - * - * Returns: The disk address for the block or 0 on hole or error - */ - -static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) -{ - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_holder i_gh; - sector_t dblock = 0; - int error; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return 0; - - if (!gfs2_is_stuffed(ip)) - dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); - - gfs2_glock_dq_uninit(&i_gh); - - return dblock; -} - -static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) -{ - struct gfs2_bufdata *bd; - - lock_buffer(bh); - gfs2_log_lock(sdp); - clear_buffer_dirty(bh); - bd = bh->b_private; - if (bd) { - if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh)) - list_del_init(&bd->bd_le.le_list); - else - gfs2_remove_from_journal(bh, current->journal_info, 0); - } - bh->b_bdev = NULL; - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - gfs2_log_unlock(sdp); - unlock_buffer(bh); -} - -static void gfs2_invalidatepage(struct page *page, unsigned long offset) -{ - struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - struct buffer_head *bh, *head; - unsigned long pos = 0; - - BUG_ON(!PageLocked(page)); - if (offset == 0) - ClearPageChecked(page); - if (!page_has_buffers(page)) - goto out; - - bh = head = page_buffers(page); - do { - if (offset <= pos) - gfs2_discard(sdp, bh); - pos += bh->b_size; - bh = bh->b_this_page; - } while (bh != head); -out: - if (offset == 0) - try_to_release_page(page, 0); -} - -/** - * gfs2_ok_for_dio - check that dio is valid on this file - * @ip: The inode - * @rw: READ or WRITE - * @offset: The offset at which we are reading or writing - * - * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) - * 1 (to accept the i/o request) - */ -static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) -{ - /* - * Should we return an error here? I can't see that O_DIRECT for - * a stuffed file makes any sense. For now we'll silently fall - * back to buffered I/O - */ - if (gfs2_is_stuffed(ip)) - return 0; - - if (offset >= i_size_read(&ip->i_inode)) - return 0; - return 1; -} - - - -static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int rv; - - /* - * Deferred lock, even if its a write, since we do no allocation - * on this path. All we need change is atime, and this lock mode - * ensures that other nodes have flushed their buffered read caches - * (i.e. their page cache entries for this inode). We do not, - * unfortunately have the option of only flushing a range like - * the VFS does. - */ - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); - rv = gfs2_glock_nq(&gh); - if (rv) - return rv; - rv = gfs2_ok_for_dio(ip, rw, offset); - if (rv != 1) - goto out; /* dio not valid, fall back to buffered i/o */ - - rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, - iov, offset, nr_segs, - gfs2_get_block_direct, NULL); -out: - gfs2_glock_dq_m(1, &gh); - gfs2_holder_uninit(&gh); - return rv; -} - -/** - * gfs2_releasepage - free the metadata associated with a page - * @page: the page that's being released - * @gfp_mask: passed from Linux VFS, ignored by us - * - * Call try_to_free_buffers() if the buffers in this page can be - * released. - * - * Returns: 0 - */ - -int gfs2_releasepage(struct page *page, gfp_t gfp_mask) -{ - struct inode *aspace = page->mapping->host; - struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; - struct buffer_head *bh, *head; - struct gfs2_bufdata *bd; - - if (!page_has_buffers(page)) - return 0; - - gfs2_log_lock(sdp); - head = bh = page_buffers(page); - do { - if (atomic_read(&bh->b_count)) - goto cannot_release; - bd = bh->b_private; - if (bd && bd->bd_ail) - goto cannot_release; - gfs2_assert_warn(sdp, !buffer_pinned(bh)); - gfs2_assert_warn(sdp, !buffer_dirty(bh)); - bh = bh->b_this_page; - } while(bh != head); - gfs2_log_unlock(sdp); - - head = bh = page_buffers(page); - do { - gfs2_log_lock(sdp); - bd = bh->b_private; - if (bd) { - gfs2_assert_warn(sdp, bd->bd_bh == bh); - gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); - if (!list_empty(&bd->bd_le.le_list)) { - if (!buffer_pinned(bh)) - list_del_init(&bd->bd_le.le_list); - else - bd = NULL; - } - if (bd) - bd->bd_bh = NULL; - bh->b_private = NULL; - } - gfs2_log_unlock(sdp); - if (bd) - kmem_cache_free(gfs2_bufdata_cachep, bd); - - bh = bh->b_this_page; - } while (bh != head); - - return try_to_free_buffers(page); -cannot_release: - gfs2_log_unlock(sdp); - return 0; -} - -static const struct address_space_operations gfs2_writeback_aops = { - .writepage = gfs2_writeback_writepage, - .writepages = gfs2_writeback_writepages, - .readpage = gfs2_readpage, - .readpages = gfs2_readpages, - .sync_page = block_sync_page, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, - .bmap = gfs2_bmap, - .invalidatepage = gfs2_invalidatepage, - .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, -}; - -static const struct address_space_operations gfs2_ordered_aops = { - .writepage = gfs2_ordered_writepage, - .readpage = gfs2_readpage, - .readpages = gfs2_readpages, - .sync_page = block_sync_page, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, - .set_page_dirty = gfs2_set_page_dirty, - .bmap = gfs2_bmap, - .invalidatepage = gfs2_invalidatepage, - .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, -}; - -static const struct address_space_operations gfs2_jdata_aops = { - .writepage = gfs2_jdata_writepage, - .writepages = gfs2_jdata_writepages, - .readpage = gfs2_readpage, - .readpages = gfs2_readpages, - .sync_page = block_sync_page, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, - .set_page_dirty = gfs2_set_page_dirty, - .bmap = gfs2_bmap, - .invalidatepage = gfs2_invalidatepage, - .releasepage = gfs2_releasepage, - .is_partially_uptodate = block_is_partially_uptodate, -}; - -void gfs2_set_aops(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - - if (gfs2_is_writeback(ip)) - inode->i_mapping->a_ops = &gfs2_writeback_aops; - else if (gfs2_is_ordered(ip)) - inode->i_mapping->a_ops = &gfs2_ordered_aops; - else if (gfs2_is_jdata(ip)) - inode->i_mapping->a_ops = &gfs2_jdata_aops; - else - BUG(); -} - diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h deleted file mode 100644 index 5da21285bba4..000000000000 --- a/fs/gfs2/ops_address.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#ifndef __OPS_ADDRESS_DOT_H__ -#define __OPS_ADDRESS_DOT_H__ - -#include -#include -#include - -extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); -extern int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size); -extern void gfs2_set_aops(struct inode *inode); - -#endif /* __OPS_ADDRESS_DOT_H__ */ diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c deleted file mode 100644 index 022c66cd5606..000000000000 --- a/fs/gfs2/ops_dentry.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "dir.h" -#include "glock.h" -#include "super.h" -#include "util.h" -#include "inode.h" - -/** - * gfs2_drevalidate - Check directory lookup consistency - * @dentry: the mapping to check - * @nd: - * - * Check to make sure the lookup necessary to arrive at this inode from its - * parent is still good. - * - * Returns: 1 if the dentry is ok, 0 if it isn't - */ - -static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) -{ - struct dentry *parent = dget_parent(dentry); - struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); - struct gfs2_inode *dip = GFS2_I(parent->d_inode); - struct inode *inode = dentry->d_inode; - struct gfs2_holder d_gh; - struct gfs2_inode *ip = NULL; - int error; - int had_lock = 0; - - if (inode) { - if (is_bad_inode(inode)) - goto invalid; - ip = GFS2_I(inode); - } - - if (sdp->sd_args.ar_localcaching) - goto valid; - - had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); - if (!had_lock) { - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - if (error) - goto fail; - } - - error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip); - switch (error) { - case 0: - if (!inode) - goto invalid_gunlock; - break; - case -ENOENT: - if (!inode) - goto valid_gunlock; - goto invalid_gunlock; - default: - goto fail_gunlock; - } - -valid_gunlock: - if (!had_lock) - gfs2_glock_dq_uninit(&d_gh); -valid: - dput(parent); - return 1; - -invalid_gunlock: - if (!had_lock) - gfs2_glock_dq_uninit(&d_gh); -invalid: - if (inode && S_ISDIR(inode->i_mode)) { - if (have_submounts(dentry)) - goto valid; - shrink_dcache_parent(dentry); - } - d_drop(dentry); - dput(parent); - return 0; - -fail_gunlock: - gfs2_glock_dq_uninit(&d_gh); -fail: - dput(parent); - return 0; -} - -static int gfs2_dhash(struct dentry *dentry, struct qstr *str) -{ - str->hash = gfs2_disk_hash(str->name, str->len); - return 0; -} - -const struct dentry_operations gfs2_dops = { - .d_revalidate = gfs2_drevalidate, - .d_hash = gfs2_dhash, -}; - diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c deleted file mode 100644 index 9200ef221716..000000000000 --- a/fs/gfs2/ops_export.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "dir.h" -#include "glock.h" -#include "glops.h" -#include "inode.h" -#include "super.h" -#include "rgrp.h" -#include "util.h" - -#define GFS2_SMALL_FH_SIZE 4 -#define GFS2_LARGE_FH_SIZE 8 -#define GFS2_OLD_FH_SIZE 10 - -static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, - int connectable) -{ - __be32 *fh = (__force __be32 *)p; - struct inode *inode = dentry->d_inode; - struct super_block *sb = inode->i_sb; - struct gfs2_inode *ip = GFS2_I(inode); - - if (*len < GFS2_SMALL_FH_SIZE || - (connectable && *len < GFS2_LARGE_FH_SIZE)) - return 255; - - fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); - fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); - fh[2] = cpu_to_be32(ip->i_no_addr >> 32); - fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); - *len = GFS2_SMALL_FH_SIZE; - - if (!connectable || inode == sb->s_root->d_inode) - return *len; - - spin_lock(&dentry->d_lock); - inode = dentry->d_parent->d_inode; - ip = GFS2_I(inode); - igrab(inode); - spin_unlock(&dentry->d_lock); - - fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); - fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); - fh[6] = cpu_to_be32(ip->i_no_addr >> 32); - fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); - *len = GFS2_LARGE_FH_SIZE; - - iput(inode); - - return *len; -} - -struct get_name_filldir { - struct gfs2_inum_host inum; - char *name; -}; - -static int get_name_filldir(void *opaque, const char *name, int length, - loff_t offset, u64 inum, unsigned int type) -{ - struct get_name_filldir *gnfd = opaque; - - if (inum != gnfd->inum.no_addr) - return 0; - - memcpy(gnfd->name, name, length); - gnfd->name[length] = 0; - - return 1; -} - -static int gfs2_get_name(struct dentry *parent, char *name, - struct dentry *child) -{ - struct inode *dir = parent->d_inode; - struct inode *inode = child->d_inode; - struct gfs2_inode *dip, *ip; - struct get_name_filldir gnfd; - struct gfs2_holder gh; - u64 offset = 0; - int error; - - if (!dir) - return -EINVAL; - - if (!S_ISDIR(dir->i_mode) || !inode) - return -EINVAL; - - dip = GFS2_I(dir); - ip = GFS2_I(inode); - - *name = 0; - gnfd.inum.no_addr = ip->i_no_addr; - gnfd.inum.no_formal_ino = ip->i_no_formal_ino; - gnfd.name = name; - - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); - if (error) - return error; - - error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir); - - gfs2_glock_dq_uninit(&gh); - - if (!error && !*name) - error = -ENOENT; - - return error; -} - -static struct dentry *gfs2_get_parent(struct dentry *child) -{ - struct qstr dotdot; - struct dentry *dentry; - - /* - * XXX(hch): it would be a good idea to keep this around as a - * static variable. - */ - gfs2_str2qstr(&dotdot, ".."); - - dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); - if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; - return dentry; -} - -static struct dentry *gfs2_get_dentry(struct super_block *sb, - struct gfs2_inum_host *inum) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_holder i_gh, ri_gh, rgd_gh; - struct gfs2_rgrpd *rgd; - struct inode *inode; - struct dentry *dentry; - int error; - - /* System files? */ - - inode = gfs2_ilookup(sb, inum->no_addr); - if (inode) { - if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { - iput(inode); - return ERR_PTR(-ESTALE); - } - goto out_inode; - } - - error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, - LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return ERR_PTR(error); - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto fail; - - error = -EINVAL; - rgd = gfs2_blk2rgrpd(sdp, inum->no_addr); - if (!rgd) - goto fail_rindex; - - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); - if (error) - goto fail_rindex; - - error = -ESTALE; - if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE) - goto fail_rgd; - - gfs2_glock_dq_uninit(&rgd_gh); - gfs2_glock_dq_uninit(&ri_gh); - - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, - inum->no_addr, - 0, 0); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - goto fail; - } - - error = gfs2_inode_refresh(GFS2_I(inode)); - if (error) { - iput(inode); - goto fail; - } - - /* Pick up the works we bypass in gfs2_inode_lookup */ - if (inode->i_state & I_NEW) - gfs2_set_iop(inode); - - if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { - iput(inode); - goto fail; - } - - error = -EIO; - if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) { - iput(inode); - goto fail; - } - - gfs2_glock_dq_uninit(&i_gh); - -out_inode: - dentry = d_obtain_alias(inode); - if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; - return dentry; - -fail_rgd: - gfs2_glock_dq_uninit(&rgd_gh); - -fail_rindex: - gfs2_glock_dq_uninit(&ri_gh); - -fail: - gfs2_glock_dq_uninit(&i_gh); - return ERR_PTR(error); -} - -static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - struct gfs2_inum_host this; - __be32 *fh = (__force __be32 *)fid->raw; - - switch (fh_type) { - case GFS2_SMALL_FH_SIZE: - case GFS2_LARGE_FH_SIZE: - case GFS2_OLD_FH_SIZE: - this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; - this.no_formal_ino |= be32_to_cpu(fh[1]); - this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; - this.no_addr |= be32_to_cpu(fh[3]); - return gfs2_get_dentry(sb, &this); - default: - return NULL; - } -} - -static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - struct gfs2_inum_host parent; - __be32 *fh = (__force __be32 *)fid->raw; - - switch (fh_type) { - case GFS2_LARGE_FH_SIZE: - case GFS2_OLD_FH_SIZE: - parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; - parent.no_formal_ino |= be32_to_cpu(fh[5]); - parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; - parent.no_addr |= be32_to_cpu(fh[7]); - return gfs2_get_dentry(sb, &parent); - default: - return NULL; - } -} - -const struct export_operations gfs2_export_ops = { - .encode_fh = gfs2_encode_fh, - .fh_to_dentry = gfs2_fh_to_dentry, - .fh_to_parent = gfs2_fh_to_parent, - .get_name = gfs2_get_name, - .get_parent = gfs2_get_parent, -}; - diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c deleted file mode 100644 index 0ee7bd287c5a..000000000000 --- a/fs/gfs2/ops_file.c +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "bmap.h" -#include "dir.h" -#include "glock.h" -#include "glops.h" -#include "inode.h" -#include "log.h" -#include "meta_io.h" -#include "quota.h" -#include "rgrp.h" -#include "trans.h" -#include "util.h" -#include "eaops.h" -#include "ops_address.h" - -/** - * gfs2_llseek - seek to a location in a file - * @file: the file - * @offset: the offset - * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) - * - * SEEK_END requires the glock for the file because it references the - * file's size. - * - * Returns: The new offset, or errno - */ - -static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) -{ - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_holder i_gh; - loff_t error; - - if (origin == 2) { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, - &i_gh); - if (!error) { - error = generic_file_llseek_unlocked(file, offset, origin); - gfs2_glock_dq_uninit(&i_gh); - } - } else - error = generic_file_llseek_unlocked(file, offset, origin); - - return error; -} - -/** - * gfs2_readdir - Read directory entries from a directory - * @file: The directory to read from - * @dirent: Buffer for dirents - * @filldir: Function used to do the copying - * - * Returns: errno - */ - -static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) -{ - struct inode *dir = file->f_mapping->host; - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_holder d_gh; - u64 offset = file->f_pos; - int error; - - gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - error = gfs2_glock_nq(&d_gh); - if (error) { - gfs2_holder_uninit(&d_gh); - return error; - } - - error = gfs2_dir_read(dir, &offset, dirent, filldir); - - gfs2_glock_dq_uninit(&d_gh); - - file->f_pos = offset; - - return error; -} - -/** - * fsflags_cvt - * @table: A table of 32 u32 flags - * @val: a 32 bit value to convert - * - * This function can be used to convert between fsflags values and - * GFS2's own flags values. - * - * Returns: the converted flags - */ -static u32 fsflags_cvt(const u32 *table, u32 val) -{ - u32 res = 0; - while(val) { - if (val & 1) - res |= *table; - table++; - val >>= 1; - } - return res; -} - -static const u32 fsflags_to_gfs2[32] = { - [3] = GFS2_DIF_SYNC, - [4] = GFS2_DIF_IMMUTABLE, - [5] = GFS2_DIF_APPENDONLY, - [7] = GFS2_DIF_NOATIME, - [12] = GFS2_DIF_EXHASH, - [14] = GFS2_DIF_INHERIT_JDATA, -}; - -static const u32 gfs2_to_fsflags[32] = { - [gfs2fl_Sync] = FS_SYNC_FL, - [gfs2fl_Immutable] = FS_IMMUTABLE_FL, - [gfs2fl_AppendOnly] = FS_APPEND_FL, - [gfs2fl_NoAtime] = FS_NOATIME_FL, - [gfs2fl_ExHash] = FS_INDEX_FL, - [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, -}; - -static int gfs2_get_flags(struct file *filp, u32 __user *ptr) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int error; - u32 fsflags; - - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - error = gfs2_glock_nq(&gh); - if (error) - return error; - - fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags); - if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA) - fsflags |= FS_JOURNAL_DATA_FL; - if (put_user(fsflags, ptr)) - error = -EFAULT; - - gfs2_glock_dq(&gh); - gfs2_holder_uninit(&gh); - return error; -} - -void gfs2_set_inode_flags(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - unsigned int flags = inode->i_flags; - - flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) - flags |= S_IMMUTABLE; - if (ip->i_diskflags & GFS2_DIF_APPENDONLY) - flags |= S_APPEND; - if (ip->i_diskflags & GFS2_DIF_NOATIME) - flags |= S_NOATIME; - if (ip->i_diskflags & GFS2_DIF_SYNC) - flags |= S_SYNC; - inode->i_flags = flags; -} - -/* Flags that can be set by user space */ -#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ - GFS2_DIF_IMMUTABLE| \ - GFS2_DIF_APPENDONLY| \ - GFS2_DIF_NOATIME| \ - GFS2_DIF_SYNC| \ - GFS2_DIF_SYSTEM| \ - GFS2_DIF_INHERIT_JDATA) - -/** - * gfs2_set_flags - set flags on an inode - * @inode: The inode - * @flags: The flags to set - * @mask: Indicates which flags are valid - * - */ -static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct buffer_head *bh; - struct gfs2_holder gh; - int error; - u32 new_flags, flags; - - error = mnt_want_write(filp->f_path.mnt); - if (error) - return error; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (error) - goto out_drop_write; - - flags = ip->i_diskflags; - new_flags = (flags & ~mask) | (reqflags & mask); - if ((new_flags ^ flags) == 0) - goto out; - - error = -EINVAL; - if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET) - goto out; - - error = -EPERM; - if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE)) - goto out; - if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) - goto out; - if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) && - !capable(CAP_LINUX_IMMUTABLE)) - goto out; - if (!IS_IMMUTABLE(inode)) { - error = gfs2_permission(inode, MAY_WRITE); - if (error) - goto out; - } - if ((flags ^ new_flags) & GFS2_DIF_JDATA) { - if (flags & GFS2_DIF_JDATA) - gfs2_log_flush(sdp, ip->i_gl); - error = filemap_fdatawrite(inode->i_mapping); - if (error) - goto out; - error = filemap_fdatawait(inode->i_mapping); - if (error) - goto out; - } - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error) - goto out; - error = gfs2_meta_inode_buffer(ip, &bh); - if (error) - goto out_trans_end; - gfs2_trans_add_bh(ip->i_gl, bh, 1); - ip->i_diskflags = new_flags; - gfs2_dinode_out(ip, bh->b_data); - brelse(bh); - gfs2_set_inode_flags(inode); - gfs2_set_aops(inode); -out_trans_end: - gfs2_trans_end(sdp); -out: - gfs2_glock_dq_uninit(&gh); -out_drop_write: - mnt_drop_write(filp->f_path.mnt); - return error; -} - -static int gfs2_set_flags(struct file *filp, u32 __user *ptr) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - u32 fsflags, gfsflags; - if (get_user(fsflags, ptr)) - return -EFAULT; - gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); - if (!S_ISDIR(inode->i_mode)) { - if (gfsflags & GFS2_DIF_INHERIT_JDATA) - gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); - return do_gfs2_set_flags(filp, gfsflags, ~0); - } - return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); -} - -static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - switch(cmd) { - case FS_IOC_GETFLAGS: - return gfs2_get_flags(filp, (u32 __user *)arg); - case FS_IOC_SETFLAGS: - return gfs2_set_flags(filp, (u32 __user *)arg); - } - return -ENOTTY; -} - -/** - * gfs2_allocate_page_backing - Use bmap to allocate blocks - * @page: The (locked) page to allocate backing for - * - * We try to allocate all the blocks required for the page in - * one go. This might fail for various reasons, so we keep - * trying until all the blocks to back this page are allocated. - * If some of the blocks are already allocated, thats ok too. - */ - -static int gfs2_allocate_page_backing(struct page *page) -{ - struct inode *inode = page->mapping->host; - struct buffer_head bh; - unsigned long size = PAGE_CACHE_SIZE; - u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - - do { - bh.b_state = 0; - bh.b_size = size; - gfs2_block_map(inode, lblock, &bh, 1); - if (!buffer_mapped(&bh)) - return -EIO; - size -= bh.b_size; - lblock += (bh.b_size >> inode->i_blkbits); - } while(size > 0); - return 0; -} - -/** - * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable - * @vma: The virtual memory area - * @page: The page which is about to become writable - * - * When the page becomes writable, we need to ensure that we have - * blocks allocated on disk to back that page. - */ - -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - unsigned long last_index; - u64 pos = page->index << PAGE_CACHE_SHIFT; - unsigned int data_blocks, ind_blocks, rblocks; - int alloc_required = 0; - struct gfs2_holder gh; - struct gfs2_alloc *al; - int ret; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - ret = gfs2_glock_nq(&gh); - if (ret) - goto out; - - set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); - set_bit(GIF_SW_PAGED, &ip->i_flags); - - ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); - if (ret || !alloc_required) - goto out_unlock; - ret = -ENOMEM; - al = gfs2_alloc_get(ip); - if (al == NULL) - goto out_unlock; - - ret = gfs2_quota_lock_check(ip); - if (ret) - goto out_alloc_put; - gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; - ret = gfs2_inplace_reserve(ip); - if (ret) - goto out_quota_unlock; - - rblocks = RES_DINODE + ind_blocks; - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - if (ind_blocks || data_blocks) - rblocks += RES_STATFS + RES_QUOTA; - ret = gfs2_trans_begin(sdp, rblocks, 0); - if (ret) - goto out_trans_fail; - - lock_page(page); - ret = -EINVAL; - last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; - if (page->index > last_index) - goto out_unlock_page; - ret = 0; - if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) - goto out_unlock_page; - if (gfs2_is_stuffed(ip)) { - ret = gfs2_unstuff_dinode(ip, page); - if (ret) - goto out_unlock_page; - } - ret = gfs2_allocate_page_backing(page); - -out_unlock_page: - unlock_page(page); - gfs2_trans_end(sdp); -out_trans_fail: - gfs2_inplace_release(ip); -out_quota_unlock: - gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_alloc_put(ip); -out_unlock: - gfs2_glock_dq(&gh); -out: - gfs2_holder_uninit(&gh); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else if (ret) - ret = VM_FAULT_SIGBUS; - return ret; -} - -static struct vm_operations_struct gfs2_vm_ops = { - .fault = filemap_fault, - .page_mkwrite = gfs2_page_mkwrite, -}; - -/** - * gfs2_mmap - - * @file: The file to map - * @vma: The VMA which described the mapping - * - * There is no need to get a lock here unless we should be updating - * atime. We ignore any locking errors since the only consequence is - * a missed atime update (which will just be deferred until later). - * - * Returns: 0 - */ - -static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - - if (!(file->f_flags & O_NOATIME)) { - struct gfs2_holder i_gh; - int error; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); - error = gfs2_glock_nq(&i_gh); - file_accessed(file); - if (error == 0) - gfs2_glock_dq_uninit(&i_gh); - } - vma->vm_ops = &gfs2_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; - - return 0; -} - -/** - * gfs2_open - open a file - * @inode: the inode to open - * @file: the struct file for this opening - * - * Returns: errno - */ - -static int gfs2_open(struct inode *inode, struct file *file) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder i_gh; - struct gfs2_file *fp; - int error; - - fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); - if (!fp) - return -ENOMEM; - - mutex_init(&fp->f_fl_mutex); - - gfs2_assert_warn(GFS2_SB(inode), !file->private_data); - file->private_data = fp; - - if (S_ISREG(ip->i_inode.i_mode)) { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, - &i_gh); - if (error) - goto fail; - - if (!(file->f_flags & O_LARGEFILE) && - ip->i_disksize > MAX_NON_LFS) { - error = -EOVERFLOW; - goto fail_gunlock; - } - - gfs2_glock_dq_uninit(&i_gh); - } - - return 0; - -fail_gunlock: - gfs2_glock_dq_uninit(&i_gh); -fail: - file->private_data = NULL; - kfree(fp); - return error; -} - -/** - * gfs2_close - called to close a struct file - * @inode: the inode the struct file belongs to - * @file: the struct file being closed - * - * Returns: errno - */ - -static int gfs2_close(struct inode *inode, struct file *file) -{ - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; - struct gfs2_file *fp; - - fp = file->private_data; - file->private_data = NULL; - - if (gfs2_assert_warn(sdp, fp)) - return -EIO; - - kfree(fp); - - return 0; -} - -/** - * gfs2_fsync - sync the dirty data for a file (across the cluster) - * @file: the file that points to the dentry (we ignore this) - * @dentry: the dentry that points to the inode to sync - * - * The VFS will flush "normal" data for us. We only need to worry - * about metadata here. For journaled data, we just do a log flush - * as we can't avoid it. Otherwise we can just bale out if datasync - * is set. For stuffed inodes we must flush the log in order to - * ensure that all data is on disk. - * - * The call to write_inode_now() is there to write back metadata and - * the inode itself. It does also try and write the data, but thats - * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite() - * for us. - * - * Returns: errno - */ - -static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); - int ret = 0; - - if (gfs2_is_jdata(GFS2_I(inode))) { - gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); - return 0; - } - - if (sync_state != 0) { - if (!datasync) - ret = write_inode_now(inode, 0); - - if (gfs2_is_stuffed(GFS2_I(inode))) - gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); - } - - return ret; -} - -#ifdef CONFIG_GFS2_FS_LOCKING_DLM - -/** - * gfs2_setlease - acquire/release a file lease - * @file: the file pointer - * @arg: lease type - * @fl: file lock - * - * We don't currently have a way to enforce a lease across the whole - * cluster; until we do, disable leases (by just returning -EINVAL), - * unless the administrator has requested purely local locking. - * - * Returns: errno - */ - -static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) -{ - return -EINVAL; -} - -/** - * gfs2_lock - acquire/release a posix lock on a file - * @file: the file pointer - * @cmd: either modify or retrieve lock state, possibly wait - * @fl: type and range of lock - * - * Returns: errno - */ - -static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) -{ - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - - if (!(fl->fl_flags & FL_POSIX)) - return -ENOLCK; - if (__mandatory_lock(&ip->i_inode)) - return -ENOLCK; - - if (cmd == F_CANCELLK) { - /* Hack: */ - cmd = F_SETLK; - fl->fl_type = F_UNLCK; - } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - return -EIO; - if (IS_GETLK(cmd)) - return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); - else if (fl->fl_type == F_UNLCK) - return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); - else - return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); -} - -static int do_flock(struct file *file, int cmd, struct file_lock *fl) -{ - struct gfs2_file *fp = file->private_data; - struct gfs2_holder *fl_gh = &fp->f_fl_gh; - struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode); - struct gfs2_glock *gl; - unsigned int state; - int flags; - int error = 0; - - state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; - - mutex_lock(&fp->f_fl_mutex); - - gl = fl_gh->gh_gl; - if (gl) { - if (fl_gh->gh_state == state) - goto out; - flock_lock_file_wait(file, - &(struct file_lock){.fl_type = F_UNLCK}); - gfs2_glock_dq_wait(fl_gh); - gfs2_holder_reinit(state, flags, fl_gh); - } else { - error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, - &gfs2_flock_glops, CREATE, &gl); - if (error) - goto out; - gfs2_holder_init(gl, state, flags, fl_gh); - gfs2_glock_put(gl); - } - error = gfs2_glock_nq(fl_gh); - if (error) { - gfs2_holder_uninit(fl_gh); - if (error == GLR_TRYFAILED) - error = -EAGAIN; - } else { - error = flock_lock_file_wait(file, fl); - gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); - } - -out: - mutex_unlock(&fp->f_fl_mutex); - return error; -} - -static void do_unflock(struct file *file, struct file_lock *fl) -{ - struct gfs2_file *fp = file->private_data; - struct gfs2_holder *fl_gh = &fp->f_fl_gh; - - mutex_lock(&fp->f_fl_mutex); - flock_lock_file_wait(file, fl); - if (fl_gh->gh_gl) - gfs2_glock_dq_uninit(fl_gh); - mutex_unlock(&fp->f_fl_mutex); -} - -/** - * gfs2_flock - acquire/release a flock lock on a file - * @file: the file pointer - * @cmd: either modify or retrieve lock state, possibly wait - * @fl: type and range of lock - * - * Returns: errno - */ - -static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) -{ - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - - if (!(fl->fl_flags & FL_FLOCK)) - return -ENOLCK; - if (__mandatory_lock(&ip->i_inode)) - return -ENOLCK; - - if (fl->fl_type == F_UNLCK) { - do_unflock(file, fl); - return 0; - } else { - return do_flock(file, cmd, fl); - } -} - -const struct file_operations gfs2_file_fops = { - .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, - .unlocked_ioctl = gfs2_ioctl, - .mmap = gfs2_mmap, - .open = gfs2_open, - .release = gfs2_close, - .fsync = gfs2_fsync, - .lock = gfs2_lock, - .flock = gfs2_flock, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, - .setlease = gfs2_setlease, -}; - -const struct file_operations gfs2_dir_fops = { - .readdir = gfs2_readdir, - .unlocked_ioctl = gfs2_ioctl, - .open = gfs2_open, - .release = gfs2_close, - .fsync = gfs2_fsync, - .lock = gfs2_lock, - .flock = gfs2_flock, -}; - -#endif /* CONFIG_GFS2_FS_LOCKING_DLM */ - -const struct file_operations gfs2_file_fops_nolock = { - .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, - .unlocked_ioctl = gfs2_ioctl, - .mmap = gfs2_mmap, - .open = gfs2_open, - .release = gfs2_close, - .fsync = gfs2_fsync, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, - .setlease = generic_setlease, -}; - -const struct file_operations gfs2_dir_fops_nolock = { - .readdir = gfs2_readdir, - .unlocked_ioctl = gfs2_ioctl, - .open = gfs2_open, - .release = gfs2_close, - .fsync = gfs2_fsync, -}; - diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 152e6c4a0dca..2e9b9326bfc9 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -60,7 +60,6 @@ #include "super.h" #include "trans.h" #include "inode.h" -#include "ops_address.h" #include "util.h" #define QUOTA_USER 1 diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ee3d5c1876a3..6122c7ee3648 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -29,7 +29,6 @@ #include "util.h" #include "log.h" #include "inode.h" -#include "ops_address.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -- cgit v1.2.3 From 9e6e0a128bca0a151d8d3fbd9459b22fc21cfebb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:36:01 +0100 Subject: GFS2: Merge mount.c and ops_super.c into super.c mount.c only contained a single function, so is not really worth retaining on its own. All of the super related code is now either in super.c or ops_fstype.c Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 4 +- fs/gfs2/mount.c | 195 ------------ fs/gfs2/ops_super.c | 757 ------------------------------------------- fs/gfs2/super.c | 903 +++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 903 insertions(+), 956 deletions(-) delete mode 100644 fs/gfs2/mount.c delete mode 100644 fs/gfs2/ops_super.c (limited to 'fs/gfs2') diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 4f7332c7682f..d53a9bea1c2f 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o log.o lops.o main.o meta_io.o \ - mount.o aops.o dentry.o export.o file.o \ - ops_fstype.o ops_inode.o ops_super.o quota.o \ + aops.o dentry.o export.o file.o \ + ops_fstype.o ops_inode.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c deleted file mode 100644 index 947af151fa24..000000000000 --- a/fs/gfs2/mount.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "super.h" -#include "sys.h" -#include "util.h" - -enum { - Opt_lockproto, - Opt_locktable, - Opt_hostdata, - Opt_spectator, - Opt_ignore_local_fs, - Opt_localflocks, - Opt_localcaching, - Opt_debug, - Opt_nodebug, - Opt_upgrade, - Opt_acl, - Opt_noacl, - Opt_quota_off, - Opt_quota_account, - Opt_quota_on, - Opt_quota, - Opt_noquota, - Opt_suiddir, - Opt_nosuiddir, - Opt_data_writeback, - Opt_data_ordered, - Opt_meta, - Opt_discard, - Opt_nodiscard, - Opt_commit, - Opt_err, -}; - -static const match_table_t tokens = { - {Opt_lockproto, "lockproto=%s"}, - {Opt_locktable, "locktable=%s"}, - {Opt_hostdata, "hostdata=%s"}, - {Opt_spectator, "spectator"}, - {Opt_ignore_local_fs, "ignore_local_fs"}, - {Opt_localflocks, "localflocks"}, - {Opt_localcaching, "localcaching"}, - {Opt_debug, "debug"}, - {Opt_nodebug, "nodebug"}, - {Opt_upgrade, "upgrade"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_quota_off, "quota=off"}, - {Opt_quota_account, "quota=account"}, - {Opt_quota_on, "quota=on"}, - {Opt_quota, "quota"}, - {Opt_noquota, "noquota"}, - {Opt_suiddir, "suiddir"}, - {Opt_nosuiddir, "nosuiddir"}, - {Opt_data_writeback, "data=writeback"}, - {Opt_data_ordered, "data=ordered"}, - {Opt_meta, "meta"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_commit, "commit=%d"}, - {Opt_err, NULL} -}; - -/** - * gfs2_mount_args - Parse mount options - * @sdp: - * @data: - * - * Return: errno - */ - -int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) -{ - char *o; - int token; - substring_t tmp[MAX_OPT_ARGS]; - int rv; - - /* Split the options into tokens with the "," character and - process them */ - - while (1) { - o = strsep(&options, ","); - if (o == NULL) - break; - if (*o == '\0') - continue; - - token = match_token(o, tokens, tmp); - switch (token) { - case Opt_lockproto: - match_strlcpy(args->ar_lockproto, &tmp[0], - GFS2_LOCKNAME_LEN); - break; - case Opt_locktable: - match_strlcpy(args->ar_locktable, &tmp[0], - GFS2_LOCKNAME_LEN); - break; - case Opt_hostdata: - match_strlcpy(args->ar_hostdata, &tmp[0], - GFS2_LOCKNAME_LEN); - break; - case Opt_spectator: - args->ar_spectator = 1; - break; - case Opt_ignore_local_fs: - args->ar_ignore_local_fs = 1; - break; - case Opt_localflocks: - args->ar_localflocks = 1; - break; - case Opt_localcaching: - args->ar_localcaching = 1; - break; - case Opt_debug: - args->ar_debug = 1; - break; - case Opt_nodebug: - args->ar_debug = 0; - break; - case Opt_upgrade: - args->ar_upgrade = 1; - break; - case Opt_acl: - args->ar_posix_acl = 1; - break; - case Opt_noacl: - args->ar_posix_acl = 0; - break; - case Opt_quota_off: - case Opt_noquota: - args->ar_quota = GFS2_QUOTA_OFF; - break; - case Opt_quota_account: - args->ar_quota = GFS2_QUOTA_ACCOUNT; - break; - case Opt_quota_on: - case Opt_quota: - args->ar_quota = GFS2_QUOTA_ON; - break; - case Opt_suiddir: - args->ar_suiddir = 1; - break; - case Opt_nosuiddir: - args->ar_suiddir = 0; - break; - case Opt_data_writeback: - args->ar_data = GFS2_DATA_WRITEBACK; - break; - case Opt_data_ordered: - args->ar_data = GFS2_DATA_ORDERED; - break; - case Opt_meta: - args->ar_meta = 1; - break; - case Opt_discard: - args->ar_discard = 1; - break; - case Opt_nodiscard: - args->ar_discard = 0; - break; - case Opt_commit: - rv = match_int(&tmp[0], &args->ar_commit); - if (rv || args->ar_commit <= 0) { - fs_info(sdp, "commit mount option requires a positive numeric argument\n"); - return rv ? rv : -EINVAL; - } - break; - case Opt_err: - default: - fs_info(sdp, "invalid mount option: %s\n", o); - return -EINVAL; - } - } - - return 0; -} - diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c deleted file mode 100644 index 2fd1dcbcc5b7..000000000000 --- a/fs/gfs2/ops_super.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "glock.h" -#include "inode.h" -#include "log.h" -#include "quota.h" -#include "recovery.h" -#include "rgrp.h" -#include "super.h" -#include "sys.h" -#include "util.h" -#include "trans.h" -#include "dir.h" -#include "eattr.h" -#include "bmap.h" -#include "meta_io.h" - -#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) - -/** - * gfs2_write_inode - Make sure the inode is stable on the disk - * @inode: The inode - * @sync: synchronous write flag - * - * Returns: errno - */ - -static int gfs2_write_inode(struct inode *inode, int sync) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_holder gh; - struct buffer_head *bh; - struct timespec atime; - struct gfs2_dinode *di; - int ret = 0; - - /* Check this is a "normal" inode, etc */ - if (!test_bit(GIF_USER, &ip->i_flags) || - (current->flags & PF_MEMALLOC)) - return 0; - ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (ret) - goto do_flush; - ret = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (ret) - goto do_unlock; - ret = gfs2_meta_inode_buffer(ip, &bh); - if (ret == 0) { - di = (struct gfs2_dinode *)bh->b_data; - atime.tv_sec = be64_to_cpu(di->di_atime); - atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); - if (timespec_compare(&inode->i_atime, &atime) > 0) { - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_dinode_out(ip, bh->b_data); - } - brelse(bh); - } - gfs2_trans_end(sdp); -do_unlock: - gfs2_glock_dq_uninit(&gh); -do_flush: - if (sync != 0) - gfs2_log_flush(GFS2_SB(inode), ip->i_gl); - return ret; -} - -/** - * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one - * @sdp: the filesystem - * - * Returns: errno - */ - -static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) -{ - struct gfs2_holder t_gh; - int error; - - gfs2_quota_sync(sdp); - gfs2_statfs_sync(sdp); - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, - &t_gh); - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return error; - - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - - if (t_gh.gh_gl) - gfs2_glock_dq_uninit(&t_gh); - - gfs2_quota_cleanup(sdp); - - return error; -} - -static int gfs2_umount_recovery_wait(void *word) -{ - schedule(); - return 0; -} - -/** - * gfs2_put_super - Unmount the filesystem - * @sb: The VFS superblock - * - */ - -static void gfs2_put_super(struct super_block *sb) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - int error; - struct gfs2_jdesc *jd; - - /* Unfreeze the filesystem, if we need to */ - - mutex_lock(&sdp->sd_freeze_lock); - if (sdp->sd_freeze_count) - gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); - mutex_unlock(&sdp->sd_freeze_lock); - - /* No more recovery requests */ - set_bit(SDF_NORECOVERY, &sdp->sd_flags); - smp_mb(); - - /* Wait on outstanding recovery */ -restart: - spin_lock(&sdp->sd_jindex_spin); - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) - continue; - spin_unlock(&sdp->sd_jindex_spin); - wait_on_bit(&jd->jd_flags, JDF_RECOVERY, - gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); - goto restart; - } - spin_unlock(&sdp->sd_jindex_spin); - - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); - - if (!(sb->s_flags & MS_RDONLY)) { - error = gfs2_make_fs_ro(sdp); - if (error) - gfs2_io_error(sdp); - } - /* At this point, we're through modifying the disk */ - - /* Release stuff */ - - iput(sdp->sd_jindex); - iput(sdp->sd_inum_inode); - iput(sdp->sd_statfs_inode); - iput(sdp->sd_rindex); - iput(sdp->sd_quota_inode); - - gfs2_glock_put(sdp->sd_rename_gl); - gfs2_glock_put(sdp->sd_trans_gl); - - if (!sdp->sd_args.ar_spectator) { - gfs2_glock_dq_uninit(&sdp->sd_journal_gh); - gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); - gfs2_glock_dq_uninit(&sdp->sd_ir_gh); - gfs2_glock_dq_uninit(&sdp->sd_sc_gh); - gfs2_glock_dq_uninit(&sdp->sd_qc_gh); - iput(sdp->sd_ir_inode); - iput(sdp->sd_sc_inode); - iput(sdp->sd_qc_inode); - } - - gfs2_glock_dq_uninit(&sdp->sd_live_gh); - gfs2_clear_rgrpd(sdp); - gfs2_jindex_free(sdp); - /* Take apart glock structures and buffer lists */ - gfs2_gl_hash_clear(sdp); - /* Unmount the locking protocol */ - gfs2_lm_unmount(sdp); - - /* At this point, we're through participating in the lockspace */ - gfs2_sys_fs_del(sdp); -} - -/** - * gfs2_write_super - * @sb: the superblock - * - */ - -static void gfs2_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -} - -/** - * gfs2_sync_fs - sync the filesystem - * @sb: the superblock - * - * Flushes the log to disk. - */ - -static int gfs2_sync_fs(struct super_block *sb, int wait) -{ - sb->s_dirt = 0; - if (wait && sb->s_fs_info) - gfs2_log_flush(sb->s_fs_info, NULL); - return 0; -} - -/** - * gfs2_freeze - prevent further writes to the filesystem - * @sb: the VFS structure for the filesystem - * - */ - -static int gfs2_freeze(struct super_block *sb) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - int error; - - if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return -EINVAL; - - for (;;) { - error = gfs2_freeze_fs(sdp); - if (!error) - break; - - switch (error) { - case -EBUSY: - fs_err(sdp, "waiting for recovery before freeze\n"); - break; - - default: - fs_err(sdp, "error freezing FS: %d\n", error); - break; - } - - fs_err(sdp, "retrying...\n"); - msleep(1000); - } - return 0; -} - -/** - * gfs2_unfreeze - reallow writes to the filesystem - * @sb: the VFS structure for the filesystem - * - */ - -static int gfs2_unfreeze(struct super_block *sb) -{ - gfs2_unfreeze_fs(sb->s_fs_info); - return 0; -} - -/** - * statfs_fill - fill in the sg for a given RG - * @rgd: the RG - * @sc: the sc structure - * - * Returns: 0 on success, -ESTALE if the LVB is invalid - */ - -static int statfs_slow_fill(struct gfs2_rgrpd *rgd, - struct gfs2_statfs_change_host *sc) -{ - gfs2_rgrp_verify(rgd); - sc->sc_total += rgd->rd_data; - sc->sc_free += rgd->rd_free; - sc->sc_dinodes += rgd->rd_dinodes; - return 0; -} - -/** - * gfs2_statfs_slow - Stat a filesystem using asynchronous locking - * @sdp: the filesystem - * @sc: the sc info that will be returned - * - * Any error (other than a signal) will cause this routine to fall back - * to the synchronous version. - * - * FIXME: This really shouldn't busy wait like this. - * - * Returns: errno - */ - -static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) -{ - struct gfs2_holder ri_gh; - struct gfs2_rgrpd *rgd_next; - struct gfs2_holder *gha, *gh; - unsigned int slots = 64; - unsigned int x; - int done; - int error = 0, err; - - memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); - gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); - if (!gha) - return -ENOMEM; - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto out; - - rgd_next = gfs2_rgrpd_get_first(sdp); - - for (;;) { - done = 1; - - for (x = 0; x < slots; x++) { - gh = gha + x; - - if (gh->gh_gl && gfs2_glock_poll(gh)) { - err = gfs2_glock_wait(gh); - if (err) { - gfs2_holder_uninit(gh); - error = err; - } else { - if (!error) - error = statfs_slow_fill( - gh->gh_gl->gl_object, sc); - gfs2_glock_dq_uninit(gh); - } - } - - if (gh->gh_gl) - done = 0; - else if (rgd_next && !error) { - error = gfs2_glock_nq_init(rgd_next->rd_gl, - LM_ST_SHARED, - GL_ASYNC, - gh); - rgd_next = gfs2_rgrpd_get_next(rgd_next); - done = 0; - } - - if (signal_pending(current)) - error = -ERESTARTSYS; - } - - if (done) - break; - - yield(); - } - - gfs2_glock_dq_uninit(&ri_gh); - -out: - kfree(gha); - return error; -} - -/** - * gfs2_statfs_i - Do a statfs - * @sdp: the filesystem - * @sg: the sg structure - * - * Returns: errno - */ - -static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) -{ - struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; - struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; - - spin_lock(&sdp->sd_statfs_spin); - - *sc = *m_sc; - sc->sc_total += l_sc->sc_total; - sc->sc_free += l_sc->sc_free; - sc->sc_dinodes += l_sc->sc_dinodes; - - spin_unlock(&sdp->sd_statfs_spin); - - if (sc->sc_free < 0) - sc->sc_free = 0; - if (sc->sc_free > sc->sc_total) - sc->sc_free = sc->sc_total; - if (sc->sc_dinodes < 0) - sc->sc_dinodes = 0; - - return 0; -} - -/** - * gfs2_statfs - Gather and return stats about the filesystem - * @sb: The superblock - * @statfsbuf: The buffer - * - * Returns: 0 on success or error code - */ - -static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct super_block *sb = dentry->d_inode->i_sb; - struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_statfs_change_host sc; - int error; - - if (gfs2_tune_get(sdp, gt_statfs_slow)) - error = gfs2_statfs_slow(sdp, &sc); - else - error = gfs2_statfs_i(sdp, &sc); - - if (error) - return error; - - buf->f_type = GFS2_MAGIC; - buf->f_bsize = sdp->sd_sb.sb_bsize; - buf->f_blocks = sc.sc_total; - buf->f_bfree = sc.sc_free; - buf->f_bavail = sc.sc_free; - buf->f_files = sc.sc_dinodes + sc.sc_free; - buf->f_ffree = sc.sc_free; - buf->f_namelen = GFS2_FNAMESIZE; - - return 0; -} - -/** - * gfs2_remount_fs - called when the FS is remounted - * @sb: the filesystem - * @flags: the remount flags - * @data: extra data passed in (not used right now) - * - * Returns: errno - */ - -static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_args args = sdp->sd_args; /* Default to current settings */ - struct gfs2_tune *gt = &sdp->sd_tune; - int error; - - spin_lock(>->gt_spin); - args.ar_commit = gt->gt_log_flush_secs; - spin_unlock(>->gt_spin); - error = gfs2_mount_args(sdp, &args, data); - if (error) - return error; - - /* Not allowed to change locking details */ - if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) || - strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) || - strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata)) - return -EINVAL; - - /* Some flags must not be changed */ - if (args_neq(&args, &sdp->sd_args, spectator) || - args_neq(&args, &sdp->sd_args, ignore_local_fs) || - args_neq(&args, &sdp->sd_args, localflocks) || - args_neq(&args, &sdp->sd_args, localcaching) || - args_neq(&args, &sdp->sd_args, meta)) - return -EINVAL; - - if (sdp->sd_args.ar_spectator) - *flags |= MS_RDONLY; - - if ((sb->s_flags ^ *flags) & MS_RDONLY) { - if (*flags & MS_RDONLY) - error = gfs2_make_fs_ro(sdp); - else - error = gfs2_make_fs_rw(sdp); - if (error) - return error; - } - - sdp->sd_args = args; - if (sdp->sd_args.ar_posix_acl) - sb->s_flags |= MS_POSIXACL; - else - sb->s_flags &= ~MS_POSIXACL; - spin_lock(>->gt_spin); - gt->gt_log_flush_secs = args.ar_commit; - spin_unlock(>->gt_spin); - - return 0; -} - -/** - * gfs2_drop_inode - Drop an inode (test for remote unlink) - * @inode: The inode to drop - * - * If we've received a callback on an iopen lock then its because a - * remote node tried to deallocate the inode but failed due to this node - * still having the inode open. Here we mark the link count zero - * since we know that it must have reached zero if the GLF_DEMOTE flag - * is set on the iopen glock. If we didn't do a disk read since the - * remote node removed the final link then we might otherwise miss - * this event. This check ensures that this node will deallocate the - * inode's blocks, or alternatively pass the baton on to another - * node for later deallocation. - */ - -static void gfs2_drop_inode(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - - if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { - struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; - if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) - clear_nlink(inode); - } - generic_drop_inode(inode); -} - -/** - * gfs2_clear_inode - Deallocate an inode when VFS is done with it - * @inode: The VFS inode - * - */ - -static void gfs2_clear_inode(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - - /* This tells us its a "real" inode and not one which only - * serves to contain an address space (see rgrp.c, meta_io.c) - * which therefore doesn't have its own glocks. - */ - if (test_bit(GIF_USER, &ip->i_flags)) { - ip->i_gl->gl_object = NULL; - gfs2_glock_put(ip->i_gl); - ip->i_gl = NULL; - if (ip->i_iopen_gh.gh_gl) { - ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); - } - } -} - -static int is_ancestor(const struct dentry *d1, const struct dentry *d2) -{ - do { - if (d1 == d2) - return 1; - d1 = d1->d_parent; - } while (!IS_ROOT(d1)); - return 0; -} - -/** - * gfs2_show_options - Show mount options for /proc/mounts - * @s: seq_file structure - * @mnt: vfsmount - * - * Returns: 0 on success or error code - */ - -static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) -{ - struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; - struct gfs2_args *args = &sdp->sd_args; - int lfsecs; - - if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) - seq_printf(s, ",meta"); - if (args->ar_lockproto[0]) - seq_printf(s, ",lockproto=%s", args->ar_lockproto); - if (args->ar_locktable[0]) - seq_printf(s, ",locktable=%s", args->ar_locktable); - if (args->ar_hostdata[0]) - seq_printf(s, ",hostdata=%s", args->ar_hostdata); - if (args->ar_spectator) - seq_printf(s, ",spectator"); - if (args->ar_ignore_local_fs) - seq_printf(s, ",ignore_local_fs"); - if (args->ar_localflocks) - seq_printf(s, ",localflocks"); - if (args->ar_localcaching) - seq_printf(s, ",localcaching"); - if (args->ar_debug) - seq_printf(s, ",debug"); - if (args->ar_upgrade) - seq_printf(s, ",upgrade"); - if (args->ar_posix_acl) - seq_printf(s, ",acl"); - if (args->ar_quota != GFS2_QUOTA_DEFAULT) { - char *state; - switch (args->ar_quota) { - case GFS2_QUOTA_OFF: - state = "off"; - break; - case GFS2_QUOTA_ACCOUNT: - state = "account"; - break; - case GFS2_QUOTA_ON: - state = "on"; - break; - default: - state = "unknown"; - break; - } - seq_printf(s, ",quota=%s", state); - } - if (args->ar_suiddir) - seq_printf(s, ",suiddir"); - if (args->ar_data != GFS2_DATA_DEFAULT) { - char *state; - switch (args->ar_data) { - case GFS2_DATA_WRITEBACK: - state = "writeback"; - break; - case GFS2_DATA_ORDERED: - state = "ordered"; - break; - default: - state = "unknown"; - break; - } - seq_printf(s, ",data=%s", state); - } - if (args->ar_discard) - seq_printf(s, ",discard"); - lfsecs = sdp->sd_tune.gt_log_flush_secs; - if (lfsecs != 60) - seq_printf(s, ",commit=%d", lfsecs); - return 0; -} - -/* - * We have to (at the moment) hold the inodes main lock to cover - * the gap between unlocking the shared lock on the iopen lock and - * taking the exclusive lock. I'd rather do a shared -> exclusive - * conversion on the iopen lock, but we can change that later. This - * is safe, just less efficient. - */ - -static void gfs2_delete_inode(struct inode *inode) -{ - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int error; - - if (!test_bit(GIF_USER, &ip->i_flags)) - goto out; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (unlikely(error)) { - gfs2_glock_dq_uninit(&ip->i_iopen_gh); - goto out; - } - - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); - error = gfs2_glock_nq(&ip->i_iopen_gh); - if (error) - goto out_truncate; - - if (S_ISDIR(inode->i_mode) && - (ip->i_diskflags & GFS2_DIF_EXHASH)) { - error = gfs2_dir_exhash_dealloc(ip); - if (error) - goto out_unlock; - } - - if (ip->i_eattr) { - error = gfs2_ea_dealloc(ip); - if (error) - goto out_unlock; - } - - if (!gfs2_is_stuffed(ip)) { - error = gfs2_file_dealloc(ip); - if (error) - goto out_unlock; - } - - error = gfs2_dinode_dealloc(ip); - if (error) - goto out_unlock; - -out_truncate: - error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); - if (error) - goto out_unlock; - /* Needs to be done before glock release & also in a transaction */ - truncate_inode_pages(&inode->i_data, 0); - gfs2_trans_end(sdp); - -out_unlock: - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) - gfs2_glock_dq(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); - gfs2_glock_dq_uninit(&gh); - if (error && error != GLR_TRYFAILED && error != -EROFS) - fs_warn(sdp, "gfs2_delete_inode: %d\n", error); -out: - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); -} - -static struct inode *gfs2_alloc_inode(struct super_block *sb) -{ - struct gfs2_inode *ip; - - ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); - if (ip) { - ip->i_flags = 0; - ip->i_gl = NULL; - } - return &ip->i_inode; -} - -static void gfs2_destroy_inode(struct inode *inode) -{ - kmem_cache_free(gfs2_inode_cachep, inode); -} - -const struct super_operations gfs2_super_ops = { - .alloc_inode = gfs2_alloc_inode, - .destroy_inode = gfs2_destroy_inode, - .write_inode = gfs2_write_inode, - .delete_inode = gfs2_delete_inode, - .put_super = gfs2_put_super, - .write_super = gfs2_write_super, - .sync_fs = gfs2_sync_fs, - .freeze_fs = gfs2_freeze, - .unfreeze_fs = gfs2_unfreeze, - .statfs = gfs2_statfs, - .remount_fs = gfs2_remount_fs, - .clear_inode = gfs2_clear_inode, - .drop_inode = gfs2_drop_inode, - .show_options = gfs2_show_options, -}; - diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 601913e0a482..40bcc37e5a70 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -7,14 +7,20 @@ * of the GNU General Public License version 2. */ +#include #include #include #include #include #include -#include +#include +#include +#include +#include +#include #include -#include +#include +#include #include "gfs2.h" #include "incore.h" @@ -31,6 +37,183 @@ #include "super.h" #include "trans.h" #include "util.h" +#include "sys.h" +#include "eattr.h" + +#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) + +enum { + Opt_lockproto, + Opt_locktable, + Opt_hostdata, + Opt_spectator, + Opt_ignore_local_fs, + Opt_localflocks, + Opt_localcaching, + Opt_debug, + Opt_nodebug, + Opt_upgrade, + Opt_acl, + Opt_noacl, + Opt_quota_off, + Opt_quota_account, + Opt_quota_on, + Opt_quota, + Opt_noquota, + Opt_suiddir, + Opt_nosuiddir, + Opt_data_writeback, + Opt_data_ordered, + Opt_meta, + Opt_discard, + Opt_nodiscard, + Opt_commit, + Opt_error, +}; + +static const match_table_t tokens = { + {Opt_lockproto, "lockproto=%s"}, + {Opt_locktable, "locktable=%s"}, + {Opt_hostdata, "hostdata=%s"}, + {Opt_spectator, "spectator"}, + {Opt_ignore_local_fs, "ignore_local_fs"}, + {Opt_localflocks, "localflocks"}, + {Opt_localcaching, "localcaching"}, + {Opt_debug, "debug"}, + {Opt_nodebug, "nodebug"}, + {Opt_upgrade, "upgrade"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_quota_off, "quota=off"}, + {Opt_quota_account, "quota=account"}, + {Opt_quota_on, "quota=on"}, + {Opt_quota, "quota"}, + {Opt_noquota, "noquota"}, + {Opt_suiddir, "suiddir"}, + {Opt_nosuiddir, "nosuiddir"}, + {Opt_data_writeback, "data=writeback"}, + {Opt_data_ordered, "data=ordered"}, + {Opt_meta, "meta"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, + {Opt_commit, "commit=%d"}, + {Opt_error, NULL} +}; + +/** + * gfs2_mount_args - Parse mount options + * @sdp: + * @data: + * + * Return: errno + */ + +int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) +{ + char *o; + int token; + substring_t tmp[MAX_OPT_ARGS]; + int rv; + + /* Split the options into tokens with the "," character and + process them */ + + while (1) { + o = strsep(&options, ","); + if (o == NULL) + break; + if (*o == '\0') + continue; + + token = match_token(o, tokens, tmp); + switch (token) { + case Opt_lockproto: + match_strlcpy(args->ar_lockproto, &tmp[0], + GFS2_LOCKNAME_LEN); + break; + case Opt_locktable: + match_strlcpy(args->ar_locktable, &tmp[0], + GFS2_LOCKNAME_LEN); + break; + case Opt_hostdata: + match_strlcpy(args->ar_hostdata, &tmp[0], + GFS2_LOCKNAME_LEN); + break; + case Opt_spectator: + args->ar_spectator = 1; + break; + case Opt_ignore_local_fs: + args->ar_ignore_local_fs = 1; + break; + case Opt_localflocks: + args->ar_localflocks = 1; + break; + case Opt_localcaching: + args->ar_localcaching = 1; + break; + case Opt_debug: + args->ar_debug = 1; + break; + case Opt_nodebug: + args->ar_debug = 0; + break; + case Opt_upgrade: + args->ar_upgrade = 1; + break; + case Opt_acl: + args->ar_posix_acl = 1; + break; + case Opt_noacl: + args->ar_posix_acl = 0; + break; + case Opt_quota_off: + case Opt_noquota: + args->ar_quota = GFS2_QUOTA_OFF; + break; + case Opt_quota_account: + args->ar_quota = GFS2_QUOTA_ACCOUNT; + break; + case Opt_quota_on: + case Opt_quota: + args->ar_quota = GFS2_QUOTA_ON; + break; + case Opt_suiddir: + args->ar_suiddir = 1; + break; + case Opt_nosuiddir: + args->ar_suiddir = 0; + break; + case Opt_data_writeback: + args->ar_data = GFS2_DATA_WRITEBACK; + break; + case Opt_data_ordered: + args->ar_data = GFS2_DATA_ORDERED; + break; + case Opt_meta: + args->ar_meta = 1; + break; + case Opt_discard: + args->ar_discard = 1; + break; + case Opt_nodiscard: + args->ar_discard = 0; + break; + case Opt_commit: + rv = match_int(&tmp[0], &args->ar_commit); + if (rv || args->ar_commit <= 0) { + fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_error: + default: + fs_info(sdp, "invalid mount option: %s\n", o); + return -EINVAL; + } + } + + return 0; +} /** * gfs2_jindex_free - Clear all the journal index information @@ -436,3 +619,719 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) mutex_unlock(&sdp->sd_freeze_lock); } + +/** + * gfs2_write_inode - Make sure the inode is stable on the disk + * @inode: The inode + * @sync: synchronous write flag + * + * Returns: errno + */ + +static int gfs2_write_inode(struct inode *inode, int sync) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_holder gh; + struct buffer_head *bh; + struct timespec atime; + struct gfs2_dinode *di; + int ret = 0; + + /* Check this is a "normal" inode, etc */ + if (!test_bit(GIF_USER, &ip->i_flags) || + (current->flags & PF_MEMALLOC)) + return 0; + ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (ret) + goto do_flush; + ret = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (ret) + goto do_unlock; + ret = gfs2_meta_inode_buffer(ip, &bh); + if (ret == 0) { + di = (struct gfs2_dinode *)bh->b_data; + atime.tv_sec = be64_to_cpu(di->di_atime); + atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); + if (timespec_compare(&inode->i_atime, &atime) > 0) { + gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_dinode_out(ip, bh->b_data); + } + brelse(bh); + } + gfs2_trans_end(sdp); +do_unlock: + gfs2_glock_dq_uninit(&gh); +do_flush: + if (sync != 0) + gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + return ret; +} + +/** + * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one + * @sdp: the filesystem + * + * Returns: errno + */ + +static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) +{ + struct gfs2_holder t_gh; + int error; + + gfs2_quota_sync(sdp); + gfs2_statfs_sync(sdp); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, + &t_gh); + if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return error; + + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + if (t_gh.gh_gl) + gfs2_glock_dq_uninit(&t_gh); + + gfs2_quota_cleanup(sdp); + + return error; +} + +static int gfs2_umount_recovery_wait(void *word) +{ + schedule(); + return 0; +} + +/** + * gfs2_put_super - Unmount the filesystem + * @sb: The VFS superblock + * + */ + +static void gfs2_put_super(struct super_block *sb) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + int error; + struct gfs2_jdesc *jd; + + /* Unfreeze the filesystem, if we need to */ + + mutex_lock(&sdp->sd_freeze_lock); + if (sdp->sd_freeze_count) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + mutex_unlock(&sdp->sd_freeze_lock); + + /* No more recovery requests */ + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + smp_mb(); + + /* Wait on outstanding recovery */ +restart: + spin_lock(&sdp->sd_jindex_spin); + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) + continue; + spin_unlock(&sdp->sd_jindex_spin); + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, + gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + spin_unlock(&sdp->sd_jindex_spin); + + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); + + if (!(sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_ro(sdp); + if (error) + gfs2_io_error(sdp); + } + /* At this point, we're through modifying the disk */ + + /* Release stuff */ + + iput(sdp->sd_jindex); + iput(sdp->sd_inum_inode); + iput(sdp->sd_statfs_inode); + iput(sdp->sd_rindex); + iput(sdp->sd_quota_inode); + + gfs2_glock_put(sdp->sd_rename_gl); + gfs2_glock_put(sdp->sd_trans_gl); + + if (!sdp->sd_args.ar_spectator) { + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + gfs2_glock_dq_uninit(&sdp->sd_ir_gh); + gfs2_glock_dq_uninit(&sdp->sd_sc_gh); + gfs2_glock_dq_uninit(&sdp->sd_qc_gh); + iput(sdp->sd_ir_inode); + iput(sdp->sd_sc_inode); + iput(sdp->sd_qc_inode); + } + + gfs2_glock_dq_uninit(&sdp->sd_live_gh); + gfs2_clear_rgrpd(sdp); + gfs2_jindex_free(sdp); + /* Take apart glock structures and buffer lists */ + gfs2_gl_hash_clear(sdp); + /* Unmount the locking protocol */ + gfs2_lm_unmount(sdp); + + /* At this point, we're through participating in the lockspace */ + gfs2_sys_fs_del(sdp); +} + +/** + * gfs2_write_super + * @sb: the superblock + * + */ + +static void gfs2_write_super(struct super_block *sb) +{ + sb->s_dirt = 0; +} + +/** + * gfs2_sync_fs - sync the filesystem + * @sb: the superblock + * + * Flushes the log to disk. + */ + +static int gfs2_sync_fs(struct super_block *sb, int wait) +{ + sb->s_dirt = 0; + if (wait && sb->s_fs_info) + gfs2_log_flush(sb->s_fs_info, NULL); + return 0; +} + +/** + * gfs2_freeze - prevent further writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static int gfs2_freeze(struct super_block *sb) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + int error; + + if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return -EINVAL; + + for (;;) { + error = gfs2_freeze_fs(sdp); + if (!error) + break; + + switch (error) { + case -EBUSY: + fs_err(sdp, "waiting for recovery before freeze\n"); + break; + + default: + fs_err(sdp, "error freezing FS: %d\n", error); + break; + } + + fs_err(sdp, "retrying...\n"); + msleep(1000); + } + return 0; +} + +/** + * gfs2_unfreeze - reallow writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static int gfs2_unfreeze(struct super_block *sb) +{ + gfs2_unfreeze_fs(sb->s_fs_info); + return 0; +} + +/** + * statfs_fill - fill in the sg for a given RG + * @rgd: the RG + * @sc: the sc structure + * + * Returns: 0 on success, -ESTALE if the LVB is invalid + */ + +static int statfs_slow_fill(struct gfs2_rgrpd *rgd, + struct gfs2_statfs_change_host *sc) +{ + gfs2_rgrp_verify(rgd); + sc->sc_total += rgd->rd_data; + sc->sc_free += rgd->rd_free; + sc->sc_dinodes += rgd->rd_dinodes; + return 0; +} + +/** + * gfs2_statfs_slow - Stat a filesystem using asynchronous locking + * @sdp: the filesystem + * @sc: the sc info that will be returned + * + * Any error (other than a signal) will cause this routine to fall back + * to the synchronous version. + * + * FIXME: This really shouldn't busy wait like this. + * + * Returns: errno + */ + +static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) +{ + struct gfs2_holder ri_gh; + struct gfs2_rgrpd *rgd_next; + struct gfs2_holder *gha, *gh; + unsigned int slots = 64; + unsigned int x; + int done; + int error = 0, err; + + memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); + gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); + if (!gha) + return -ENOMEM; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto out; + + rgd_next = gfs2_rgrpd_get_first(sdp); + + for (;;) { + done = 1; + + for (x = 0; x < slots; x++) { + gh = gha + x; + + if (gh->gh_gl && gfs2_glock_poll(gh)) { + err = gfs2_glock_wait(gh); + if (err) { + gfs2_holder_uninit(gh); + error = err; + } else { + if (!error) + error = statfs_slow_fill( + gh->gh_gl->gl_object, sc); + gfs2_glock_dq_uninit(gh); + } + } + + if (gh->gh_gl) + done = 0; + else if (rgd_next && !error) { + error = gfs2_glock_nq_init(rgd_next->rd_gl, + LM_ST_SHARED, + GL_ASYNC, + gh); + rgd_next = gfs2_rgrpd_get_next(rgd_next); + done = 0; + } + + if (signal_pending(current)) + error = -ERESTARTSYS; + } + + if (done) + break; + + yield(); + } + + gfs2_glock_dq_uninit(&ri_gh); + +out: + kfree(gha); + return error; +} + +/** + * gfs2_statfs_i - Do a statfs + * @sdp: the filesystem + * @sg: the sg structure + * + * Returns: errno + */ + +static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) +{ + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + + spin_lock(&sdp->sd_statfs_spin); + + *sc = *m_sc; + sc->sc_total += l_sc->sc_total; + sc->sc_free += l_sc->sc_free; + sc->sc_dinodes += l_sc->sc_dinodes; + + spin_unlock(&sdp->sd_statfs_spin); + + if (sc->sc_free < 0) + sc->sc_free = 0; + if (sc->sc_free > sc->sc_total) + sc->sc_free = sc->sc_total; + if (sc->sc_dinodes < 0) + sc->sc_dinodes = 0; + + return 0; +} + +/** + * gfs2_statfs - Gather and return stats about the filesystem + * @sb: The superblock + * @statfsbuf: The buffer + * + * Returns: 0 on success or error code + */ + +static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_inode->i_sb; + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_statfs_change_host sc; + int error; + + if (gfs2_tune_get(sdp, gt_statfs_slow)) + error = gfs2_statfs_slow(sdp, &sc); + else + error = gfs2_statfs_i(sdp, &sc); + + if (error) + return error; + + buf->f_type = GFS2_MAGIC; + buf->f_bsize = sdp->sd_sb.sb_bsize; + buf->f_blocks = sc.sc_total; + buf->f_bfree = sc.sc_free; + buf->f_bavail = sc.sc_free; + buf->f_files = sc.sc_dinodes + sc.sc_free; + buf->f_ffree = sc.sc_free; + buf->f_namelen = GFS2_FNAMESIZE; + + return 0; +} + +/** + * gfs2_remount_fs - called when the FS is remounted + * @sb: the filesystem + * @flags: the remount flags + * @data: extra data passed in (not used right now) + * + * Returns: errno + */ + +static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_args args = sdp->sd_args; /* Default to current settings */ + struct gfs2_tune *gt = &sdp->sd_tune; + int error; + + spin_lock(>->gt_spin); + args.ar_commit = gt->gt_log_flush_secs; + spin_unlock(>->gt_spin); + error = gfs2_mount_args(sdp, &args, data); + if (error) + return error; + + /* Not allowed to change locking details */ + if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) || + strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) || + strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata)) + return -EINVAL; + + /* Some flags must not be changed */ + if (args_neq(&args, &sdp->sd_args, spectator) || + args_neq(&args, &sdp->sd_args, ignore_local_fs) || + args_neq(&args, &sdp->sd_args, localflocks) || + args_neq(&args, &sdp->sd_args, localcaching) || + args_neq(&args, &sdp->sd_args, meta)) + return -EINVAL; + + if (sdp->sd_args.ar_spectator) + *flags |= MS_RDONLY; + + if ((sb->s_flags ^ *flags) & MS_RDONLY) { + if (*flags & MS_RDONLY) + error = gfs2_make_fs_ro(sdp); + else + error = gfs2_make_fs_rw(sdp); + if (error) + return error; + } + + sdp->sd_args = args; + if (sdp->sd_args.ar_posix_acl) + sb->s_flags |= MS_POSIXACL; + else + sb->s_flags &= ~MS_POSIXACL; + spin_lock(>->gt_spin); + gt->gt_log_flush_secs = args.ar_commit; + spin_unlock(>->gt_spin); + + return 0; +} + +/** + * gfs2_drop_inode - Drop an inode (test for remote unlink) + * @inode: The inode to drop + * + * If we've received a callback on an iopen lock then its because a + * remote node tried to deallocate the inode but failed due to this node + * still having the inode open. Here we mark the link count zero + * since we know that it must have reached zero if the GLF_DEMOTE flag + * is set on the iopen glock. If we didn't do a disk read since the + * remote node removed the final link then we might otherwise miss + * this event. This check ensures that this node will deallocate the + * inode's blocks, or alternatively pass the baton on to another + * node for later deallocation. + */ + +static void gfs2_drop_inode(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { + struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; + if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) + clear_nlink(inode); + } + generic_drop_inode(inode); +} + +/** + * gfs2_clear_inode - Deallocate an inode when VFS is done with it + * @inode: The VFS inode + * + */ + +static void gfs2_clear_inode(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + /* This tells us its a "real" inode and not one which only + * serves to contain an address space (see rgrp.c, meta_io.c) + * which therefore doesn't have its own glocks. + */ + if (test_bit(GIF_USER, &ip->i_flags)) { + ip->i_gl->gl_object = NULL; + gfs2_glock_put(ip->i_gl); + ip->i_gl = NULL; + if (ip->i_iopen_gh.gh_gl) { + ip->i_iopen_gh.gh_gl->gl_object = NULL; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + } + } +} + +static int is_ancestor(const struct dentry *d1, const struct dentry *d2) +{ + do { + if (d1 == d2) + return 1; + d1 = d1->d_parent; + } while (!IS_ROOT(d1)); + return 0; +} + +/** + * gfs2_show_options - Show mount options for /proc/mounts + * @s: seq_file structure + * @mnt: vfsmount + * + * Returns: 0 on success or error code + */ + +static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) +{ + struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; + struct gfs2_args *args = &sdp->sd_args; + int lfsecs; + + if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) + seq_printf(s, ",meta"); + if (args->ar_lockproto[0]) + seq_printf(s, ",lockproto=%s", args->ar_lockproto); + if (args->ar_locktable[0]) + seq_printf(s, ",locktable=%s", args->ar_locktable); + if (args->ar_hostdata[0]) + seq_printf(s, ",hostdata=%s", args->ar_hostdata); + if (args->ar_spectator) + seq_printf(s, ",spectator"); + if (args->ar_ignore_local_fs) + seq_printf(s, ",ignore_local_fs"); + if (args->ar_localflocks) + seq_printf(s, ",localflocks"); + if (args->ar_localcaching) + seq_printf(s, ",localcaching"); + if (args->ar_debug) + seq_printf(s, ",debug"); + if (args->ar_upgrade) + seq_printf(s, ",upgrade"); + if (args->ar_posix_acl) + seq_printf(s, ",acl"); + if (args->ar_quota != GFS2_QUOTA_DEFAULT) { + char *state; + switch (args->ar_quota) { + case GFS2_QUOTA_OFF: + state = "off"; + break; + case GFS2_QUOTA_ACCOUNT: + state = "account"; + break; + case GFS2_QUOTA_ON: + state = "on"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",quota=%s", state); + } + if (args->ar_suiddir) + seq_printf(s, ",suiddir"); + if (args->ar_data != GFS2_DATA_DEFAULT) { + char *state; + switch (args->ar_data) { + case GFS2_DATA_WRITEBACK: + state = "writeback"; + break; + case GFS2_DATA_ORDERED: + state = "ordered"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",data=%s", state); + } + if (args->ar_discard) + seq_printf(s, ",discard"); + lfsecs = sdp->sd_tune.gt_log_flush_secs; + if (lfsecs != 60) + seq_printf(s, ",commit=%d", lfsecs); + return 0; +} + +/* + * We have to (at the moment) hold the inodes main lock to cover + * the gap between unlocking the shared lock on the iopen lock and + * taking the exclusive lock. I'd rather do a shared -> exclusive + * conversion on the iopen lock, but we can change that later. This + * is safe, just less efficient. + */ + +static void gfs2_delete_inode(struct inode *inode) +{ + struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int error; + + if (!test_bit(GIF_USER, &ip->i_flags)) + goto out; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (unlikely(error)) { + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + goto out; + } + + gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); + error = gfs2_glock_nq(&ip->i_iopen_gh); + if (error) + goto out_truncate; + + if (S_ISDIR(inode->i_mode) && + (ip->i_diskflags & GFS2_DIF_EXHASH)) { + error = gfs2_dir_exhash_dealloc(ip); + if (error) + goto out_unlock; + } + + if (ip->i_eattr) { + error = gfs2_ea_dealloc(ip); + if (error) + goto out_unlock; + } + + if (!gfs2_is_stuffed(ip)) { + error = gfs2_file_dealloc(ip); + if (error) + goto out_unlock; + } + + error = gfs2_dinode_dealloc(ip); + if (error) + goto out_unlock; + +out_truncate: + error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); + if (error) + goto out_unlock; + /* Needs to be done before glock release & also in a transaction */ + truncate_inode_pages(&inode->i_data, 0); + gfs2_trans_end(sdp); + +out_unlock: + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) + gfs2_glock_dq(&ip->i_iopen_gh); + gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&gh); + if (error && error != GLR_TRYFAILED && error != -EROFS) + fs_warn(sdp, "gfs2_delete_inode: %d\n", error); +out: + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); +} + +static struct inode *gfs2_alloc_inode(struct super_block *sb) +{ + struct gfs2_inode *ip; + + ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); + if (ip) { + ip->i_flags = 0; + ip->i_gl = NULL; + } + return &ip->i_inode; +} + +static void gfs2_destroy_inode(struct inode *inode) +{ + kmem_cache_free(gfs2_inode_cachep, inode); +} + +const struct super_operations gfs2_super_ops = { + .alloc_inode = gfs2_alloc_inode, + .destroy_inode = gfs2_destroy_inode, + .write_inode = gfs2_write_inode, + .delete_inode = gfs2_delete_inode, + .put_super = gfs2_put_super, + .write_super = gfs2_write_super, + .sync_fs = gfs2_sync_fs, + .freeze_fs = gfs2_freeze, + .unfreeze_fs = gfs2_unfreeze, + .statfs = gfs2_statfs, + .remount_fs = gfs2_remount_fs, + .clear_inode = gfs2_clear_inode, + .drop_inode = gfs2_drop_inode, + .show_options = gfs2_show_options, +}; + -- cgit v1.2.3 From 2286dbfad1fb622ee2691537e5caaedee4618860 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:45:09 +0100 Subject: GFS2: Move gfs2_rmdiri into ops_inode.c Move gfs2_rmdiri() into ops_inode.c and make it static. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 52 ---------------------------------------------------- fs/gfs2/inode.h | 2 -- fs/gfs2/ops_inode.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 54 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c03a1a384e72..9b17447a0f95 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1046,58 +1046,6 @@ fail: return ERR_PTR(error); } -/** - * gfs2_rmdiri - Remove a directory - * @dip: The parent directory of the directory to be removed - * @name: The name of the directory to be removed - * @ip: The GFS2 inode of the directory to be removed - * - * Assumes Glocks on dip and ip are held - * - * Returns: errno - */ - -int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip) -{ - struct qstr dotname; - int error; - - if (ip->i_entries != 2) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - error = gfs2_dir_del(dip, name); - if (error) - return error; - - error = gfs2_change_nlink(dip, -1); - if (error) - return error; - - gfs2_str2qstr(&dotname, "."); - error = gfs2_dir_del(ip, &dotname); - if (error) - return error; - - gfs2_str2qstr(&dotname, ".."); - error = gfs2_dir_del(ip, &dotname); - if (error) - return error; - - /* It looks odd, but it really should be done twice */ - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - return error; -} /* * gfs2_unlink_ok - check to see that a inode is still in a directory diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 2c3ec072d60e..6cd39284eb08 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -96,8 +96,6 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, extern struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); -extern int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip); extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, const struct gfs2_inode *ip); extern int gfs2_permission(struct inode *inode, int mask); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1c70fa5168d6..5dacd647ff0d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -472,6 +472,59 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) return 0; } +/** + * gfs2_rmdiri - Remove a directory + * @dip: The parent directory of the directory to be removed + * @name: The name of the directory to be removed + * @ip: The GFS2 inode of the directory to be removed + * + * Assumes Glocks on dip and ip are held + * + * Returns: errno + */ + +static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, + struct gfs2_inode *ip) +{ + struct qstr dotname; + int error; + + if (ip->i_entries != 2) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(ip); + return -EIO; + } + + error = gfs2_dir_del(dip, name); + if (error) + return error; + + error = gfs2_change_nlink(dip, -1); + if (error) + return error; + + gfs2_str2qstr(&dotname, "."); + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + gfs2_str2qstr(&dotname, ".."); + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + /* It looks odd, but it really should be done twice */ + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + return error; +} + /** * gfs2_rmdir - Remove a directory * @dir: The parent directory of the directory to be removed -- cgit v1.2.3 From 536baf02f650f4547f105386878b4736fbc181e8 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:48:59 +0100 Subject: GFS2: Move gfs2_readlinki into ops_inode.c Move gfs2_readlinki into ops_inode.c and make it static Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 58 +---------------------------------------------------- fs/gfs2/inode.h | 1 - fs/gfs2/ops_inode.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 58 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9b17447a0f95..676e750fc84c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1085,63 +1085,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, return 0; } -/** - * gfs2_readlinki - return the contents of a symlink - * @ip: the symlink's inode - * @buf: a pointer to the buffer to be filled - * @len: a pointer to the length of @buf - * - * If @buf is too small, a piece of memory is kmalloc()ed and needs - * to be freed by the caller. - * - * Returns: errno - */ - -int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) -{ - struct gfs2_holder i_gh; - struct buffer_head *dibh; - unsigned int x; - int error; - - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); - error = gfs2_glock_nq(&i_gh); - if (error) { - gfs2_holder_uninit(&i_gh); - return error; - } - - if (!ip->i_disksize) { - gfs2_consist_inode(ip); - error = -EIO; - goto out; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto out; - - x = ip->i_disksize + 1; - if (x > *len) { - *buf = kmalloc(x, GFP_NOFS); - if (!*buf) { - error = -ENOMEM; - goto out_brelse; - } - } - - memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); - *len = x; - -out_brelse: - brelse(dibh); -out: - gfs2_glock_dq_uninit(&i_gh); - return error; -} - -static int -__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) +static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) { struct buffer_head *dibh; int error; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 6cd39284eb08..fc9a08f45be7 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -99,7 +99,6 @@ extern struct inode *gfs2_createi(struct gfs2_holder *ghs, extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, const struct gfs2_inode *ip); extern int gfs2_permission(struct inode *inode, int mask); -extern int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 5dacd647ff0d..f607f0908cff 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -937,6 +937,61 @@ out: return error; } +/** + * gfs2_readlinki - return the contents of a symlink + * @ip: the symlink's inode + * @buf: a pointer to the buffer to be filled + * @len: a pointer to the length of @buf + * + * If @buf is too small, a piece of memory is kmalloc()ed and needs + * to be freed by the caller. + * + * Returns: errno + */ + +static int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) +{ + struct gfs2_holder i_gh; + struct buffer_head *dibh; + unsigned int x; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); + if (error) { + gfs2_holder_uninit(&i_gh); + return error; + } + + if (!ip->i_disksize) { + gfs2_consist_inode(ip); + error = -EIO; + goto out; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + x = ip->i_disksize + 1; + if (x > *len) { + *buf = kmalloc(x, GFP_NOFS); + if (!*buf) { + error = -ENOMEM; + goto out_brelse; + } + } + + memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); + *len = x; + +out_brelse: + brelse(dibh); +out: + gfs2_glock_dq_uninit(&i_gh); + return error; +} + /** * gfs2_readlink - Read the value of a symlink * @dentry: the symlink -- cgit v1.2.3 From 87ec21741138bb42e7f943bb142b1d8567c10925 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:54:50 +0100 Subject: GFS2: Move gfs2_unlink_ok into ops_inode.c Another function which is only called from one ops_inode.c so we move it and make it static. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 39 --------------------------------------- fs/gfs2/inode.h | 2 -- fs/gfs2/ops_inode.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 41 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 676e750fc84c..2f94bd723698 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1046,45 +1046,6 @@ fail: return ERR_PTR(error); } - -/* - * gfs2_unlink_ok - check to see that a inode is still in a directory - * @dip: the directory - * @name: the name of the file - * @ip: the inode - * - * Assumes that the lock on (at least) @dip is held. - * - * Returns: 0 if the parent/child relationship is correct, errno if it isn't - */ - -int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - const struct gfs2_inode *ip) -{ - int error; - - if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) - return -EPERM; - - if ((dip->i_inode.i_mode & S_ISVTX) && - dip->i_inode.i_uid != current_fsuid() && - ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) - return -EPERM; - - if (IS_APPEND(&dip->i_inode)) - return -EPERM; - - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); - if (error) - return error; - - error = gfs2_dir_check(&dip->i_inode, name, ip); - if (error) - return error; - - return 0; -} - static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) { struct buffer_head *dibh; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index fc9a08f45be7..c341aaf67adb 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -96,8 +96,6 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, extern struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); -extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - const struct gfs2_inode *ip); extern int gfs2_permission(struct inode *inode, int mask); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index f607f0908cff..f8bd20baf99c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -262,6 +262,44 @@ out_parent: return error; } +/* + * gfs2_unlink_ok - check to see that a inode is still in a directory + * @dip: the directory + * @name: the name of the file + * @ip: the inode + * + * Assumes that the lock on (at least) @dip is held. + * + * Returns: 0 if the parent/child relationship is correct, errno if it isn't + */ + +static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, + const struct gfs2_inode *ip) +{ + int error; + + if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) + return -EPERM; + + if ((dip->i_inode.i_mode & S_ISVTX) && + dip->i_inode.i_uid != current_fsuid() && + ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) + return -EPERM; + + if (IS_APPEND(&dip->i_inode)) + return -EPERM; + + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); + if (error) + return error; + + error = gfs2_dir_check(&dip->i_inode, name, ip); + if (error) + return error; + + return 0; +} + /** * gfs2_unlink - Unlink a file * @dir: The inode of the directory containing the file to unlink -- cgit v1.2.3 From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:49 -0400 Subject: block: Do away with the notion of hardsect_size Until now we have had a 1:1 mapping between storage device physical block size and the logical block sized used when addressing the device. With SATA 4KB drives coming out that will no longer be the case. The sector size will be 4KB but the logical block size will remain 512-bytes. Hence we need to distinguish between the physical block size and the logical ditto. This patch renames hardsect_size to logical_block_size. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- arch/powerpc/sysdev/axonram.c | 2 +- block/blk-integrity.c | 2 +- block/blk-settings.c | 21 ++++++++++----------- block/blk-sysfs.c | 12 +++++++++--- block/compat_ioctl.c | 2 +- block/ioctl.c | 2 +- drivers/block/cciss.c | 6 +++--- drivers/block/cpqarray.c | 4 ++-- drivers/block/hd.c | 2 +- drivers/block/mg_disk.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/ub.c | 6 +++--- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/block/xsysace.c | 2 +- drivers/cdrom/gdrom.c | 2 +- drivers/cdrom/viocd.c | 4 ++-- drivers/char/raw.c | 2 +- drivers/ide/ide-cd.c | 12 ++++++------ drivers/md/bitmap.c | 4 ++-- drivers/md/dm-exception-store.c | 2 +- drivers/md/dm-log.c | 3 ++- drivers/md/dm-snap-persistent.c | 2 +- drivers/md/dm-table.c | 12 +++++++----- drivers/md/md.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- drivers/message/i2o/i2o_block.c | 5 +++-- drivers/mmc/card/block.c | 2 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/s390/block/dasd.c | 2 +- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/xpram.c | 2 +- drivers/s390/char/tape_block.c | 2 +- drivers/scsi/sd.c | 2 +- drivers/scsi/sr.c | 2 +- fs/bio.c | 3 ++- fs/block_dev.c | 6 +++--- fs/buffer.c | 6 +++--- fs/direct-io.c | 2 +- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 2 +- fs/gfs2/ops_fstype.c | 4 ++-- fs/gfs2/rgrp.c | 2 +- fs/nilfs2/the_nilfs.c | 2 +- fs/ntfs/super.c | 6 +++--- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/ocfs2/super.c | 2 +- fs/partitions/ibm.c | 2 +- fs/partitions/msdos.c | 4 ++-- fs/udf/super.c | 2 +- fs/xfs/linux-2.6/xfs_buf.c | 2 +- include/linux/blkdev.h | 14 +++++++------- include/linux/device-mapper.h | 2 +- 54 files changed, 108 insertions(+), 98 deletions(-) (limited to 'fs/gfs2') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 9e105cbc5e5f..a4779912a5ca 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -250,7 +250,7 @@ axon_ram_probe(struct of_device *device, const struct of_device_id *device_id) set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); blk_queue_make_request(bank->disk->queue, axon_ram_make_request); - blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); + blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); add_disk(bank->disk); bank->irq_id = irq_of_parse_and_map(device->node, 0); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 91fa8e06b6a5..73e28d355688 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -340,7 +340,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) kobject_uevent(&bi->kobj, KOBJ_ADD); bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE; - bi->sector_size = disk->queue->hardsect_size; + bi->sector_size = queue_logical_block_size(disk->queue); disk->integrity = bi; } else bi = disk->integrity; diff --git a/block/blk-settings.c b/block/blk-settings.c index 57af728d94bb..15c3164537b8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -134,7 +134,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->backing_dev_info.state = 0; q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; blk_queue_max_sectors(q, SAFE_MAX_SECTORS); - blk_queue_hardsect_size(q, 512); + blk_queue_logical_block_size(q, 512); blk_queue_dma_alignment(q, 511); blk_queue_congestion_threshold(q); q->nr_batching = BLK_BATCH_REQ; @@ -288,21 +288,20 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) EXPORT_SYMBOL(blk_queue_max_segment_size); /** - * blk_queue_hardsect_size - set hardware sector size for the queue + * blk_queue_logical_block_size - set logical block size for the queue * @q: the request queue for the device - * @size: the hardware sector size, in bytes + * @size: the logical block size, in bytes * * Description: - * This should typically be set to the lowest possible sector size - * that the hardware can operate on (possible without reverting to - * even internal read-modify-write operations). Usually the default - * of 512 covers most hardware. + * This should be set to the lowest possible block size that the + * storage device can address. The default of 512 covers most + * hardware. **/ -void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) +void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) { - q->hardsect_size = size; + q->logical_block_size = size; } -EXPORT_SYMBOL(blk_queue_hardsect_size); +EXPORT_SYMBOL(blk_queue_logical_block_size); /* * Returns the minimum that is _not_ zero, unless both are zero. @@ -324,7 +323,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments); t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); - t->hardsect_size = max(t->hardsect_size, b->hardsect_size); + t->logical_block_size = max(t->logical_block_size, b->logical_block_size); if (!t->queue_lock) WARN_ON_ONCE(1); else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3ff9bba3379a..13d38b7e4d0f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -100,9 +100,9 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_sectors_kb, (page)); } -static ssize_t queue_hw_sector_size_show(struct request_queue *q, char *page) +static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) { - return queue_var_show(q->hardsect_size, page); + return queue_var_show(queue_logical_block_size(q), page); } static ssize_t @@ -249,7 +249,12 @@ static struct queue_sysfs_entry queue_iosched_entry = { static struct queue_sysfs_entry queue_hw_sector_size_entry = { .attr = {.name = "hw_sector_size", .mode = S_IRUGO }, - .show = queue_hw_sector_size_show, + .show = queue_logical_block_size_show, +}; + +static struct queue_sysfs_entry queue_logical_block_size_entry = { + .attr = {.name = "logical_block_size", .mode = S_IRUGO }, + .show = queue_logical_block_size_show, }; static struct queue_sysfs_entry queue_nonrot_entry = { @@ -283,6 +288,7 @@ static struct attribute *default_attrs[] = { &queue_max_sectors_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, + &queue_logical_block_size_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615dea46b..9eaa1940273a 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -763,7 +763,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ return compat_put_int(arg, block_size(bdev)); case BLKSSZGET: /* get block device hardware sector size */ - return compat_put_int(arg, bdev_hardsect_size(bdev)); + return compat_put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: return compat_put_ushort(arg, bdev_get_queue(bdev)->max_sectors); diff --git a/block/ioctl.c b/block/ioctl.c index ad474d4bbcce..7aa97f65da82 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -311,7 +311,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */ return put_int(arg, block_size(bdev)); case BLKSSZGET: /* get block device hardware sector size */ - return put_int(arg, bdev_hardsect_size(bdev)); + return put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: return put_ushort(arg, bdev_get_queue(bdev)->max_sectors); case BLKRASET: diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e714e7cce6f2..94474f5f8bce 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1389,8 +1389,8 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->queue->queuedata = h; - blk_queue_hardsect_size(disk->queue, - h->drv[drv_index].block_size); + blk_queue_logical_block_size(disk->queue, + h->drv[drv_index].block_size); /* Make sure all queue data is written out before */ /* setting h->drv[drv_index].queue, as setting this */ @@ -2298,7 +2298,7 @@ static int cciss_revalidate(struct gendisk *disk) cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size, inq_buff, drv); - blk_queue_hardsect_size(drv->queue, drv->block_size); + blk_queue_logical_block_size(drv->queue, drv->block_size); set_capacity(disk, drv->nr_blocks); kfree(inq_buff); diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index a02dcfc00f13..44fa2018f6b0 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -474,7 +474,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) disk->fops = &ida_fops; if (j && !drv->nr_blks) continue; - blk_queue_hardsect_size(hba[i]->queue, drv->blk_size); + blk_queue_logical_block_size(hba[i]->queue, drv->blk_size); set_capacity(disk, drv->nr_blks); disk->queue = hba[i]->queue; disk->private_data = drv; @@ -1546,7 +1546,7 @@ static int revalidate_allvol(ctlr_info_t *host) drv_info_t *drv = &host->drv[i]; if (i && !drv->nr_blks) continue; - blk_queue_hardsect_size(host->queue, drv->blk_size); + blk_queue_logical_block_size(host->queue, drv->blk_size); set_capacity(disk, drv->nr_blks); disk->queue = host->queue; disk->private_data = drv; diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 961de56d00a9..f65b3f369eb0 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -724,7 +724,7 @@ static int __init hd_init(void) blk_queue_max_sectors(hd_queue, 255); init_timer(&device_timer); device_timer.function = hd_times_out; - blk_queue_hardsect_size(hd_queue, 512); + blk_queue_logical_block_size(hd_queue, 512); if (!NR_HD) { /* diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index c0cd0a03f698..60de5a01e71e 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -996,7 +996,7 @@ static int mg_probe(struct platform_device *plat_dev) goto probe_err_6; } blk_queue_max_sectors(host->breq, MG_MAX_SECTS); - blk_queue_hardsect_size(host->breq, MG_SECTOR_SIZE); + blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE); init_timer(&host->timer); host->timer.function = mg_times_out; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index dc7a8c352da2..293f5858921d 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2657,7 +2657,7 @@ static void pkt_init_queue(struct pktcdvd_device *pd) struct request_queue *q = pd->disk->queue; blk_queue_make_request(q, pkt_make_request); - blk_queue_hardsect_size(q, CD_FRAMESIZE); + blk_queue_logical_block_size(q, CD_FRAMESIZE); blk_queue_max_sectors(q, PACKET_MAX_SECTORS); blk_queue_merge_bvec(q, pkt_merge_bvec); q->queuedata = pd; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 338cee4cc0ba..aaeeb544228a 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -477,7 +477,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_max_sectors(queue, dev->bounce_size >> 9); blk_queue_segment_boundary(queue, -1UL); blk_queue_dma_alignment(queue, dev->blk_size-1); - blk_queue_hardsect_size(queue, dev->blk_size); + blk_queue_logical_block_size(queue, dev->blk_size); blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH, ps3disk_prepare_flush); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index e67bbae9547d..cc54473b8e77 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -722,7 +722,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, /* * build the command * - * The call to blk_queue_hardsect_size() guarantees that request + * The call to blk_queue_logical_block_size() guarantees that request * is aligned, but it is given in terms of 512 byte units, always. */ block = blk_rq_pos(rq) >> lun->capacity.bshift; @@ -1749,7 +1749,7 @@ static int ub_bd_revalidate(struct gendisk *disk) ub_revalidate(lun->udev, lun); /* XXX Support sector size switching like in sr.c */ - blk_queue_hardsect_size(disk->queue, lun->capacity.bsize); + blk_queue_logical_block_size(disk->queue, lun->capacity.bsize); set_capacity(disk, lun->capacity.nsec); // set_disk_ro(sdkp->disk, lun->readonly); @@ -2324,7 +2324,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum) blk_queue_max_phys_segments(q, UB_MAX_REQ_SG); blk_queue_segment_boundary(q, 0xffffffff); /* Dubious. */ blk_queue_max_sectors(q, UB_MAX_SECTORS); - blk_queue_hardsect_size(q, lun->capacity.bsize); + blk_queue_logical_block_size(q, lun->capacity.bsize); lun->disk = disk; q->queuedata = lun; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 511d4ae2d176..c4845b169464 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -347,7 +347,7 @@ static int virtblk_probe(struct virtio_device *vdev) offsetof(struct virtio_blk_config, blk_size), &blk_size); if (!err) - blk_queue_hardsect_size(vblk->disk->queue, blk_size); + blk_queue_logical_block_size(vblk->disk->queue, blk_size); add_disk(vblk->disk); return 0; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 132120ae4bde..c1996829d5ec 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -344,7 +344,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_hardsect_size(rq, sector_size); + blk_queue_logical_block_size(rq, sector_size); blk_queue_max_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 3a4397edab71..f08491a3a813 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -984,7 +984,7 @@ static int __devinit ace_setup(struct ace_device *ace) ace->queue = blk_init_queue(ace_request, &ace->lock); if (ace->queue == NULL) goto err_blk_initq; - blk_queue_hardsect_size(ace->queue, 512); + blk_queue_logical_block_size(ace->queue, 512); /* * Allocate and initialize GD structure diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 1e366ad8f680..b5621f27c4be 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -739,7 +739,7 @@ static void __devinit probe_gdrom_setupdisk(void) static int __devinit probe_gdrom_setupqueue(void) { - blk_queue_hardsect_size(gd.gdrom_rq, GDROM_HARD_SECTOR); + blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR); /* using DMA so memory will need to be contiguous */ blk_queue_max_hw_segments(gd.gdrom_rq, 1); /* set a large max size to get most from DMA */ diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index f177c2d4017f..0fff646cc2f0 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -469,8 +469,8 @@ static void vio_handle_cd_event(struct HvLpEvent *event) case viocdopen: if (event->xRc == 0) { di = &viocd_diskinfo[bevent->disk]; - blk_queue_hardsect_size(di->viocd_disk->queue, - bevent->block_size); + blk_queue_logical_block_size(di->viocd_disk->queue, + bevent->block_size); set_capacity(di->viocd_disk, bevent->media_size * bevent->block_size / 512); diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 20d90e6a6e50..db32f0e4c7dd 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -71,7 +71,7 @@ static int raw_open(struct inode *inode, struct file *filp) err = bd_claim(bdev, raw_open); if (err) goto out1; - err = set_blocksize(bdev, bdev_hardsect_size(bdev)); + err = set_blocksize(bdev, bdev_logical_block_size(bdev)); if (err) goto out2; filp->f_flags |= O_DIRECT; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 1799328decfb..424140c6c400 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -182,7 +182,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, (sense->information[2] << 8) | (sense->information[3]); - if (drive->queue->hardsect_size == 2048) + if (queue_logical_block_size(drive->queue) == 2048) /* device sector size is 2K */ sector <<= 2; @@ -737,7 +737,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) struct request_queue *q = drive->queue; int write = rq_data_dir(rq) == WRITE; unsigned short sectors_per_frame = - queue_hardsect_size(q) >> SECTOR_BITS; + queue_logical_block_size(q) >> SECTOR_BITS; ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, " "secs_per_frame: %u", @@ -1021,8 +1021,8 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense) /* save a private copy of the TOC capacity for error handling */ drive->probed_capacity = toc->capacity * sectors_per_frame; - blk_queue_hardsect_size(drive->queue, - sectors_per_frame << SECTOR_BITS); + blk_queue_logical_block_size(drive->queue, + sectors_per_frame << SECTOR_BITS); /* first read just the header, so we know how long the TOC is */ stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr, @@ -1338,7 +1338,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) /* standard prep_rq_fn that builds 10 byte cmds */ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) { - int hard_sect = queue_hardsect_size(q); + int hard_sect = queue_logical_block_size(q); long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); @@ -1543,7 +1543,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) nslots = ide_cdrom_probe_capabilities(drive); - blk_queue_hardsect_size(q, CD_FRAMESIZE); + blk_queue_logical_block_size(q, CD_FRAMESIZE); if (ide_cdrom_register(drive, nslots)) { printk(KERN_ERR PFX "%s: %s failed to register device with the" diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 47c68bc75a17..06b0ded1ce23 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -232,7 +232,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, target = rdev->sb_start + offset + index * (PAGE_SIZE/512); if (sync_page_io(rdev->bdev, target, - roundup(size, bdev_hardsect_size(rdev->bdev)), + roundup(size, bdev_logical_block_size(rdev->bdev)), page, READ)) { page->index = index; attach_page_buffers(page, NULL); /* so that free_buffer will @@ -287,7 +287,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) int size = PAGE_SIZE; if (page->index == bitmap->file_pages-1) size = roundup(bitmap->last_page_size, - bdev_hardsect_size(rdev->bdev)); + bdev_logical_block_size(rdev->bdev)); /* Just make sure we aren't corrupting data or * metadata */ diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index a2e26c242141..75d8081a9041 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -178,7 +178,7 @@ static int set_chunk_size(struct dm_exception_store *store, } /* Validate the chunk size against the device block size */ - if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) { + if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) { *error = "Chunk size is not a multiple of device blocksize"; return -EINVAL; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index be233bc4d917..6fa8ccf91c70 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -413,7 +413,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, * Buffer holds both header and bitset. */ buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + - bitset_size, ti->limits.hardsect_size); + bitset_size, + ti->limits.logical_block_size); if (buf_size > dev->bdev->bd_inode->i_size) { DMWARN("log device %s too small: need %llu bytes", diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index e75c6dd76a9a..2662a41337e7 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -282,7 +282,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) */ if (!ps->store->chunk_size) { ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, - bdev_hardsect_size(ps->store->cow->bdev) >> 9); + bdev_logical_block_size(ps->store->cow->bdev) >> 9); ps->store->chunk_mask = ps->store->chunk_size - 1; ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1; chunk_size_supplied = 0; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 429b50b975d5..65e2d9759857 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -108,7 +108,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs, lhs->max_hw_segments = min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments); - lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size); + lhs->logical_block_size = max(lhs->logical_block_size, + rhs->logical_block_size); lhs->max_segment_size = min_not_zero(lhs->max_segment_size, rhs->max_segment_size); @@ -529,7 +530,8 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_hw_segments = min_not_zero(rs->max_hw_segments, q->max_hw_segments); - rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size); + rs->logical_block_size = max(rs->logical_block_size, + queue_logical_block_size(q)); rs->max_segment_size = min_not_zero(rs->max_segment_size, q->max_segment_size); @@ -683,8 +685,8 @@ static void check_for_valid_limits(struct io_restrictions *rs) rs->max_phys_segments = MAX_PHYS_SEGMENTS; if (!rs->max_hw_segments) rs->max_hw_segments = MAX_HW_SEGMENTS; - if (!rs->hardsect_size) - rs->hardsect_size = 1 << SECTOR_SHIFT; + if (!rs->logical_block_size) + rs->logical_block_size = 1 << SECTOR_SHIFT; if (!rs->max_segment_size) rs->max_segment_size = MAX_SEGMENT_SIZE; if (!rs->seg_boundary_mask) @@ -914,7 +916,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) blk_queue_max_sectors(q, t->limits.max_sectors); q->max_phys_segments = t->limits.max_phys_segments; q->max_hw_segments = t->limits.max_hw_segments; - q->hardsect_size = t->limits.hardsect_size; + q->logical_block_size = t->limits.logical_block_size; q->max_segment_size = t->limits.max_segment_size; q->max_hw_sectors = t->limits.max_hw_sectors; q->seg_boundary_mask = t->limits.seg_boundary_mask; diff --git a/drivers/md/md.c b/drivers/md/md.c index fccc8343a250..4cbc19f5c304 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1202,7 +1202,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; - bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; + bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) rdev->sb_size = (rdev->sb_size | bmask) + 1; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c0bebc6a2f2c..7847bbc1440d 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1242,7 +1242,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) sprintf(msb->disk->disk_name, "mspblk%d", disk_id); - blk_queue_hardsect_size(msb->queue, msb->page_size); + blk_queue_logical_block_size(msb->queue, msb->page_size); capacity = be16_to_cpu(sys_info->user_block_count); capacity *= be16_to_cpu(sys_info->block_size); diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 6573ef4408f1..335d4c78a775 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -794,8 +794,9 @@ static int i2o_block_transfer(struct request *req) if (c->adaptec) { u8 cmd[10]; u32 scsi_flags; - u16 hwsec = queue_hardsect_size(req->q) >> KERNEL_SECTOR_SHIFT; + u16 hwsec; + hwsec = queue_logical_block_size(req->q) >> KERNEL_SECTOR_SHIFT; memset(cmd, 0, 10); sgl_offset = SGL_OFFSET_12; @@ -1078,7 +1079,7 @@ static int i2o_block_probe(struct device *dev) */ if (!i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) || !i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) { - blk_queue_hardsect_size(queue, le32_to_cpu(blocksize)); + blk_queue_logical_block_size(queue, le32_to_cpu(blocksize)); } else osm_warn("unable to get blocksize of %s\n", gd->disk_name); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index c5df86546458..98ffc41eaf2c 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -521,7 +521,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) sprintf(md->disk->disk_name, "mmcblk%d", devidx); - blk_queue_hardsect_size(md->queue.queue, 512); + blk_queue_logical_block_size(md->queue.queue, 512); if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) { /* diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 502622f628bc..aaac3b6800b7 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -378,7 +378,7 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) } tr->blkcore_priv->rq->queuedata = tr; - blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); + blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize); if (tr->discard) blk_queue_set_discard(tr->blkcore_priv->rq, blktrans_discard_request); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e64f62d5e0fc..27a1be0cd4d4 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1990,7 +1990,7 @@ static void dasd_setup_queue(struct dasd_block *block) { int max; - blk_queue_hardsect_size(block->request_queue, block->bp_block); + blk_queue_logical_block_size(block->request_queue, block->bp_block); max = block->base->discipline->max_blocks << block->s2b_shift; blk_queue_max_sectors(block->request_queue, max); blk_queue_max_phys_segments(block->request_queue, -1L); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index cfdcf1aed33c..a4c7ffcd9987 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -602,7 +602,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->private_data = dev_info; dev_info->gd->driverfs_dev = &dev_info->dev; blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); - blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096); + blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096); seg_byte_size = (dev_info->end - dev_info->start + 1); set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 76814f3e898a..0ae0c83ef879 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -343,7 +343,7 @@ static int __init xpram_setup_blkdev(void) goto out; } blk_queue_make_request(xpram_queues[i], xpram_make_request); - blk_queue_hardsect_size(xpram_queues[i], 4096); + blk_queue_logical_block_size(xpram_queues[i], 4096); } /* diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 1e7967675980..47ff695255ea 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -222,7 +222,7 @@ tapeblock_setup_device(struct tape_device * device) if (rc) goto cleanup_queue; - blk_queue_hardsect_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE); + blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE); blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC); blk_queue_max_phys_segments(blkdat->request_queue, -1L); blk_queue_max_hw_segments(blkdat->request_queue, -1L); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 40d2860f235a..bcf3bd40bbd5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1510,7 +1510,7 @@ got_data: */ sector_size = 512; } - blk_queue_hardsect_size(sdp->request_queue, sector_size); + blk_queue_logical_block_size(sdp->request_queue, sector_size); { char cap_str_2[10], cap_str_10[10]; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index fddba53c7fe5..cd350dfc1216 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -727,7 +727,7 @@ static void get_sectorsize(struct scsi_cd *cd) } queue = cd->device->request_queue; - blk_queue_hardsect_size(queue, sector_size); + blk_queue_logical_block_size(queue, sector_size); return; } diff --git a/fs/bio.c b/fs/bio.c index 81dc93e72535..4445c3821730 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1490,11 +1490,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) sector_t bio_sector_offset(struct bio *bio, unsigned short index, unsigned int offset) { - unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); + unsigned int sector_sz; struct bio_vec *bv; sector_t sectors; int i; + sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue); sectors = 0; if (index >= bio->bi_idx) diff --git a/fs/block_dev.c b/fs/block_dev.c index a85fe310fc6f..a29b4dcc1bca 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -76,7 +76,7 @@ int set_blocksize(struct block_device *bdev, int size) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ - if (size < bdev_hardsect_size(bdev)) + if (size < bdev_logical_block_size(bdev)) return -EINVAL; /* Don't change the size if it is same as current */ @@ -106,7 +106,7 @@ EXPORT_SYMBOL(sb_set_blocksize); int sb_min_blocksize(struct super_block *sb, int size) { - int minsize = bdev_hardsect_size(sb->s_bdev); + int minsize = bdev_logical_block_size(sb->s_bdev); if (size < minsize) size = minsize; return sb_set_blocksize(sb, size); @@ -1117,7 +1117,7 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_size(struct block_device *bdev, loff_t size) { - unsigned bsize = bdev_hardsect_size(bdev); + unsigned bsize = bdev_logical_block_size(bdev); bdev->bd_inode->i_size = size; while (bsize < PAGE_CACHE_SIZE) { diff --git a/fs/buffer.c b/fs/buffer.c index aed297739eb0..36e2bbc60ec7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1085,12 +1085,12 @@ static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { /* Size must be multiple of hard sectorsize */ - if (unlikely(size & (bdev_hardsect_size(bdev)-1) || + if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { printk(KERN_ERR "getblk(): invalid block size %d requested\n", size); - printk(KERN_ERR "hardsect size: %d\n", - bdev_hardsect_size(bdev)); + printk(KERN_ERR "logical block size: %d\n", + bdev_logical_block_size(bdev)); dump_stack(); return NULL; diff --git a/fs/direct-io.c b/fs/direct-io.c index 05763bbc2050..8b10b87dc01a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1127,7 +1127,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, rw = WRITE_ODIRECT; if (bdev) - bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); + bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); if (offset & blocksize_mask) { if (bdev) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 599dbfe504c3..acbb94fdf903 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1696,7 +1696,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - hblock = bdev_hardsect_size(sb->s_bdev); + hblock = bdev_logical_block_size(sb->s_bdev); if (sb->s_blocksize != blocksize) { /* * Make sure the blocksize for the filesystem is larger @@ -2119,7 +2119,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, } blocksize = sb->s_blocksize; - hblock = bdev_hardsect_size(bdev); + hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { printk(KERN_ERR "EXT3-fs: blocksize too small for journal device.\n"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2958f4e6f222..a30549f7a305 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2962,7 +2962,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, } blocksize = sb->s_blocksize; - hblock = bdev_hardsect_size(bdev); + hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { printk(KERN_ERR "EXT4-fs: blocksize too small for journal device.\n"); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ff9473ea753..a3b2ac989fc3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -526,11 +526,11 @@ static int init_sb(struct gfs2_sbd *sdp, int silent) } /* Set up the buffer cache and SB for real */ - if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { + if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) { ret = -EINVAL; fs_err(sdp, "FS block size (%u) is too small for device " "block size (%u)\n", - sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); + sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev)); goto out; } if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa2..a971d24e10ce 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -845,7 +845,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct super_block *sb = sdp->sd_vfs; struct block_device *bdev = sb->s_bdev; const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / - bdev_hardsect_size(sb->s_bdev); + bdev_logical_block_size(sb->s_bdev); u64 blk; sector_t start = 0; sector_t nr_sects = 0; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 7f65b3be4aa9..a91f15b8673c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -515,7 +515,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); if (sb->s_blocksize != blocksize) { - int hw_blocksize = bdev_hardsect_size(sb->s_bdev); + int hw_blocksize = bdev_logical_block_size(sb->s_bdev); if (blocksize < hw_blocksize) { printk(KERN_ERR diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index f76951dcd4a6..6aa7c4713536 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -25,7 +25,7 @@ #include #include #include -#include /* For bdev_hardsect_size(). */ +#include /* For bdev_logical_block_size(). */ #include #include #include @@ -2785,13 +2785,13 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) goto err_out_now; /* We support sector sizes up to the PAGE_CACHE_SIZE. */ - if (bdev_hardsect_size(sb->s_bdev) > PAGE_CACHE_SIZE) { + if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) { if (!silent) ntfs_error(sb, "Device has unsupported sector size " "(%i). The maximum supported sector " "size on this architecture is %lu " "bytes.", - bdev_hardsect_size(sb->s_bdev), + bdev_logical_block_size(sb->s_bdev), PAGE_CACHE_SIZE); goto err_out_now; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 4f85eceab376..09cc25d04611 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1371,7 +1371,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, bdevname(reg->hr_bdev, reg->hr_dev_name); - sectsize = bdev_hardsect_size(reg->hr_bdev); + sectsize = bdev_logical_block_size(reg->hr_bdev); if (sectsize != reg->hr_block_bytes) { mlog(ML_ERROR, "blocksize %u incorrect for device, expected %d", diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 79ff8d9d37e0..5c6163f55039 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -713,7 +713,7 @@ static int ocfs2_sb_probe(struct super_block *sb, *bh = NULL; /* may be > 512 */ - *sector_size = bdev_hardsect_size(sb->s_bdev); + *sector_size = bdev_logical_block_size(sb->s_bdev); if (*sector_size > OCFS2_MAX_BLOCKSIZE) { mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", *sector_size, OCFS2_MAX_BLOCKSIZE); diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 46297683cd34..fc71aab08460 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -76,7 +76,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) Sector sect; res = 0; - blocksize = bdev_hardsect_size(bdev); + blocksize = bdev_logical_block_size(bdev); if (blocksize <= 0) goto out_exit; i_size = i_size_read(bdev->bd_inode); diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 796511886f28..0028d2ef0662 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -110,7 +110,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev, Sector sect; unsigned char *data; u32 this_sector, this_size; - int sector_size = bdev_hardsect_size(bdev) / 512; + int sector_size = bdev_logical_block_size(bdev) / 512; int loopct = 0; /* number of links followed without finding a data partition */ int i; @@ -415,7 +415,7 @@ static struct { int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) { - int sector_size = bdev_hardsect_size(bdev) / 512; + int sector_size = bdev_logical_block_size(bdev) / 512; Sector sect; unsigned char *data; struct partition *p; diff --git a/fs/udf/super.c b/fs/udf/super.c index 72348cc855a4..0ba44107d8f1 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1915,7 +1915,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) { ret = udf_load_vrs(sb, &uopt, silent, &fileset); } else { - uopt.blocksize = bdev_hardsect_size(sb->s_bdev); + uopt.blocksize = bdev_logical_block_size(sb->s_bdev); ret = udf_load_vrs(sb, &uopt, silent, &fileset); if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { if (!silent) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e28800a9f2b5..1418b916fc27 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1501,7 +1501,7 @@ xfs_setsize_buftarg_early( struct block_device *bdev) { return xfs_setsize_buftarg_flags(btp, - PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); + PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); } int diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 56ce53fce72e..872b78b7a101 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -391,7 +391,7 @@ struct request_queue unsigned int max_hw_sectors; unsigned short max_phys_segments; unsigned short max_hw_segments; - unsigned short hardsect_size; + unsigned short logical_block_size; unsigned int max_segment_size; unsigned long seg_boundary_mask; @@ -901,7 +901,7 @@ extern void blk_queue_max_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); -extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); +extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); @@ -988,19 +988,19 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) -static inline int queue_hardsect_size(struct request_queue *q) +static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; - if (q && q->hardsect_size) - retval = q->hardsect_size; + if (q && q->logical_block_size) + retval = q->logical_block_size; return retval; } -static inline int bdev_hardsect_size(struct block_device *bdev) +static inline unsigned short bdev_logical_block_size(struct block_device *bdev) { - return queue_hardsect_size(bdev_get_queue(bdev)); + return queue_logical_block_size(bdev_get_queue(bdev)); } static inline int queue_dma_alignment(struct request_queue *q) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ded2d7c42668..49c2362977fd 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -149,7 +149,7 @@ struct io_restrictions { unsigned max_hw_sectors; unsigned max_sectors; unsigned max_segment_size; - unsigned short hardsect_size; + unsigned short logical_block_size; unsigned short max_hw_segments; unsigned short max_phys_segments; unsigned char no_cluster; /* inverted so that 0 is default */ -- cgit v1.2.3 From e1b28aab5804aa477c33d19855d6747607a885fd Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 26 May 2009 15:41:27 +0100 Subject: GFS2: Remove lockstruct subdir from gfs2 sysfs files The lockstruct sub directory contained two entries, both of which are duplicated elsewhere in the gfs2 sysfs files as well as being available via /proc/mounts. There is no userland program using either of them, so this patch removes them. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 41 ++++++++--------------------------------- 1 file changed, 8 insertions(+), 33 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9f6d48b75fd2..94bd59ec54e2 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -276,25 +276,6 @@ static struct kobj_type gfs2_ktype = { .sysfs_ops = &gfs2_attr_ops, }; -/* - * display struct lm_lockstruct fields - */ - -#define LOCKSTRUCT_ATTR(name, fmt) \ -static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ -{ \ - return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \ -} \ -static struct gfs2_attr lockstruct_attr_##name = __ATTR_RO(name) - -LOCKSTRUCT_ATTR(jid, "%u\n"); -LOCKSTRUCT_ATTR(first, "%u\n"); - -static struct attribute *lockstruct_attrs[] = { - &lockstruct_attr_jid.attr, - &lockstruct_attr_first.attr, - NULL, -}; /* * lock_module. Originally from lock_dlm @@ -397,6 +378,11 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_recover_jid_status); } +static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) +{ + return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); +} + #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) @@ -404,6 +390,7 @@ GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); GDLM_ATTR(id, 0444, lkid_show, NULL); +GDLM_ATTR(jid, 0444, jid_show, NULL); GDLM_ATTR(first, 0444, lkfirst_show, NULL); GDLM_ATTR(first_done, 0444, first_done_show, NULL); GDLM_ATTR(recover, 0200, NULL, recover_store); @@ -415,7 +402,7 @@ static struct attribute *lock_module_attrs[] = { &gdlm_attr_block.attr, &gdlm_attr_withdraw.attr, &gdlm_attr_id.attr, - &lockstruct_attr_jid.attr, + &gdlm_attr_jid.attr, &gdlm_attr_first.attr, &gdlm_attr_first_done.attr, &gdlm_attr_recover.attr, @@ -558,11 +545,6 @@ static struct attribute *tune_attrs[] = { NULL, }; -static struct attribute_group lockstruct_group = { - .name = "lockstruct", - .attrs = lockstruct_attrs, -}; - static struct attribute_group args_group = { .name = "args", .attrs = args_attrs, @@ -588,13 +570,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) if (error) goto fail; - error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group); - if (error) - goto fail_reg; - error = sysfs_create_group(&sdp->sd_kobj, &args_group); if (error) - goto fail_lockstruct; + goto fail_reg; error = sysfs_create_group(&sdp->sd_kobj, &tune_group); if (error) @@ -611,8 +589,6 @@ fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); fail_args: sysfs_remove_group(&sdp->sd_kobj, &args_group); -fail_lockstruct: - sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); fail_reg: kobject_put(&sdp->sd_kobj); fail: @@ -624,7 +600,6 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) { sysfs_remove_group(&sdp->sd_kobj, &tune_group); sysfs_remove_group(&sdp->sd_kobj, &args_group); - sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); kobject_put(&sdp->sd_kobj); } -- cgit v1.2.3 From f6eb53498ee8f725832f3a0fffca90566bb118a6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 26 May 2009 15:50:25 +0100 Subject: GFS2: Remove args subdir from gfs2 sysfs files Since we can cat /proc/mounts there is no need to have this subdirectory in the gfs2 sysfs files. In fact this does not reflect the full range of possible mount argumenmts, where as /proc/mounts does. There was only one userland user of this set of sysfs files and it will function perfectly well without these files being present (in fact that subcommand of gfs2_tool is obsolete anyway). The tune/* subdirectory is also considered mostly obsolete, but there are a few uses of this until mount arguments can be added for the last few functions for which there are no equivalents currently. However the tune/* directory is still in my sights and new code should avoid using it. Only the gfs2_quota and gfs2_tool programs are know to use tune/* at the moment. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 52 +--------------------------------------------------- 1 file changed, 1 insertion(+), 51 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 94bd59ec54e2..23419dc3027b 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -411,44 +411,6 @@ static struct attribute *lock_module_attrs[] = { NULL, }; -#define ARGS_ATTR(name, fmt) \ -static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ -{ \ - return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \ -} \ -static struct gfs2_attr args_attr_##name = __ATTR_RO(name) - -ARGS_ATTR(lockproto, "%s\n"); -ARGS_ATTR(locktable, "%s\n"); -ARGS_ATTR(hostdata, "%s\n"); -ARGS_ATTR(spectator, "%d\n"); -ARGS_ATTR(ignore_local_fs, "%d\n"); -ARGS_ATTR(localcaching, "%d\n"); -ARGS_ATTR(localflocks, "%d\n"); -ARGS_ATTR(debug, "%d\n"); -ARGS_ATTR(upgrade, "%d\n"); -ARGS_ATTR(posix_acl, "%d\n"); -ARGS_ATTR(quota, "%u\n"); -ARGS_ATTR(suiddir, "%d\n"); -ARGS_ATTR(data, "%d\n"); - -static struct attribute *args_attrs[] = { - &args_attr_lockproto.attr, - &args_attr_locktable.attr, - &args_attr_hostdata.attr, - &args_attr_spectator.attr, - &args_attr_ignore_local_fs.attr, - &args_attr_localcaching.attr, - &args_attr_localflocks.attr, - &args_attr_debug.attr, - &args_attr_upgrade.attr, - &args_attr_posix_acl.attr, - &args_attr_quota.attr, - &args_attr_suiddir.attr, - &args_attr_data.attr, - NULL, -}; - /* * get and set struct gfs2_tune fields */ @@ -545,11 +507,6 @@ static struct attribute *tune_attrs[] = { NULL, }; -static struct attribute_group args_group = { - .name = "args", - .attrs = args_attrs, -}; - static struct attribute_group tune_group = { .name = "tune", .attrs = tune_attrs, @@ -570,13 +527,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) if (error) goto fail; - error = sysfs_create_group(&sdp->sd_kobj, &args_group); - if (error) - goto fail_reg; - error = sysfs_create_group(&sdp->sd_kobj, &tune_group); if (error) - goto fail_args; + goto fail_reg; error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group); if (error) @@ -587,8 +540,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); -fail_args: - sysfs_remove_group(&sdp->sd_kobj, &args_group); fail_reg: kobject_put(&sdp->sd_kobj); fail: @@ -599,7 +550,6 @@ fail: void gfs2_sys_fs_del(struct gfs2_sbd *sdp) { sysfs_remove_group(&sdp->sd_kobj, &tune_group); - sysfs_remove_group(&sdp->sd_kobj, &args_group); sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); kobject_put(&sdp->sd_kobj); } -- cgit v1.2.3 From a12af1ebe675e85831fde3c4d0908fc3b0908b7a Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Mon, 1 Jun 2009 12:30:03 -0500 Subject: GFS2: smbd proccess hangs with flock() call. GFS2 currently does not support mandatory flocks. An flock() call with LOCK_MAND triggers unexpected behavior because gfs2 is not checking for this lock type. This patch corrects that. Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 73b6f552f06d..841ddc979388 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -698,8 +698,8 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; - if (__mandatory_lock(&ip->i_inode)) - return -ENOLCK; + if (fl->fl_type & LOCK_MAND) + return -EOPNOTSUPP; if (fl->fl_type == F_UNLCK) { do_unflock(file, fl); -- cgit v1.2.3 From e09f9446b94ac64b27d37e98c1110f29d712cdad Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 3 Jun 2009 10:07:44 +0100 Subject: GFS2: Remove unused variable Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 841ddc979388..73318a3ce6f1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -694,8 +694,6 @@ static void do_unflock(struct file *file, struct file_lock *fl) static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) { - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; if (fl->fl_type & LOCK_MAND) -- cgit v1.2.3 From f6d03139d745198b434f65a28aabed524f415a4c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 5 Jun 2009 07:18:57 +0100 Subject: GFS2: Fix locking issue mounting gfs2meta fs This patch uses sget() to get a reference to the existing gfs2 sb when mouting the gfs2meta filesystem (in fact thats just another mount of the gfs2 filesystem with a different root and this interface is for backward compatibility). Signed-off-by: Steven Whitehouse Reported-by: Benjamin Marzinski Tested-by: Benjamin Marzinski Cc: Christoph Hellwig --- fs/gfs2/ops_fstype.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 2cd1164c88d7..9da161cbb30f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1273,9 +1273,20 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags, return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); } +static int test_meta_super(struct super_block *s, void *ptr) +{ + struct block_device *bdev = ptr; + return (bdev == s->s_bdev); +} + +static int set_meta_super(struct super_block *s, void *ptr) +{ + return -EINVAL; +} + static struct super_block *get_gfs2_sb(const char *dev_name) { - struct super_block *sb; + struct super_block *s; struct path path; int error; @@ -1283,30 +1294,27 @@ static struct super_block *get_gfs2_sb(const char *dev_name) if (error) { printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", dev_name, error); - return NULL; + return ERR_PTR(-ENOENT); } - sb = path.dentry->d_inode->i_sb; - if (sb && (sb->s_type == &gfs2_fs_type)) - atomic_inc(&sb->s_active); - else - sb = NULL; + s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, + path.dentry->d_inode->i_sb->s_bdev); path_put(&path); - return sb; + return s; } static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - struct super_block *sb = NULL; + struct super_block *s; struct gfs2_sbd *sdp; - sb = get_gfs2_sb(dev_name); - if (!sb) { + s = get_gfs2_sb(dev_name); + if (IS_ERR(s)) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); - return -ENOENT; + return PTR_ERR(s); } - sdp = sb->s_fs_info; - mnt->mnt_sb = sb; + sdp = s->s_fs_info; + mnt->mnt_sb = s; mnt->mnt_root = dget(sdp->sd_master_dir); return 0; } -- cgit v1.2.3 From 40bc9a27e00d6c8c7e4dc2865c02d7402a950472 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 10 Jun 2009 09:09:40 +0100 Subject: GFS2: Fix cache coherency between truncate and O_DIRECT read If a page was partially zeroed as the result of a truncate, then it was not being correctly marked dirty. This resulted in the deleted data reappearing if the file was read back via direct I/O. Reported-by: Eric Sandeen Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1153a078920c..329763530dc0 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1012,7 +1012,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) gfs2_trans_add_bh(ip->i_gl, bh, 0); zero_user(page, offset, length); - + mark_buffer_dirty(bh); unlock: unlock_page(page); page_cache_release(page); -- cgit v1.2.3 From 003dec8913d6bebb4ecc989ec04a235cf38f5ea9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 10 Jun 2009 10:31:45 +0100 Subject: GFS2: Merge gfs2_get_sb into gfs2_get_sb_meta These don't need to be separate functions. Reported-by: Christoph Hellwig Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9da161cbb30f..f234aba36fb8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1284,9 +1284,11 @@ static int set_meta_super(struct super_block *s, void *ptr) return -EINVAL; } -static struct super_block *get_gfs2_sb(const char *dev_name) +static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { struct super_block *s; + struct gfs2_sbd *sdp; struct path path; int error; @@ -1294,21 +1296,11 @@ static struct super_block *get_gfs2_sb(const char *dev_name) if (error) { printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", dev_name, error); - return ERR_PTR(-ENOENT); + return error; } s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); - return s; -} - -static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) -{ - struct super_block *s; - struct gfs2_sbd *sdp; - - s = get_gfs2_sb(dev_name); if (IS_ERR(s)) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); return PTR_ERR(s); -- cgit v1.2.3 From b7d245de25d1f0bb75a0d04194b647762b30d3db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Apr 2009 09:46:43 -0400 Subject: gfs2: remove ->write_super and stop maintaining ->s_dirt Signed-off-by: Christoph Hellwig Acked-by: Steven Whitehouse Signed-off-by: Al Viro --- fs/gfs2/log.c | 2 -- fs/gfs2/super.c | 13 ------------- 2 files changed, 15 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index aa62cf5976e8..f2e449c595b4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -764,7 +764,6 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) } gfs2_log_unlock(sdp); - sdp->sd_vfs->s_dirt = 0; up_write(&sdp->sd_log_flush_lock); kfree(ai); @@ -823,7 +822,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) log_refund(sdp, tr); buf_lo_incore_commit(sdp, tr); - sdp->sd_vfs->s_dirt = 1; up_read(&sdp->sd_log_flush_lock); gfs2_log_lock(sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 40bcc37e5a70..0a6801336470 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -787,17 +787,6 @@ restart: gfs2_sys_fs_del(sdp); } -/** - * gfs2_write_super - * @sb: the superblock - * - */ - -static void gfs2_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -} - /** * gfs2_sync_fs - sync the filesystem * @sb: the superblock @@ -807,7 +796,6 @@ static void gfs2_write_super(struct super_block *sb) static int gfs2_sync_fs(struct super_block *sb, int wait) { - sb->s_dirt = 0; if (wait && sb->s_fs_info) gfs2_log_flush(sb->s_fs_info, NULL); return 0; @@ -1324,7 +1312,6 @@ const struct super_operations gfs2_super_ops = { .write_inode = gfs2_write_inode, .delete_inode = gfs2_delete_inode, .put_super = gfs2_put_super, - .write_super = gfs2_write_super, .sync_fs = gfs2_sync_fs, .freeze_fs = gfs2_freeze, .unfreeze_fs = gfs2_unfreeze, -- cgit v1.2.3 From 6cfd0148425e528b859b26e436b01f23f6926224 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 May 2009 15:40:36 +0200 Subject: push BKL down into ->put_super Move BKL into ->put_super from the only caller. A couple of filesystems had trivial enough ->put_super (only kfree and NULLing of s_fs_info + stuff in there) to not get any locking: coda, cramfs, efs, hugetlbfs, omfs, qnx4, shmem, all others got the full treatment. Most of them probably don't need it, but I'd rather sort that out individually. Preferably after all the other BKL pushdowns in that area. [AV: original used to move lock_super() down as well; these changes are removed since we don't do lock_super() at all in generic_shutdown_super() now] [AV: fuse, btrfs and xfs are known to need no damn BKL, exempt] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/adfs/super.c | 4 ++++ fs/affs/super.c | 5 ++++- fs/afs/super.c | 4 ++++ fs/befs/linuxvfs.c | 5 ++++- fs/bfs/inode.c | 4 ++++ fs/cifs/cifsfs.c | 6 +++++- fs/ecryptfs/super.c | 5 +++++ fs/exofs/super.c | 4 ++++ fs/ext2/super.c | 4 +++- fs/ext3/super.c | 5 ++++- fs/ext4/super.c | 2 +- fs/fat/inode.c | 4 ++++ fs/freevxfs/vxfs_super.c | 4 ++++ fs/gfs2/super.c | 4 ++++ fs/hfs/super.c | 4 ++++ fs/hfsplus/super.c | 5 +++++ fs/hpfs/super.c | 5 +++++ fs/isofs/inode.c | 5 +++++ fs/jffs2/super.c | 4 ++++ fs/jfs/super.c | 5 +++++ fs/minix/inode.c | 4 +++- fs/ncpfs/inode.c | 4 ++++ fs/nilfs2/super.c | 4 ++++ fs/ntfs/super.c | 6 +++++- fs/ocfs2/super.c | 4 ++++ fs/reiserfs/super.c | 4 +++- fs/smbfs/inode.c | 4 ++++ fs/squashfs/super.c | 4 ++++ fs/super.c | 3 --- fs/sysv/inode.c | 4 ++++ fs/ubifs/super.c | 5 +++++ fs/udf/super.c | 5 +++++ fs/ufs/super.c | 6 ++++++ 33 files changed, 133 insertions(+), 12 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index dd9becca4241..0ec5aaf47aa7 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -132,11 +132,15 @@ static void adfs_put_super(struct super_block *sb) int i; struct adfs_sb_info *asb = ADFS_SB(sb); + lock_kernel(); + for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); kfree(asb); sb->s_fs_info = NULL; + + unlock_kernel(); } static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) diff --git a/fs/affs/super.c b/fs/affs/super.c index 63f5183f263b..d7386462a8e7 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -29,6 +29,8 @@ affs_put_super(struct super_block *sb) struct affs_sb_info *sbi = AFFS_SB(sb); pr_debug("AFFS: put_super()\n"); + lock_kernel(); + if (!(sb->s_flags & MS_RDONLY)) { AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1); secs_to_datestamp(get_seconds(), @@ -42,7 +44,8 @@ affs_put_super(struct super_block *sb) affs_brelse(sbi->s_root_bh); kfree(sbi); sb->s_fs_info = NULL; - return; + + unlock_kernel(); } static void diff --git a/fs/afs/super.c b/fs/afs/super.c index 76828e5f8a39..ad0514d0115f 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -440,8 +440,12 @@ static void afs_put_super(struct super_block *sb) _enter(""); + lock_kernel(); + afs_put_volume(as->volume); + unlock_kernel(); + _leave(""); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 76afd0d6b86c..9367b6297d84 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,6 +737,8 @@ parse_options(char *options, befs_mount_options * opts) static void befs_put_super(struct super_block *sb) { + lock_kernel(); + kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; @@ -747,7 +749,8 @@ befs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; - return; + + unlock_kernel(); } /* Allocate private field of the superblock, fill it. diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 4cf3d269e271..3a9a1361fdc1 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -217,6 +217,8 @@ static void bfs_put_super(struct super_block *s) if (!info) return; + lock_kernel(); + if (s->s_dirt) bfs_write_super(s); @@ -225,6 +227,8 @@ static void bfs_put_super(struct super_block *s) kfree(info->si_imap); kfree(info); s->s_fs_info = NULL; + + unlock_kernel(); } static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0a10a59b6392..0d92114195ab 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -204,6 +204,9 @@ cifs_put_super(struct super_block *sb) cFYI(1, ("Empty cifs superblock info passed to unmount")); return; } + + lock_kernel(); + rc = cifs_umount(sb, cifs_sb); if (rc) cERROR(1, ("cifs_umount failed with return code %d", rc)); @@ -216,7 +219,8 @@ cifs_put_super(struct super_block *sb) unload_nls(cifs_sb->local_nls); kfree(cifs_sb); - return; + + unlock_kernel(); } static int diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index fa4c7e7d15d9..12d649602d3a 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "ecryptfs_kernel.h" @@ -120,9 +121,13 @@ static void ecryptfs_put_super(struct super_block *sb) { struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); + lock_kernel(); + ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); ecryptfs_set_superblock_private(sb, NULL); + + unlock_kernel(); } /** diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 3cdb761db8ad..cd1f8b18a218 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -258,6 +258,8 @@ static void exofs_put_super(struct super_block *sb) int num_pend; struct exofs_sb_info *sbi = sb->s_fs_info; + lock_kernel(); + if (sb->s_dirt) exofs_write_super(sb); @@ -274,6 +276,8 @@ static void exofs_put_super(struct super_block *sb) osduld_put_device(sbi->s_dev); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 932a2bcb6908..a44963d8edbd 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -114,6 +114,8 @@ static void ext2_put_super (struct super_block * sb) int i; struct ext2_sb_info *sbi = EXT2_SB(sb); + lock_kernel(); + if (sb->s_dirt) ext2_write_super(sb); @@ -138,7 +140,7 @@ static void ext2_put_super (struct super_block * sb) kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; + unlock_kernel(); } static struct kmem_cache * ext2_inode_cachep; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 1efd958687e9..546b8d732bf2 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -398,6 +398,8 @@ static void ext3_put_super (struct super_block * sb) struct ext3_super_block *es = sbi->s_es; int i, err; + lock_kernel(); + ext3_xattr_put_super(sb); err = journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -446,7 +448,8 @@ static void ext3_put_super (struct super_block * sb) sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; + + unlock_kernel(); } static struct kmem_cache *ext3_inode_cachep; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0d3034c5e8a4..1d4180b86772 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -577,6 +577,7 @@ static void ext4_put_super(struct super_block *sb) int i, err; lock_super(sb); + lock_kernel(); if (sb->s_dirt) ext4_write_super(sb); @@ -646,7 +647,6 @@ static void ext4_put_super(struct super_block *sb) unlock_super(sb); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - lock_kernel(); kfree(sbi->s_blockgroup_lock); kfree(sbi); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4978621511bf..2b88c93af227 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -451,6 +451,8 @@ static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); + lock_kernel(); + if (sb->s_dirt) fat_write_super(sb); @@ -470,6 +472,8 @@ static void fat_put_super(struct super_block *sb) sb->s_fs_info = NULL; kfree(sbi); + + unlock_kernel(); } static struct kmem_cache *fat_inode_cachep; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 1dacda831577..cdbd1654e4cd 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -80,12 +80,16 @@ vxfs_put_super(struct super_block *sbp) { struct vxfs_sb_info *infp = VXFS_SBI(sbp); + lock_kernel(); + vxfs_put_fake_inode(infp->vsi_fship); vxfs_put_fake_inode(infp->vsi_ilist); vxfs_put_fake_inode(infp->vsi_stilist); brelse(infp->vsi_bp); kfree(infp); + + unlock_kernel(); } /** diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0a6801336470..c8930b31cdf0 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -719,6 +719,8 @@ static void gfs2_put_super(struct super_block *sb) int error; struct gfs2_jdesc *jd; + lock_kernel(); + /* Unfreeze the filesystem, if we need to */ mutex_lock(&sdp->sd_freeze_lock); @@ -785,6 +787,8 @@ restart: /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); + + unlock_kernel(); } /** diff --git a/fs/hfs/super.c b/fs/hfs/super.c index e071e6d06463..9f5eaa01cc77 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -65,11 +65,15 @@ static void hfs_write_super(struct super_block *sb) */ static void hfs_put_super(struct super_block *sb) { + lock_kernel(); + if (sb->s_dirt) hfs_write_super(sb); hfs_mdb_close(sb); /* release the MDB's resources */ hfs_mdb_put(sb); + + unlock_kernel(); } /* diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 40bdab79dae8..9b292dcc39c8 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -199,6 +199,9 @@ static void hfsplus_put_super(struct super_block *sb) dprint(DBG_SUPER, "hfsplus_put_super\n"); if (!sb->s_fs_info) return; + + lock_kernel(); + if (sb->s_dirt) hfsplus_write_super(sb); if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { @@ -220,6 +223,8 @@ static void hfsplus_put_super(struct super_block *sb) unload_nls(HFSPLUS_SB(sb).nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fc77965be841..437a32e9deac 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -99,11 +99,16 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, static void hpfs_put_super(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); + + lock_kernel(); + kfree(sbi->sb_cp_table); kfree(sbi->sb_bmp_dir); unmark_dirty(s); s->s_fs_info = NULL; kfree(sbi); + + unlock_kernel(); } unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index b4cbe9603c7d..068b34b5a107 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -42,11 +42,16 @@ static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qst static void isofs_put_super(struct super_block *sb) { struct isofs_sb_info *sbi = ISOFS_SB(sb); + #ifdef CONFIG_JOLIET + lock_kernel(); + if (sbi->s_nls_iocharset) { unload_nls(sbi->s_nls_iocharset); sbi->s_nls_iocharset = NULL; } + + unlock_kernel(); #endif kfree(sbi); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 5059e9633edb..37b12125c127 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -174,6 +174,8 @@ static void jffs2_put_super (struct super_block *sb) D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); + lock_kernel(); + if (sb->s_dirt) jffs2_write_super(sb); @@ -195,6 +197,8 @@ static void jffs2_put_super (struct super_block *sb) if (c->mtd->sync) c->mtd->sync(c->mtd); + unlock_kernel(); + D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index d9b0e92b3602..3eb13adf3862 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -183,6 +183,9 @@ static void jfs_put_super(struct super_block *sb) int rc; jfs_info("In jfs_put_super"); + + lock_kernel(); + rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); @@ -195,6 +198,8 @@ static void jfs_put_super(struct super_block *sb) sbi->direct_inode = NULL; kfree(sbi); + + unlock_kernel(); } enum { diff --git a/fs/minix/inode.c b/fs/minix/inode.c index daad3c2740db..7eb53970f4bc 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -35,6 +35,8 @@ static void minix_put_super(struct super_block *sb) int i; struct minix_sb_info *sbi = minix_sb(sb); + lock_kernel(); + if (!(sb->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ sbi->s_ms->s_state = sbi->s_mount_state; @@ -49,7 +51,7 @@ static void minix_put_super(struct super_block *sb) sb->s_fs_info = NULL; kfree(sbi); - return; + unlock_kernel(); } static struct kmem_cache * minix_inode_cachep; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d642f0e5b365..b99ce205b1bd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -736,6 +736,8 @@ static void ncp_put_super(struct super_block *sb) { struct ncp_server *server = NCP_SBP(sb); + lock_kernel(); + ncp_lock_server(server); ncp_disconnect(server); ncp_unlock_server(server); @@ -769,6 +771,8 @@ static void ncp_put_super(struct super_block *sb) vfree(server->packet); sb->s_fs_info = NULL; kfree(server); + + unlock_kernel(); } static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 7901d8cbb9b1..7262e8427c20 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -316,6 +316,8 @@ static void nilfs_put_super(struct super_block *sb) struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; + lock_kernel(); + if (sb->s_dirt) nilfs_write_super(sb); @@ -333,6 +335,8 @@ static void nilfs_put_super(struct super_block *sb) sbi->s_super = NULL; sb->s_fs_info = NULL; kfree(sbi); + + unlock_kernel(); } /** diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6aa7c4713536..a9ec4e1084e4 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2246,6 +2246,9 @@ static void ntfs_put_super(struct super_block *sb) ntfs_volume *vol = NTFS_SB(sb); ntfs_debug("Entering."); + + lock_kernel(); + #ifdef NTFS_RW /* * Commit all inodes while they are still open in case some of them @@ -2444,7 +2447,8 @@ static void ntfs_put_super(struct super_block *sb) } sb->s_fs_info = NULL; kfree(vol); - return; + + unlock_kernel(); } /** diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 3eb076ce4c07..02737596b597 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1536,9 +1536,13 @@ static void ocfs2_put_super(struct super_block *sb) { mlog_entry("(0x%p)\n", sb); + lock_kernel(); + ocfs2_sync_blockdev(sb); ocfs2_dismount_volume(sb, 0); + unlock_kernel(); + mlog_exit_void(); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3da0401c0a96..90dcb7b033ea 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -465,6 +465,8 @@ static void reiserfs_put_super(struct super_block *s) struct reiserfs_transaction_handle th; th.t_trans_id = 0; + lock_kernel(); + if (s->s_dirt) reiserfs_write_super(s); @@ -500,7 +502,7 @@ static void reiserfs_put_super(struct super_block *s) kfree(s->s_fs_info); s->s_fs_info = NULL; - return; + unlock_kernel(); } static struct kmem_cache *reiserfs_inode_cachep; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index fc27fbfc5397..1402d2d54f52 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -474,6 +474,8 @@ smb_put_super(struct super_block *sb) { struct smb_sb_info *server = SMB_SB(sb); + lock_kernel(); + smb_lock_server(server); server->state = CONN_INVALID; smbiod_unregister_server(server); @@ -489,6 +491,8 @@ smb_put_super(struct super_block *sb) smb_unlock_server(server); put_pid(server->conn_pid); kfree(server); + + unlock_kernel(); } static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 0adc624c956f..3b52770f46ff 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -338,6 +338,8 @@ static int squashfs_remount(struct super_block *sb, int *flags, char *data) static void squashfs_put_super(struct super_block *sb) { + lock_kernel(); + if (sb->s_fs_info) { struct squashfs_sb_info *sbi = sb->s_fs_info; squashfs_cache_delete(sbi->block_cache); @@ -350,6 +352,8 @@ static void squashfs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; } + + unlock_kernel(); } diff --git a/fs/super.c b/fs/super.c index 54fd331f0cab..bdd7158b785e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -309,7 +309,6 @@ void generic_shutdown_super(struct super_block *sb) /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); - lock_kernel(); if (sop->put_super) sop->put_super(sb); @@ -320,8 +319,6 @@ void generic_shutdown_super(struct super_block *sb) "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } - - unlock_kernel(); put_fs_excl(); } spin_lock(&sb_lock); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index cd80316302cc..a8189864c241 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -72,6 +72,8 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); + lock_kernel(); + if (sb->s_dirt) sysv_write_super(sb); @@ -87,6 +89,8 @@ static void sysv_put_super(struct super_block *sb) brelse(sbi->s_bh2); kfree(sbi); + + unlock_kernel(); } static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 84f3c7fd1552..522c3fd7eb3c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1684,6 +1684,9 @@ static void ubifs_put_super(struct super_block *sb) ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); + + lock_kernel(); + /* * The following asserts are only valid if there has not been a failure * of the media. For example, there will be dirty inodes if we failed @@ -1750,6 +1753,8 @@ static void ubifs_put_super(struct super_block *sb) ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); kfree(c); + + unlock_kernel(); } static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) diff --git a/fs/udf/super.c b/fs/udf/super.c index 0ba44107d8f1..04802cc39b18 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2062,6 +2062,9 @@ static void udf_put_super(struct super_block *sb) struct udf_sb_info *sbi; sbi = UDF_SB(sb); + + lock_kernel(); + if (sbi->s_vat_inode) iput(sbi->s_vat_inode); if (sbi->s_partitions) @@ -2077,6 +2080,8 @@ static void udf_put_super(struct super_block *sb) kfree(sbi->s_partmaps); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } static int udf_sync_fs(struct super_block *sb, int wait) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 74afb9fbf58e..2b4d2b6234df 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -594,6 +594,9 @@ static void ufs_put_super_internal(struct super_block *sb) UFSD("ENTER\n"); + + lock_kernel(); + ufs_put_cstotal(sb); size = uspi->s_cssize; blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -621,6 +624,9 @@ static void ufs_put_super_internal(struct super_block *sb) brelse (sbi->s_ucg[i]); kfree (sbi->s_ucg); kfree (base); + + unlock_kernel(); + UFSD("EXIT\n"); } -- cgit v1.2.3 From 63997775b795f97ef51f3e56bc3abc9edc04bbb0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 12 Jun 2009 08:49:20 +0100 Subject: GFS2: Add tracepoints This patch adds the ability to trace various aspects of the GFS2 filesystem. The trace points are divided into three groups, glocks, logging and bmap. These points have been chosen because they allow inspection of the major internal functions of GFS2 and they are also generic enough that they are unlikely to need any major changes as the filesystem evolves. Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 1 + fs/gfs2/bmap.c | 3 + fs/gfs2/glock.c | 12 +- fs/gfs2/log.c | 9 +- fs/gfs2/lops.c | 3 + fs/gfs2/ops_fstype.c | 2 + fs/gfs2/rgrp.c | 11 +- fs/gfs2/trace_gfs2.h | 407 +++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 442 insertions(+), 6 deletions(-) create mode 100644 fs/gfs2/trace_gfs2.h (limited to 'fs/gfs2') diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index d53a9bea1c2f..3da2f1f4f738 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,3 +1,4 @@ +EXTRA_CFLAGS := -I$(src) obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o log.o lops.o main.o meta_io.o \ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 329763530dc0..6d47379e794b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -25,6 +25,7 @@ #include "trans.h" #include "dir.h" #include "util.h" +#include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to @@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, clear_buffer_mapped(bh_map); clear_buffer_new(bh_map); clear_buffer_boundary(bh_map); + trace_gfs2_bmap(ip, bh_map, lblock, create, 1); if (gfs2_is_dir(ip)) { bsize = sdp->sd_jbsize; arr = sdp->sd_jheightsize; @@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, ret = 0; out: release_metapath(&mp); + trace_gfs2_bmap(ip, bh_map, lblock, create, ret); bmap_unlock(ip, create); return ret; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2bf62bcc5181..297421c0427a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -39,6 +39,8 @@ #include "super.h" #include "util.h" #include "bmap.h" +#define CREATE_TRACE_POINTS +#include "trace_gfs2.h" struct gfs2_gl_hash_bucket { struct hlist_head hb_list; @@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl) if (aspace) gfs2_aspace_put(aspace); - + trace_gfs2_glock_put(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); } @@ -317,14 +319,17 @@ restart: return 2; gh->gh_error = ret; list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 1); gfs2_holder_wake(gh); goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 0); gfs2_holder_wake(gh); continue; } @@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret) else continue; list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); } } @@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) int rv; spin_lock(&gl->gl_spin); + trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, gl->gl_demote_state != state) { gl->gl_demote_state = LM_ST_UNLOCKED; } + trace_gfs2_demote_rq(gl); } /** @@ -936,6 +944,7 @@ fail: goto do_cancel; return; } + trace_gfs2_glock_queue(gh, 1); list_add_tail(&gh->gh_list, insert_pt); do_cancel: gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); @@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) return; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f2e449c595b4..13c6237c5f67 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -28,6 +28,7 @@ #include "meta_io.h" #include "util.h" #include "dir.h" +#include "trace_gfs2.h" #define PULL 1 @@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) gfs2_log_lock(sdp); } atomic_sub(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, -blks); gfs2_log_unlock(sdp); mutex_unlock(&sdp->sd_log_reserve_mutex); @@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) gfs2_log_lock(sdp); atomic_add(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); @@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) gfs2_log_lock(sdp); atomic_add(dist, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, dist); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); @@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) up_write(&sdp->sd_log_flush_lock); return; } + trace_gfs2_log_flush(sdp, 1); ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ai->ai_ail1_list); @@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ gfs2_log_lock(sdp); atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ + trace_gfs2_log_blocks(sdp, -1); gfs2_log_unlock(sdp); log_write_header(sdp, 0, PULL); } @@ -763,7 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) ai = NULL; } gfs2_log_unlock(sdp); - + trace_gfs2_log_flush(sdp, 0); up_write(&sdp->sd_log_flush_lock); kfree(ai); @@ -787,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; atomic_add(unused, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, unused); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); sdp->sd_log_blks_reserved = reserved; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 00315f50fa46..9969ff062c5b 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -27,6 +27,7 @@ #include "rgrp.h" #include "trans.h" #include "util.h" +#include "trace_gfs2.h" /** * gfs2_pin - Pin a buffer in memory @@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) if (bd->bd_ail) list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); get_bh(bh); + trace_gfs2_pin(bd, 1); } /** @@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, bd->bd_ail = ai; list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + trace_gfs2_pin(bd, 0); gfs2_log_unlock(sdp); unlock_buffer(bh); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index cc34f271b3e7..7bc3c45cd676 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -33,6 +33,7 @@ #include "log.h" #include "quota.h" #include "dir.h" +#include "trace_gfs2.h" #define DO 0 #define UNDO 1 @@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) /* Map the extents for this journal's blocks */ map_journal_extents(sdp); } + trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); if (sdp->sd_lockstruct.ls_first) { unsigned int x; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index de3239731db8..daa4ae341a29 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -29,6 +29,7 @@ #include "util.h" #include "log.h" #include "inode.h" +#include "trace_gfs2.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) @@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone -= *n; spin_unlock(&sdp->sd_rindex_spin); - + trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); *bn = block; return 0; @@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone--; spin_unlock(&sdp->sd_rindex_spin); - + trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); return block; } @@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); if (!rgd) return; - + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); if (!rgd) return; - + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode) rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); if (!rgd) return; + trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_trans_add_rg(rgd); @@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { gfs2_free_uninit_di(rgd, ip->i_no_addr); + trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, ip->i_no_addr, 1); } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h new file mode 100644 index 000000000000..98d6ef1c1dc0 --- /dev/null +++ b/fs/gfs2/trace_gfs2.h @@ -0,0 +1,407 @@ +#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GFS2_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gfs2 +#define TRACE_INCLUDE_FILE trace_gfs2 + +#include +#include +#include +#include +#include "incore.h" +#include "glock.h" + +#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } +#define glock_trace_name(x) __print_symbolic(x, \ + dlm_state_name(IV), \ + dlm_state_name(NL), \ + dlm_state_name(CR), \ + dlm_state_name(CW), \ + dlm_state_name(PR), \ + dlm_state_name(PW), \ + dlm_state_name(EX)) + +#define block_state_name(x) __print_symbolic(x, \ + { GFS2_BLKST_FREE, "free" }, \ + { GFS2_BLKST_USED, "used" }, \ + { GFS2_BLKST_DINODE, "dinode" }, \ + { GFS2_BLKST_UNLINKED, "unlinked" }) + +#define show_glock_flags(flags) __print_flags(flags, "", \ + {(1UL << GLF_LOCK), "l" }, \ + {(1UL << GLF_DEMOTE), "D" }, \ + {(1UL << GLF_PENDING_DEMOTE), "d" }, \ + {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \ + {(1UL << GLF_DIRTY), "y" }, \ + {(1UL << GLF_LFLUSH), "f" }, \ + {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ + {(1UL << GLF_REPLY_PENDING), "r" }, \ + {(1UL << GLF_INITIAL), "I" }, \ + {(1UL << GLF_FROZEN), "F" }) + +#ifndef NUMPTY +#define NUMPTY +static inline u8 glock_trace_state(unsigned int state) +{ + switch(state) { + case LM_ST_SHARED: + return DLM_LOCK_PR; + case LM_ST_DEFERRED: + return DLM_LOCK_CW; + case LM_ST_EXCLUSIVE: + return DLM_LOCK_EX; + } + return DLM_LOCK_NL; +} +#endif + +/* Section 1 - Locking + * + * Objectives: + * Latency: Remote demote request to state change + * Latency: Local lock request to state change + * Latency: State change to lock grant + * Correctness: Ordering of local lock state vs. I/O requests + * Correctness: Responses to remote demote requests + */ + +/* General glock state change (DLM lock request completes) */ +TRACE_EVENT(gfs2_glock_state_change, + + TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state), + + TP_ARGS(gl, new_state), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( u8, new_state ) + __field( u8, dmt_state ) + __field( u8, tgt_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gl->gl_name.ln_number; + __entry->gltype = gl->gl_name.ln_type; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->new_state = glock_trace_state(new_state); + __entry->tgt_state = glock_trace_state(gl->gl_target); + __entry->dmt_state = glock_trace_state(gl->gl_demote_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(__entry->new_state), + glock_trace_name(__entry->tgt_state), + glock_trace_name(__entry->dmt_state), + show_glock_flags(__entry->flags)) +); + +/* State change -> unlocked, glock is being deallocated */ +TRACE_EVENT(gfs2_glock_put, + + TP_PROTO(const struct gfs2_glock *gl), + + TP_ARGS(gl), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->gltype = gl->gl_name.ln_type; + __entry->glnum = gl->gl_name.ln_number; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->gltype, (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(DLM_LOCK_IV), + show_glock_flags(__entry->flags)) + +); + +/* Callback (local or remote) requesting lock demotion */ +TRACE_EVENT(gfs2_demote_rq, + + TP_PROTO(const struct gfs2_glock *gl), + + TP_ARGS(gl), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( u8, dmt_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->gltype = gl->gl_name.ln_type; + __entry->glnum = gl->gl_name.ln_number; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->dmt_state = glock_trace_state(gl->gl_demote_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(__entry->dmt_state), + show_glock_flags(__entry->flags)) + +); + +/* Promotion/grant of a glock */ +TRACE_EVENT(gfs2_promote, + + TP_PROTO(const struct gfs2_holder *gh, int first), + + TP_ARGS(gh, first), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( int, first ) + __field( u8, state ) + ), + + TP_fast_assign( + __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gh->gh_gl->gl_name.ln_number; + __entry->gltype = gh->gh_gl->gl_name.ln_type; + __entry->first = first; + __entry->state = glock_trace_state(gh->gh_state); + ), + + TP_printk("%u,%u glock %u:%llu promote %s %s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + __entry->first ? "first": "other", + glock_trace_name(__entry->state)) +); + +/* Queue/dequeue a lock request */ +TRACE_EVENT(gfs2_glock_queue, + + TP_PROTO(const struct gfs2_holder *gh, int queue), + + TP_ARGS(gh, queue), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( int, queue ) + __field( u8, state ) + ), + + TP_fast_assign( + __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gh->gh_gl->gl_name.ln_number; + __entry->gltype = gh->gh_gl->gl_name.ln_type; + __entry->queue = queue; + __entry->state = glock_trace_state(gh->gh_state); + ), + + TP_printk("%u,%u glock %u:%llu %squeue %s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + __entry->queue ? "" : "de", + glock_trace_name(__entry->state)) +); + +/* Section 2 - Log/journal + * + * Objectives: + * Latency: Log flush time + * Correctness: pin/unpin vs. disk I/O ordering + * Performance: Log usage stats + */ + +/* Pin/unpin a block in the log */ +TRACE_EVENT(gfs2_pin, + + TP_PROTO(const struct gfs2_bufdata *bd, int pin), + + TP_ARGS(bd, pin), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, pin ) + __field( u32, len ) + __field( sector_t, block ) + __field( u64, ino ) + ), + + TP_fast_assign( + __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev; + __entry->pin = pin; + __entry->len = bd->bd_bh->b_size; + __entry->block = bd->bd_bh->b_blocknr; + __entry->ino = bd->bd_gl->gl_name.ln_number; + ), + + TP_printk("%u,%u log %s %llu/%lu inode %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->pin ? "pin" : "unpin", + (unsigned long long)__entry->block, + (unsigned long)__entry->len, + (unsigned long long)__entry->ino) +); + +/* Flushing the log */ +TRACE_EVENT(gfs2_log_flush, + + TP_PROTO(const struct gfs2_sbd *sdp, int start), + + TP_ARGS(sdp, start), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, start ) + __field( u64, log_seq ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->start = start; + __entry->log_seq = sdp->sd_log_sequence; + ), + + TP_printk("%u,%u log flush %s %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->start ? "start" : "end", + (unsigned long long)__entry->log_seq) +); + +/* Reserving/releasing blocks in the log */ +TRACE_EVENT(gfs2_log_blocks, + + TP_PROTO(const struct gfs2_sbd *sdp, int blocks), + + TP_ARGS(sdp, blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, blocks ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->blocks = blocks; + ), + + TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->blocks) +); + +/* Section 3 - bmap + * + * Objectives: + * Latency: Bmap request time + * Performance: Block allocator tracing + * Correctness: Test of disard generation vs. blocks allocated + */ + +/* Map an extent of blocks, possibly a new allocation */ +TRACE_EVENT(gfs2_bmap, + + TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh, + sector_t lblock, int create, int errno), + + TP_ARGS(ip, bh, lblock, create, errno), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, lblock ) + __field( sector_t, pblock ) + __field( u64, inum ) + __field( unsigned long, state ) + __field( u32, len ) + __field( int, create ) + __field( int, errno ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->lblock = lblock; + __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0; + __entry->inum = ip->i_no_addr; + __entry->state = bh->b_state; + __entry->len = bh->b_size; + __entry->create = create; + __entry->errno = errno; + ), + + TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->lblock, + (unsigned long)__entry->len, + (unsigned long long)__entry->pblock, + __entry->state, __entry->create ? "create " : "nocreate", + __entry->errno) +); + +/* Keep track of blocks as they are allocated/freed */ +TRACE_EVENT(gfs2_block_alloc, + + TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len, + u8 block_state), + + TP_ARGS(ip, block, len, block_state), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, start ) + __field( u64, inum ) + __field( u32, len ) + __field( u8, block_state ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->start = block; + __entry->inum = ip->i_no_addr; + __entry->len = len; + __entry->block_state = block_state; + ), + + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->start, + (unsigned long)__entry->len, + block_state_name(__entry->block_state)) +); + +#endif /* _TRACE_GFS2_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include + -- cgit v1.2.3 From 3ea400581f2b595afd91207bbd79c11cb38598e0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 12 Jun 2009 13:40:47 +0100 Subject: GFS2: Remove lock_kernel from gfs2_put_super() It is not required here. Signed-off-by: Steven Whitehouse Cc: Christoph Hellwig --- fs/gfs2/super.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index c8930b31cdf0..0a6801336470 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -719,8 +719,6 @@ static void gfs2_put_super(struct super_block *sb) int error; struct gfs2_jdesc *jd; - lock_kernel(); - /* Unfreeze the filesystem, if we need to */ mutex_lock(&sdp->sd_freeze_lock); @@ -787,8 +785,6 @@ restart: /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); - - unlock_kernel(); } /** -- cgit v1.2.3