summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-27 04:58:44 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-27 04:58:44 +0400
commit426e1f5cec4821945642230218876b0e89aafab1 (patch)
tree2728ace018d0698886989da586210ef1543a7098 /fs
parent9e5fca251f44832cb996961048ea977f80faf6ea (diff)
parent63997e98a3be68d7cec806d22bf9b02b2e1daabb (diff)
downloadlinux-426e1f5cec4821945642230218876b0e89aafab1.tar.xz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (52 commits) split invalidate_inodes() fs: skip I_FREEING inodes in writeback_sb_inodes fs: fold invalidate_list into invalidate_inodes fs: do not drop inode_lock in dispose_list fs: inode split IO and LRU lists fs: switch bdev inode bdi's correctly fs: fix buffer invalidation in invalidate_list fsnotify: use dget_parent smbfs: use dget_parent exportfs: use dget_parent fs: use RCU read side protection in d_validate fs: clean up dentry lru modification fs: split __shrink_dcache_sb fs: improve DCACHE_REFERENCED usage fs: use percpu counter for nr_dentry and nr_dentry_unused fs: simplify __d_free fs: take dcache_lock inside __d_path fs: do not assign default i_ino in new_inode fs: introduce a per-cpu last_ino allocator new helper: ihold() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c5
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/aio.c14
-rw-r--r--fs/anon_inodes.c6
-rw-r--r--fs/autofs4/inode.c1
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/binfmt_misc.c1
-rw-r--r--fs/block_dev.c34
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/buffer.c26
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/configfs/inode.c1
-rw-r--r--fs/dcache.c277
-rw-r--r--fs/debugfs/inode.c1
-rw-r--r--fs/exofs/file.c6
-rw-r--r--fs/exofs/namei.c2
-rw-r--r--fs/exportfs/expfs.c17
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/inode.c11
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext3/inode.c4
-rw-r--r--fs/ext3/namei.c2
-rw-r--r--fs/ext4/inode.c11
-rw-r--r--fs/ext4/mballoc.c1
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c1
-rw-r--r--fs/fs-writeback.c80
-rw-r--r--fs/fuse/control.c1
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/ops_inode.c8
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/hfs/hfs_fs.h13
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/mdb.c4
-rw-r--r--fs/hfs/super.c1
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/inode.c526
-rw-r--r--fs/internal.h7
-rw-r--r--fs/isofs/inode.c17
-rw-r--r--fs/jffs2/dir.c4
-rw-r--r--fs/jfs/jfs_imap.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/libfs.c8
-rw-r--r--fs/logfs/dir.c2
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namei.c16
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfsd/vfs.c16
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/notify/fsnotify.c33
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/ntfs/super.c19
-rw-r--r--fs/ocfs2/aops.c19
-rw-r--r--fs/ocfs2/aops.h3
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/read_write.c28
-rw-r--r--fs/reiserfs/inode.c24
-rw-r--r--fs/reiserfs/ioctl.c6
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/reiserfs/xattr.c7
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/smbfs/dir.c16
-rw-r--r--fs/smbfs/inode.c1
-rw-r--r--fs/smbfs/proc.c10
-rw-r--r--fs/super.c8
-rw-r--r--fs/sysv/namei.c2
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/xfs_inode.h2
90 files changed, 760 insertions, 643 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9e670d527646..ef5905f7c8a3 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1789,9 +1789,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
kfree(st);
} else {
/* Caching disabled. No need to get upto date stat info.
- * This dentry will be released immediately. So, just i_count++
+ * This dentry will be released immediately. So, just hold the
+ * inode
*/
- atomic_inc(&old_dentry->d_inode->i_count);
+ ihold(old_dentry->d_inode);
}
dentry->d_op = old_dentry->d_op;
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c4a9875bd1a6..0a90dcd46de2 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -894,9 +894,9 @@ affs_truncate(struct inode *inode)
if (AFFS_SB(sb)->s_flags & SF_OFS) {
struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
u32 tmp;
- if (IS_ERR(ext_bh)) {
+ if (IS_ERR(bh)) {
affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)",
- ext, PTR_ERR(ext_bh));
+ ext, PTR_ERR(bh));
return;
}
tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 3a0fdec175ba..5d828903ac69 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
mark_buffer_dirty_inode(inode_bh, inode);
inode->i_nlink = 2;
- atomic_inc(&inode->i_count);
+ ihold(inode);
}
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 0d38c09bd55e..5439e1bc9a86 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
if (ret < 0)
goto link_error;
- atomic_inc(&vnode->vfs_inode.i_count);
+ ihold(&vnode->vfs_inode);
d_instantiate(dentry, &vnode->vfs_inode);
key_put(key);
_leave(" = 0");
diff --git a/fs/aio.c b/fs/aio.c
index 250b0a73c8a8..8c8f6c5b6d79 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1543,7 +1543,19 @@ static void aio_batch_add(struct address_space *mapping,
}
abe = mempool_alloc(abe_pool, GFP_KERNEL);
- BUG_ON(!igrab(mapping->host));
+
+ /*
+ * we should be using igrab here, but
+ * we don't want to hammer on the global
+ * inode spinlock just to take an extra
+ * reference on a file that we must already
+ * have a reference to.
+ *
+ * When we're called, we always have a reference
+ * on the file, so we must always have a reference
+ * on the inode, so ihold() is safe here.
+ */
+ ihold(mapping->host);
abe->mapping = mapping;
hlist_add_head(&abe->list, &batch_hash[bucket]);
return;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e4b75d6eda83..5365527ca43f 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -111,10 +111,9 @@ struct file *anon_inode_getfile(const char *name,
path.mnt = mntget(anon_inode_mnt);
/*
* We know the anon_inode inode count is always greater than zero,
- * so we can avoid doing an igrab() and we can use an open-coded
- * atomic_inc().
+ * so ihold() is safe.
*/
- atomic_inc(&anon_inode_inode->i_count);
+ ihold(anon_inode_inode);
path.dentry->d_op = &anon_inodefs_dentry_operations;
d_instantiate(path.dentry, anon_inode_inode);
@@ -194,6 +193,7 @@ static struct inode *anon_inode_mkinode(void)
if (!inode)
return ERR_PTR(-ENOMEM);
+ inode->i_ino = get_next_ino();
inode->i_fop = &anon_inode_fops;
inode->i_mapping->a_ops = &anon_aops;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 821b2b955dac..ac87e49fa706 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb,
inode->i_gid = sb->s_root->d_inode->i_gid;
}
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_ino = get_next_ino();
if (S_ISDIR(inf->mode)) {
inode->i_nlink = 2;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d967e052b779..685ecff3ab31 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
inc_nlink(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_instantiate(new, inode);
mutex_unlock(&info->bfs_lock);
return 0;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 139fc8083f53..29990f0eee0c 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
struct inode * inode = new_inode(sb);
if (inode) {
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_atime = inode->i_mtime = inode->i_ctime =
current_fs_time(inode->i_sb);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b737451e2e9d..dea3b628a6ce 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -48,6 +48,21 @@ inline struct block_device *I_BDEV(struct inode *inode)
EXPORT_SYMBOL(I_BDEV);
+/*
+ * move the inode from it's current bdi to the a new bdi. if the inode is dirty
+ * we need to move it onto the dirty list of @dst so that the inode is always
+ * on the right list.
+ */
+static void bdev_inode_switch_bdi(struct inode *inode,
+ struct backing_dev_info *dst)
+{
+ spin_lock(&inode_lock);
+ inode->i_data.backing_dev_info = dst;
+ if (inode->i_state & I_DIRTY)
+ list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+ spin_unlock(&inode_lock);
+}
+
static sector_t max_block(struct block_device *bdev)
{
sector_t retval = ~((sector_t)0);
@@ -550,7 +565,7 @@ EXPORT_SYMBOL(bdget);
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- atomic_inc(&bdev->bd_inode->i_count);
+ ihold(bdev->bd_inode);
return bdev;
}
@@ -580,7 +595,7 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
if (bdev) {
- atomic_inc(&bdev->bd_inode->i_count);
+ ihold(bdev->bd_inode);
spin_unlock(&bdev_lock);
return bdev;
}
@@ -591,12 +606,12 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock);
if (!inode->i_bdev) {
/*
- * We take an additional bd_inode->i_count for inode,
+ * We take an additional reference to bd_inode,
* and it's released in clear_inode() of inode.
* So, we can access it via ->i_mapping always
* without igrab().
*/
- atomic_inc(&bdev->bd_inode->i_count);
+ ihold(bdev->bd_inode);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
@@ -1390,7 +1405,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
bdi = &default_backing_dev_info;
- bdev->bd_inode->i_data.backing_dev_info = bdi;
+ bdev_inode_switch_bdi(bdev->bd_inode, bdi);
}
if (bdev->bd_invalidated)
rescan_partitions(disk, bdev);
@@ -1405,8 +1420,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (ret)
goto out_clear;
bdev->bd_contains = whole;
- bdev->bd_inode->i_data.backing_dev_info =
- whole->bd_inode->i_data.backing_dev_info;
+ bdev_inode_switch_bdi(bdev->bd_inode,
+ whole->bd_inode->i_data.backing_dev_info);
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
!bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1439,7 +1454,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
disk_put_part(bdev->bd_part);
bdev->bd_disk = NULL;
bdev->bd_part = NULL;
- bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+ bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
if (bdev != bdev->bd_contains)
__blkdev_put(bdev->bd_contains, mode, 1);
bdev->bd_contains = NULL;
@@ -1533,7 +1548,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
- bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+ bdev_inode_switch_bdi(bdev->bd_inode,
+ &default_backing_dev_info);
if (bdev != bdev->bd_contains)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c03864406af3..64f99cf69ce0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3849,7 +3849,7 @@ again:
p = &root->inode_tree.rb_node;
parent = NULL;
- if (hlist_unhashed(&inode->i_hash))
+ if (inode_unhashed(inode))
return;
spin_lock(&root->inode_lock);
@@ -4758,7 +4758,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
}
btrfs_set_trans_block_group(trans, dir);
- atomic_inc(&inode->i_count);
+ ihold(inode);
err = btrfs_add_nondir(trans, dentry, inode, 1, index);
diff --git a/fs/buffer.c b/fs/buffer.c
index 8d595ab2aed1..5930e382959b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
}
EXPORT_SYMBOL(page_zero_new_buffers);
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
{
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned to = from + len;
struct inode *inode = page->mapping->host;
unsigned block_start, block_end;
sector_t block;
@@ -1915,7 +1917,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
}
return err;
}
-EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(__block_write_begin);
static int __block_commit_write(struct inode *inode, struct page *page,
unsigned from, unsigned to)
@@ -1952,15 +1954,6 @@ static int __block_commit_write(struct inode *inode, struct page *page,
return 0;
}
-int __block_write_begin(struct page *page, loff_t pos, unsigned len,
- get_block_t *get_block)
-{
- unsigned start = pos & (PAGE_CACHE_SIZE - 1);
-
- return block_prepare_write(page, start, start + len, get_block);
-}
-EXPORT_SYMBOL(__block_write_begin);
-
/*
* block_write_begin takes care of the basic task of block allocation and
* bringing partial write blocks uptodate first.
@@ -2378,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
else
end = PAGE_CACHE_SIZE;
- ret = block_prepare_write(page, 0, end, get_block);
+ ret = __block_write_begin(page, 0, end, get_block);
if (!ret)
ret = block_commit_write(page, 0, end);
@@ -2465,11 +2458,10 @@ int nobh_write_begin(struct address_space *mapping,
*fsdata = NULL;
if (page_has_buffers(page)) {
- unlock_page(page);
- page_cache_release(page);
- *pagep = NULL;
- return block_write_begin(mapping, pos, len, flags, pagep,
- get_block);
+ ret = __block_write_begin(page, pos, len, get_block);
+ if (unlikely(ret))
+ goto out_release;
+ return ret;
}
if (PageMappedToDisk(page))
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 96fbeab77f2f..5d8b35539601 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -276,7 +276,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
}
coda_dir_update_mtime(dir_inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_instantiate(de, inode);
inc_nlink(inode);
return 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cf78d44a8d6a..253476d78ed8 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
{
struct inode * inode = new_inode(configfs_sb);
if (inode) {
+ inode->i_ino = get_next_ino();
inode->i_mapping->a_ops = &configfs_aops;
inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
inode->i_op = &configfs_inode_operations;
diff --git a/fs/dcache.c b/fs/dcache.c
index 83293be48149..23702a9d4e6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,33 +67,43 @@ struct dentry_stat_t dentry_stat = {
.age_limit = 45,
};
-static void __d_free(struct dentry *dentry)
+static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
+static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
+
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
+ dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
+static void __d_free(struct rcu_head *head)
{
+ struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+
WARN_ON(!list_empty(&dentry->d_alias));
if (dname_external(dentry))
kfree(dentry->d_name.name);
kmem_cache_free(dentry_cache, dentry);
}
-static void d_callback(struct rcu_head *head)
-{
- struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
- __d_free(dentry);
-}
-
/*
- * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
- * inside dcache_lock.
+ * no dcache_lock, please.
*/
static void d_free(struct dentry *dentry)
{
+ percpu_counter_dec(&nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
+
/* if dentry was never inserted into hash, immediate free is OK */
if (hlist_unhashed(&dentry->d_hash))
- __d_free(dentry);
+ __d_free(&dentry->d_u.d_rcu);
else
- call_rcu(&dentry->d_u.d_rcu, d_callback);
+ call_rcu(&dentry->d_u.d_rcu, __d_free);
}
/*
@@ -123,37 +133,34 @@ static void dentry_iput(struct dentry * dentry)
}
/*
- * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
*/
static void dentry_lru_add(struct dentry *dentry)
{
- list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
- dentry->d_sb->s_nr_dentry_unused++;
- dentry_stat.nr_unused++;
-}
-
-static void dentry_lru_add_tail(struct dentry *dentry)
-{
- list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
- dentry->d_sb->s_nr_dentry_unused++;
- dentry_stat.nr_unused++;
+ if (list_empty(&dentry->d_lru)) {
+ list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+ dentry->d_sb->s_nr_dentry_unused++;
+ percpu_counter_inc(&nr_dentry_unused);
+ }
}
static void dentry_lru_del(struct dentry *dentry)
{
if (!list_empty(&dentry->d_lru)) {
- list_del(&dentry->d_lru);
+ list_del_init(&dentry->d_lru);
dentry->d_sb->s_nr_dentry_unused--;
- dentry_stat.nr_unused--;
+ percpu_counter_dec(&nr_dentry_unused);
}
}
-static void dentry_lru_del_init(struct dentry *dentry)
+static void dentry_lru_move_tail(struct dentry *dentry)
{
- if (likely(!list_empty(&dentry->d_lru))) {
- list_del_init(&dentry->d_lru);
- dentry->d_sb->s_nr_dentry_unused--;
- dentry_stat.nr_unused--;
+ if (list_empty(&dentry->d_lru)) {
+ list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+ dentry->d_sb->s_nr_dentry_unused++;
+ percpu_counter_inc(&nr_dentry_unused);
+ } else {
+ list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
}
}
@@ -172,7 +179,6 @@ static struct dentry *d_kill(struct dentry *dentry)
struct dentry *parent;
list_del(&dentry->d_u.d_child);
- dentry_stat.nr_dentry--; /* For d_free, below */
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
if (IS_ROOT(dentry))
@@ -237,13 +243,15 @@ repeat:
if (dentry->d_op->d_delete(dentry))
goto unhash_it;
}
+
/* Unreachable? Get rid of it */
if (d_unhashed(dentry))
goto kill_it;
- if (list_empty(&dentry->d_lru)) {
- dentry->d_flags |= DCACHE_REFERENCED;
- dentry_lru_add(dentry);
- }
+
+ /* Otherwise leave it cached and ensure it's on the LRU */
+ dentry->d_flags |= DCACHE_REFERENCED;
+ dentry_lru_add(dentry);
+
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return;
@@ -318,11 +326,10 @@ int d_invalidate(struct dentry * dentry)
EXPORT_SYMBOL(d_invalidate);
/* This should be called _only_ with dcache_lock held */
-
static inline struct dentry * __dget_locked(struct dentry *dentry)
{
atomic_inc(&dentry->d_count);
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
return dentry;
}
@@ -441,73 +448,27 @@ static void prune_one_dentry(struct dentry * dentry)
if (dentry->d_op && dentry->d_op->d_delete)
dentry->d_op->d_delete(dentry);
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
__d_drop(dentry);
dentry = d_kill(dentry);
spin_lock(&dcache_lock);
}
}
-/*
- * Shrink the dentry LRU on a given superblock.
- * @sb : superblock to shrink dentry LRU.
- * @count: If count is NULL, we prune all dentries on superblock.
- * @flags: If flags is non-zero, we need to do special processing based on
- * which flags are set. This means we don't need to maintain multiple
- * similar copies of this loop.
- */
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void shrink_dentry_list(struct list_head *list)
{
- LIST_HEAD(referenced);
- LIST_HEAD(tmp);
struct dentry *dentry;
- int cnt = 0;
- BUG_ON(!sb);
- BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
- spin_lock(&dcache_lock);
- if (count != NULL)
- /* called from prune_dcache() and shrink_dcache_parent() */
- cnt = *count;
-restart:
- if (count == NULL)
- list_splice_init(&sb->s_dentry_lru, &tmp);
- else {
- while (!list_empty(&sb->s_dentry_lru)) {
- dentry = list_entry(sb->s_dentry_lru.prev,
- struct dentry, d_lru);
- BUG_ON(dentry->d_sb != sb);
+ while (!list_empty(list)) {
+ dentry = list_entry(list->prev, struct dentry, d_lru);
+ dentry_lru_del(dentry);
- spin_lock(&dentry->d_lock);
- /*
- * If we are honouring the DCACHE_REFERENCED flag and
- * the dentry has this flag set, don't free it. Clear
- * the flag and put it back on the LRU.
- */
- if ((flags & DCACHE_REFERENCED)
- && (dentry->d_flags & DCACHE_REFERENCED)) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
- list_move(&dentry->d_lru, &referenced);
- spin_unlock(&dentry->d_lock);
- } else {
- list_move_tail(&dentry->d_lru, &tmp);
- spin_unlock(&dentry->d_lock);
- cnt--;
- if (!cnt)
- break;
- }
- cond_resched_lock(&dcache_lock);
- }
- }
- while (!list_empty(&tmp)) {
- dentry = list_entry(tmp.prev, struct dentry, d_lru);
- dentry_lru_del_init(dentry);
- spin_lock(&dentry->d_lock);
/*
* We found an inuse dentry which was not removed from
* the LRU because of laziness during lookup. Do not free
* it - just keep it off the LRU list.
*/
+ spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count)) {
spin_unlock(&dentry->d_lock);
continue;
@@ -516,13 +477,60 @@ restart:
/* dentry->d_lock was dropped in prune_one_dentry() */
cond_resched_lock(&dcache_lock);
}
- if (count == NULL && !list_empty(&sb->s_dentry_lru))
- goto restart;
- if (count != NULL)
- *count = cnt;
+}
+
+/**
+ * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
+ * @sb: superblock to shrink dentry LRU.
+ * @count: number of entries to prune
+ * @flags: flags to control the dentry processing
+ *
+ * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
+ */
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+{
+ /* called from prune_dcache() and shrink_dcache_parent() */
+ struct dentry *dentry;
+ LIST_HEAD(referenced);
+ LIST_HEAD(tmp);
+ int cnt = *count;
+
+ spin_lock(&dcache_lock);
+ while (!list_empty(&sb->s_dentry_lru)) {
+ dentry = list_entry(sb->s_dentry_lru.prev,
+ struct dentry, d_lru);
+ BUG_ON(dentry->d_sb != sb);
+
+ /*
+ * If we are honouring the DCACHE_REFERENCED flag and the
+ * dentry has this flag set, don't free it. Clear the flag
+ * and put it back on the LRU.
+ */
+ if (flags & DCACHE_REFERENCED) {
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_flags & DCACHE_REFERENCED) {
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ list_move(&dentry->d_lru, &referenced);
+ spin_unlock(&dentry->d_lock);
+ cond_resched_lock(&dcache_lock);
+ continue;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+
+ list_move_tail(&dentry->d_lru, &tmp);
+ if (!--cnt)
+ break;
+ cond_resched_lock(&dcache_lock);
+ }
+
+ *count = cnt;
+ shrink_dentry_list(&tmp);
+
if (!list_empty(&referenced))
list_splice(&referenced, &sb->s_dentry_lru);
spin_unlock(&dcache_lock);
+
}
/**
@@ -538,7 +546,7 @@ static void prune_dcache(int count)
{
struct super_block *sb, *p = NULL;
int w_count;
- int unused = dentry_stat.nr_unused;
+ int unused = percpu_counter_sum_positive(&nr_dentry_unused);
int prune_ratio;
int pruned;
@@ -608,13 +616,19 @@ static void prune_dcache(int count)
* shrink_dcache_sb - shrink dcache for a superblock
* @sb: superblock
*
- * Shrink the dcache for the specified super block. This
- * is used to free the dcache before unmounting a file
- * system
+ * Shrink the dcache for the specified super block. This is used to free
+ * the dcache before unmounting a file system.
*/
-void shrink_dcache_sb(struct super_block * sb)
+void shrink_dcache_sb(struct super_block *sb)
{
- __shrink_dcache_sb(sb, NULL, 0);
+ LIST_HEAD(tmp);
+
+ spin_lock(&dcache_lock);
+ while (!list_empty(&sb->s_dentry_lru)) {
+ list_splice_init(&sb->s_dentry_lru, &tmp);
+ shrink_dentry_list(&tmp);
+ }
+ spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
@@ -632,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/* detach this root from the system */
spin_lock(&dcache_lock);
- dentry_lru_del_init(dentry);
+ dentry_lru_del(dentry);
__d_drop(dentry);
spin_unlock(&dcache_lock);
@@ -646,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
spin_lock(&dcache_lock);
list_for_each_entry(loop, &dentry->d_subdirs,
d_u.d_child) {
- dentry_lru_del_init(loop);
+ dentry_lru_del(loop);
__d_drop(loop);
cond_resched_lock(&dcache_lock);
}
@@ -703,20 +717,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
* otherwise we ascend to the parent and move to the
* next sibling if there is one */
if (!parent)
- goto out;
-
+ return;
dentry = parent;
-
} while (list_empty(&dentry->d_subdirs));
dentry = list_entry(dentry->d_subdirs.next,
struct dentry, d_u.d_child);
}
-out:
- /* several dentries were freed, need to correct nr_dentry */
- spin_lock(&dcache_lock);
- dentry_stat.nr_dentry -= detached;
- spin_unlock(&dcache_lock);
}
/*
@@ -830,14 +837,15 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
- dentry_lru_del_init(dentry);
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- dentry_lru_add_tail(dentry);
+ dentry_lru_move_tail(dentry);
found++;
+ } else {
+ dentry_lru_del(dentry);
}
/*
@@ -900,12 +908,16 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*/
static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
+ int nr_unused;
+
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
prune_dcache(nr);
}
- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+
+ nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+ return (nr_unused / 100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dcache_shrinker = {
@@ -972,9 +984,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
spin_lock(&dcache_lock);
if (parent)
list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- dentry_stat.nr_dentry++;
spin_unlock(&dcache_lock);
+ percpu_counter_inc(&nr_dentry);
+
return dentry;
}
EXPORT_SYMBOL(d_alloc);
@@ -1478,33 +1491,26 @@ out:
* This is used by ncpfs in its readdir implementation.
* Zero is returned in the dentry is invalid.
*/
-
-int d_validate(struct dentry *dentry, struct dentry *dparent)
+int d_validate(struct dentry *dentry, struct dentry *parent)
{
- struct hlist_head *base;
- struct hlist_node *lhp;
+ struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
+ struct hlist_node *node;
+ struct dentry *d;
/* Check whether the ptr might be valid at all.. */
if (!kmem_ptr_validate(dentry_cache, dentry))
- goto out;
-
- if (dentry->d_parent != dparent)
- goto out;
+ return 0;
+ if (dentry->d_parent != parent)
+ return 0;
- spin_lock(&dcache_lock);
- base = d_hash(dparent, dentry->d_name.hash);
- hlist_for_each(lhp,base) {
- /* hlist_for_each_entry_rcu() not required for d_hash list
- * as it is parsed under dcache_lock
- */
- if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
- __dget_locked(dentry);
- spin_unlock(&dcache_lock);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(d, node, head, d_hash) {
+ if (d == dentry) {
+ dget(dentry);
return 1;
}
}
- spin_unlock(&dcache_lock);
-out:
+ rcu_read_unlock();
return 0;
}
EXPORT_SYMBOL(d_validate);
@@ -1994,7 +2000,7 @@ global_root:
* Returns a pointer into the buffer or an error code if the
* path was too long.
*
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * "buflen" should be positive.
*
* If path is not reachable from the supplied root, then the value of
* root is changed (without modifying refcounts).
@@ -2006,10 +2012,12 @@ char *__d_path(const struct path *path, struct path *root,
int error;
prepend(&res, &buflen, "\0", 1);
+ spin_lock(&dcache_lock);
error = prepend_path(path, root, &res, &buflen);
+ spin_unlock(&dcache_lock);
+
if (error)
return ERR_PTR(error);
-
return res;
}
@@ -2419,6 +2427,9 @@ static void __init dcache_init(void)
{
int loop;
+ percpu_counter_init(&nr_dentry, 0);
+ percpu_counter_init(&nr_dentry_unused, 0);
+
/*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 30a87b3dbcac..a4ed8380e98a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
struct inode *inode = new_inode(sb);
if (inode) {
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) {
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 68cb23e3bb98..b905c79b4f0a 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -46,10 +46,6 @@ static int exofs_file_fsync(struct file *filp, int datasync)
{
int ret;
struct inode *inode = filp->f_mapping->host;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 0, /* metadata-only; caller takes care of data */
- };
struct super_block *sb;
if (!(inode->i_state & I_DIRTY))
@@ -57,7 +53,7 @@ static int exofs_file_fsync(struct file *filp, int datasync)
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
return 0;
- ret = sync_inode(inode, &wbc);
+ ret = sync_inode_metadata(inode, 1);
/* This is a good place to write the sb */
/* TODO: Sechedule an sb-sync on create */
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b7dd0c236863..264e95d02830 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
return exofs_add_nondir(dentry, inode);
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index e9e175949a63..51b304056f10 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -74,21 +74,20 @@ static struct dentry *
find_disconnected_root(struct dentry *dentry)
{
dget(dentry);
- spin_lock(&dentry->d_lock);
- while (!IS_ROOT(dentry) &&
- (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) {
- struct dentry *parent = dentry->d_parent;
- dget(parent);
- spin_unlock(&dentry->d_lock);
+ while (!IS_ROOT(dentry)) {
+ struct dentry *parent = dget_parent(dentry);
+
+ if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
+ dput(parent);
+ break;
+ }
+
dput(dentry);
dentry = parent;
- spin_lock(&dentry->d_lock);
}
- spin_unlock(&dentry->d_lock);
return dentry;
}
-
/*
* Make sure target_dir is fully connected to the dentry tree.
*
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 764109886ec0..2709b34206ab 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
if (IS_DIRSYNC(dir)) {
err = write_one_page(page, 1);
if (!err)
- err = ext2_sync_inode(dir);
+ err = sync_inode_metadata(dir, 1);
} else {
unlock_page(page);
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 416daa62242c..6346a2acf326 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -120,7 +120,6 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
extern struct inode *ext2_iget (struct super_block *, unsigned long);
extern int ext2_write_inode (struct inode *, struct writeback_control *);
extern void ext2_evict_inode(struct inode *);
-extern int ext2_sync_inode (struct inode *);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern int ext2_setattr (struct dentry *, struct iattr *);
extern void ext2_set_inode_flags(struct inode *inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 533699c16040..40ad210a5049 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
- ext2_sync_inode (inode);
+ sync_inode_metadata(inode, 1);
} else {
mark_inode_dirty(inode);
}
@@ -1523,15 +1523,6 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
-int ext2_sync_inode(struct inode *inode)
-{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 0, /* sys_fsync did this */
- };
- return sync_inode(inode, &wbc);
-}
-
int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 71efb0e9a3f2..f8aecd2e3297 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
err = ext2_add_link(dentry, inode);
if (!err) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 85df87d0f7b7..0901320671da 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1221,9 +1221,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
}
es = sbi->s_es;
- if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
- (old_mount_opt & EXT2_MOUNT_XIP)) &&
- invalidate_inodes(sb)) {
+ if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) {
ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
"xip flag with busy inodes while remounting");
sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 8c29ae15129e..f84700be3274 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
inode->i_ctime = CURRENT_TIME_SEC;
if (IS_SYNC(inode)) {
- error = ext2_sync_inode (inode);
+ error = sync_inode_metadata(inode, 1);
/* In case sync failed due to ENOSPC the inode was actually
* written (only some dirty data were not) so we just proceed
* as if nothing happened and cleanup the unused block */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5e0faf4cda79..ad05353040a1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1696,8 +1696,8 @@ static int ext3_journalled_writepage(struct page *page,
* doesn't seem much point in redirtying the page here.
*/
ClearPageChecked(page);
- ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
- ext3_get_block);
+ ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
+ ext3_get_block);
if (ret != 0) {
ext3_journal_stop(handle);
goto out_unlock;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 2b35ddb70d65..bce9dce639b8 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2260,7 +2260,7 @@ retry:
inode->i_ctime = CURRENT_TIME_SEC;
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
err = ext3_add_entry(handle, dentry, inode);
if (!err) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4b8debeb3965..49635ef236f8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1538,10 +1538,10 @@ static int do_journal_get_write_access(handle_t *handle,
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
/*
- * __block_prepare_write() could have dirtied some buffers. Clean
+ * __block_write_begin() could have dirtied some buffers. Clean
* the dirty bit as jbd2_journal_get_write_access() could complain
* otherwise about fs integrity issues. Setting of the dirty bit
- * by __block_prepare_write() isn't a real problem here as we clear
+ * by __block_write_begin() isn't a real problem here as we clear
* the bit before releasing a page lock and thus writeback cannot
* ever write the buffer.
*/
@@ -2550,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
if (buffer_delay(bh))
return 0; /* Not sure this could or should happen */
/*
- * XXX: __block_prepare_write() unmaps passed block,
- * is it OK?
+ * XXX: __block_write_begin() unmaps passed block, is it OK?
*/
ret = ext4_da_reserve_space(inode, iblock);
if (ret)
@@ -2583,7 +2582,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
/*
* This function is used as a standard get_block_t calback function
* when there is no desire to allocate any blocks. It is used as a
- * callback function for block_prepare_write() and block_write_full_page().
+ * callback function for block_write_begin() and block_write_full_page().
* These functions should only try to map a single block at a time.
*
* Since this function doesn't do block allocations even if the caller
@@ -2743,7 +2742,7 @@ static int ext4_writepage(struct page *page,
* all are mapped and non delay. We don't want to
* do block allocation here.
*/
- ret = block_prepare_write(page, 0, len,
+ ret = __block_write_begin(page, 0, len,
noalloc_get_block_write);
if (!ret) {
page_bufs = page_buffers(page);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 19aa0d44d822..42f77b1dc72d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2373,6 +2373,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
printk(KERN_ERR "EXT4-fs: can't get new inode\n");
goto err_freesgi;
}
+ sbi->s_buddy_cache->i_ino = get_next_ino();
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
for (i = 0; i < ngroups; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 314c0d3b3fa9..bd39885b5998 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2312,7 +2312,7 @@ retry:
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 79d1b4ea13e7..8c04eac5079d 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
struct inode *ip = NULL;
if ((ip = new_inode(sbp))) {
+ ip->i_ino = get_next_ino();
vxfs_iinit(ip, vip);
ip->i_mapping->a_ops = &vxfs_aops;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9e46aec10d1a..aed881a76b22 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
return sb->s_bdi;
}
+static inline struct inode *wb_inode(struct list_head *head)
+{
+ return list_entry(head, struct inode, i_wb_list);
+}
+
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
@@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode)
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
- tail = list_entry(wb->b_dirty.next, struct inode, i_list);
+ tail = wb_inode(wb->b_dirty.next);
if (time_before(inode->dirtied_when, tail->dirtied_when))
inode->dirtied_when = jiffies;
}
- list_move(&inode->i_list, &wb->b_dirty);
+ list_move(&inode->i_wb_list, &wb->b_dirty);
}
/*
@@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode)
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
- list_move(&inode->i_list, &wb->b_more_io);
+ list_move(&inode->i_wb_list, &wb->b_more_io);
}
static void inode_sync_complete(struct inode *inode)
@@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
while (!list_empty(delaying_queue)) {
- inode = list_entry(delaying_queue->prev, struct inode, i_list);
+ inode = wb_inode(delaying_queue->prev);
if (older_than_this &&
inode_dirtied_after(inode, *older_than_this))
break;
if (sb && sb != inode->i_sb)
do_sb_sort = 1;
sb = inode->i_sb;
- list_move(&inode->i_list, &tmp);
+ list_move(&inode->i_wb_list, &tmp);
}
/* just one sb in list, splice to dispatch_queue and we're done */
@@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
/* Move inodes from one superblock together */
while (!list_empty(&tmp)) {
- inode = list_entry(tmp.prev, struct inode, i_list);
- sb = inode->i_sb;
+ sb = wb_inode(tmp.prev)->i_sb;
list_for_each_prev_safe(pos, node, &tmp) {
- inode = list_entry(pos, struct inode, i_list);
+ inode = wb_inode(pos);
if (inode->i_sb == sb)
- list_move(&inode->i_list, dispatch_queue);
+ list_move(&inode->i_wb_list, dispatch_queue);
}
}
}
@@ -408,16 +412,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* completion.
*/
redirty_tail(inode);
- } else if (atomic_read(&inode->i_count)) {
- /*
- * The inode is clean, inuse
- */
- list_move(&inode->i_list, &inode_in_use);
} else {
/*
- * The inode is clean, unused
+ * The inode is clean. At this point we either have
+ * a reference to the inode or it's on it's way out.
+ * No need to add it back to the LRU.
*/
- list_move(&inode->i_list, &inode_unused);
+ list_del_init(&inode->i_wb_list);
}
}
inode_sync_complete(inode);
@@ -465,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
- struct inode *inode = list_entry(wb->b_io.prev,
- struct inode, i_list);
+ struct inode *inode = wb_inode(wb->b_io.prev);
if (inode->i_sb != sb) {
if (only_this_sb) {
@@ -487,10 +487,16 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
return 0;
}
- if (inode->i_state & (I_NEW | I_WILL_FREE)) {
+ /*
+ * Don't bother with new inodes or inodes beeing freed, first
+ * kind does not need peridic writeout yet, and for the latter
+ * kind writeout is handled by the freer.
+ */
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
requeue_io(inode);
continue;
}
+
/*
* Was this inode dirtied after sync_sb_inodes was called?
* This keeps sync from extra jobs and livelock.
@@ -498,7 +504,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
if (inode_dirtied_after(inode, wbc->wb_start))
return 1;
- BUG_ON(inode->i_state & I_FREEING);
__iget(inode);
pages_skipped = wbc->pages_skipped;
writeback_single_inode(inode, wbc);
@@ -536,8 +541,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
queue_io(wb, wbc->older_than_this);
while (!list_empty(&wb->b_io)) {
- struct inode *inode = list_entry(wb->b_io.prev,
- struct inode, i_list);
+ struct inode *inode = wb_inode(wb->b_io.prev);
struct super_block *sb = inode->i_sb;
if (!pin_sb_for_writeback(sb)) {
@@ -675,8 +679,7 @@ static long wb_writeback(struct bdi_writeback *wb,
*/
spin_lock(&inode_lock);
if (!list_empty(&wb->b_more_io)) {
- inode = list_entry(wb->b_more_io.prev,
- struct inode, i_list);
+ inode = wb_inode(wb->b_more_io.prev);
trace_wbc_writeback_wait(&wbc, wb->bdi);
inode_wait_for_writeback(inode);
}
@@ -727,7 +730,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
*/
nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS) +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+ get_nr_dirty_inodes();
if (nr_pages) {
struct wb_writeback_work work = {
@@ -966,7 +969,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* dirty list. Add blockdev inodes as well.
*/
if (!S_ISBLK(inode->i_mode)) {
- if (hlist_unhashed(&inode->i_hash))
+ if (inode_unhashed(inode))
goto out;
}
if (inode->i_state & I_FREEING)
@@ -994,7 +997,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
inode->dirtied_when = jiffies;
- list_move(&inode->i_list, &bdi->wb.b_dirty);
+ list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
}
}
out:
@@ -1094,8 +1097,7 @@ void writeback_inodes_sb(struct super_block *sb)
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- work.nr_pages = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+ work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
@@ -1202,3 +1204,23 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
return ret;
}
EXPORT_SYMBOL(sync_inode);
+
+/**
+ * sync_inode - write an inode to disk
+ * @inode: the inode to sync
+ * @wait: wait for I/O to complete.
+ *
+ * Write an inode to disk and adjust it's dirty state after completion.
+ *
+ * Note: only writes the actual inode, no associated data or other metadata.
+ */
+int sync_inode_metadata(struct inode *inode, int wait)
+{
+ struct writeback_control wbc = {
+ .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
+ .nr_to_write = 0, /* metadata-only */
+ };
+
+ return sync_inode(inode, &wbc);
+}
+EXPORT_SYMBOL(sync_inode_metadata);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 7367e177186f..4eba07661e5c 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
if (!inode)
return NULL;
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_uid = fc->user_id;
inode->i_gid = fc->group_id;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 6b24afb96aae..4f36f8832b9b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -618,7 +618,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
struct gfs2_alloc *al = NULL;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
unsigned from = pos & (PAGE_CACHE_SIZE - 1);
- unsigned to = from + len;
struct page *page;
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
@@ -691,7 +690,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
}
prepare_write:
- error = block_prepare_write(page, from, to, gfs2_block_map);
+ error = __block_write_begin(page, from, len, gfs2_block_map);
out:
if (error == 0)
return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index aeafc233dc89..cade1acbcea9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1219,7 +1219,6 @@ fail_sb:
fail_locking:
init_locking(sdp, &mount_gh, UNDO);
fail_lm:
- invalidate_inodes(sb);
gfs2_gl_hash_clear(sdp);
gfs2_lm_unmount(sdp);
fail_sys:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 0534510200d5..12cbea7502c2 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -255,7 +255,7 @@ out_parent:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_instantiate(dentry, inode);
mark_inode_dirty(inode);
}
@@ -1294,7 +1294,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
int error;
if (!page_has_buffers(page)) {
- error = block_prepare_write(page, from, to, gfs2_block_map);
+ error = __block_write_begin(page, from, to - from, gfs2_block_map);
if (unlikely(error))
return error;
@@ -1313,7 +1313,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
next += bh->b_size;
if (buffer_mapped(bh)) {
if (end) {
- error = block_prepare_write(page, start, end,
+ error = __block_write_begin(page, start, end - start,
gfs2_block_map);
if (unlikely(error))
return error;
@@ -1328,7 +1328,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
} while (next < to);
if (end) {
- error = block_prepare_write(page, start, end, gfs2_block_map);
+ error = __block_write_begin(page, start, end - start, gfs2_block_map);
if (unlikely(error))
return error;
empty_write_end(page, start, end);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 047d1176096c..2b2c4997430b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -857,7 +857,6 @@ restart:
gfs2_clear_rgrpd(sdp);
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
- invalidate_inodes(sdp->sd_vfs);
gfs2_gl_hash_clear(sdp);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 4f55651aaa51..c8cffb81e849 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -147,8 +147,6 @@ struct hfs_sb_info {
u16 blockoffset;
int fs_div;
-
- struct hlist_head rsrc_inodes;
};
#define HFS_FLG_BITMAP_DIRTY 0
@@ -254,17 +252,6 @@ static inline void hfs_bitmap_dirty(struct super_block *sb)
sb->s_dirt = 1;
}
-static inline void hfs_buffer_sync(struct buffer_head *bh)
-{
- while (buffer_locked(bh)) {
- wait_on_buffer(bh);
- }
- if (buffer_dirty(bh)) {
- ll_rw_block(WRITE, 1, &bh);
- wait_on_buffer(bh);
- }
-}
-
#define sb_bread512(sb, sec, data) ({ \
struct buffer_head *__bh; \
sector_t __block; \
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 397b7adc7ce6..dffb4e996643 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -524,7 +524,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
HFS_I(inode)->rsrc_inode = dir;
HFS_I(dir)->rsrc_inode = inode;
igrab(dir);
- hlist_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes);
+ hlist_add_fake(&inode->i_hash);
mark_inode_dirty(inode);
out:
d_add(dentry, inode);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 86428f5ac991..1563d5ce5764 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -220,7 +220,7 @@ int hfs_mdb_get(struct super_block *sb)
mdb->drLsMod = hfs_mtime();
mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
- hfs_buffer_sync(HFS_SB(sb)->mdb_bh);
+ sync_dirty_buffer(HFS_SB(sb)->mdb_bh);
}
return 0;
@@ -287,7 +287,7 @@ void hfs_mdb_commit(struct super_block *sb)
HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
- hfs_buffer_sync(HFS_SB(sb)->alt_mdb_bh);
+ sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
}
if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) {
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 33254160f650..6ee1586f2334 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -382,7 +382,6 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
sb->s_fs_info = sbi;
- INIT_HLIST_HEAD(&sbi->rsrc_inodes);
res = -EINVAL;
if (!parse_options((char *)data, sbi)) {
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d236d85ec9d7..e318bbc0daf6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -286,7 +286,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
inc_nlink(inode);
hfsplus_instantiate(dst_dentry, inode, cnid);
- atomic_inc(&inode->i_count);
+ ihold(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
sbi->file_count++;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 78449280dae0..8afd7e84f98d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -211,7 +211,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
* appear hashed, but do not put on any lists. hlist_del()
* will work fine and require no locking.
*/
- inode->i_hash.pprev = &inode->i_hash.next;
+ hlist_add_fake(&inode->i_hash);
mark_inode_dirty(inode);
out:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a14328d270e8..b14be3f781c7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -456,6 +456,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
inode = new_inode(sb);
if (inode) {
struct hugetlbfs_inode_info *info;
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_uid = uid;
inode->i_gid = gid;
diff --git a/fs/inode.c b/fs/inode.c
index 56d909d69bc8..ae2727ab0c3a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -29,7 +29,6 @@
/*
* This is needed for the following functions:
* - inode_has_buffers
- * - invalidate_inode_buffers
* - invalidate_bdev
*
* FIXME: remove all knowledge of the buffer layer from this file
@@ -73,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly;
* allowing for low-overhead inode sync() operations.
*/
-LIST_HEAD(inode_in_use);
-LIST_HEAD(inode_unused);
+static LIST_HEAD(inode_lru);
static struct hlist_head *inode_hashtable __read_mostly;
/*
@@ -104,8 +102,41 @@ static DECLARE_RWSEM(iprune_sem);
*/
struct inodes_stat_t inodes_stat;
+static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
+static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+
static struct kmem_cache *inode_cachep __read_mostly;
+static inline int get_nr_inodes(void)
+{
+ return percpu_counter_sum_positive(&nr_inodes);
+}
+
+static inline int get_nr_inodes_unused(void)
+{
+ return percpu_counter_sum_positive(&nr_inodes_unused);
+}
+
+int get_nr_dirty_inodes(void)
+{
+ int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
+ return nr_dirty > 0 ? nr_dirty : 0;
+
+}
+
+/*
+ * Handle nr_inode sysctl
+ */
+#ifdef CONFIG_SYSCTL
+int proc_nr_inodes(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ inodes_stat.nr_inodes = get_nr_inodes();
+ inodes_stat.nr_unused = get_nr_inodes_unused();
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
static void wake_up_inode(struct inode *inode)
{
/*
@@ -193,6 +224,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif
+ percpu_counter_inc(&nr_inodes);
+
return 0;
out:
return -ENOMEM;
@@ -233,11 +266,13 @@ void __destroy_inode(struct inode *inode)
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
+ percpu_counter_dec(&nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
-void destroy_inode(struct inode *inode)
+static void destroy_inode(struct inode *inode)
{
+ BUG_ON(!list_empty(&inode->i_lru));
__destroy_inode(inode);
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
@@ -256,6 +291,8 @@ void inode_init_once(struct inode *inode)
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
+ INIT_LIST_HEAD(&inode->i_wb_list);
+ INIT_LIST_HEAD(&inode->i_lru);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -282,14 +319,109 @@ static void init_once(void *foo)
*/
void __iget(struct inode *inode)
{
- if (atomic_inc_return(&inode->i_count) != 1)
- return;
+ atomic_inc(&inode->i_count);
+}
+
+/*
+ * get additional reference to inode; caller must already hold one.
+ */
+void ihold(struct inode *inode)
+{
+ WARN_ON(atomic_inc_return(&inode->i_count) < 2);
+}
+EXPORT_SYMBOL(ihold);
+
+static void inode_lru_list_add(struct inode *inode)
+{
+ if (list_empty(&inode->i_lru)) {
+ list_add(&inode->i_lru, &inode_lru);
+ percpu_counter_inc(&nr_inodes_unused);
+ }
+}
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- list_move(&inode->i_list, &inode_in_use);
- inodes_stat.nr_unused--;
+static void inode_lru_list_del(struct inode *inode)
+{
+ if (!list_empty(&inode->i_lru)) {
+ list_del_init(&inode->i_lru);
+ percpu_counter_dec(&nr_inodes_unused);
+ }
+}
+
+static inline void __inode_sb_list_add(struct inode *inode)
+{
+ list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
}
+/**
+ * inode_sb_list_add - add inode to the superblock list of inodes
+ * @inode: inode to add
+ */
+void inode_sb_list_add(struct inode *inode)
+{
+ spin_lock(&inode_lock);
+ __inode_sb_list_add(inode);
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL_GPL(inode_sb_list_add);
+
+static inline void __inode_sb_list_del(struct inode *inode)
+{
+ list_del_init(&inode->i_sb_list);
+}
+
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+ unsigned long tmp;
+
+ tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+ L1_CACHE_BYTES;
+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+ return tmp & I_HASHMASK;
+}
+
+/**
+ * __insert_inode_hash - hash an inode
+ * @inode: unhashed inode
+ * @hashval: unsigned long value used to locate this object in the
+ * inode_hashtable.
+ *
+ * Add an inode to the inode hash for this superblock.
+ */
+void __insert_inode_hash(struct inode *inode, unsigned long hashval)
+{
+ struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
+
+ spin_lock(&inode_lock);
+ hlist_add_head(&inode->i_hash, b);
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(__insert_inode_hash);
+
+/**
+ * __remove_inode_hash - remove an inode from the hash
+ * @inode: inode to unhash
+ *
+ * Remove an inode from the superblock.
+ */
+static void __remove_inode_hash(struct inode *inode)
+{
+ hlist_del_init(&inode->i_hash);
+}
+
+/**
+ * remove_inode_hash - remove an inode from the hash
+ * @inode: inode to unhash
+ *
+ * Remove an inode from the superblock.
+ */
+void remove_inode_hash(struct inode *inode)
+{
+ spin_lock(&inode_lock);
+ hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(remove_inode_hash);
+
void end_writeback(struct inode *inode)
{
might_sleep();
@@ -328,101 +460,113 @@ static void evict(struct inode *inode)
*/
static void dispose_list(struct list_head *head)
{
- int nr_disposed = 0;
-
while (!list_empty(head)) {
struct inode *inode;
- inode = list_first_entry(head, struct inode, i_list);
- list_del(&inode->i_list);
+ inode = list_first_entry(head, struct inode, i_lru);
+ list_del_init(&inode->i_lru);
evict(inode);
spin_lock(&inode_lock);
- hlist_del_init(&inode->i_hash);
- list_del_init(&inode->i_sb_list);
+ __remove_inode_hash(inode);
+ __inode_sb_list_del(inode);
spin_unlock(&inode_lock);
wake_up_inode(inode);
destroy_inode(inode);
- nr_disposed++;
}
- spin_lock(&inode_lock);
- inodes_stat.nr_inodes -= nr_disposed;
- spin_unlock(&inode_lock);
}
-/*
- * Invalidate all inodes for a device.
+/**
+ * evict_inodes - evict all evictable inodes for a superblock
+ * @sb: superblock to operate on
+ *
+ * Make sure that no inodes with zero refcount are retained. This is
+ * called by superblock shutdown after having MS_ACTIVE flag removed,
+ * so any inode reaching zero refcount during or after that call will
+ * be immediately evicted.
*/
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+void evict_inodes(struct super_block *sb)
{
- struct list_head *next;
- int busy = 0, count = 0;
-
- next = head->next;
- for (;;) {
- struct list_head *tmp = next;
- struct inode *inode;
+ struct inode *inode, *next;
+ LIST_HEAD(dispose);
- /*
- * We can reschedule here without worrying about the list's
- * consistency because the per-sb list of inodes must not
- * change during umount anymore, and because iprune_sem keeps
- * shrink_icache_memory() away.
- */
- cond_resched_lock(&inode_lock);
+ down_write(&iprune_sem);
- next = next->next;
- if (tmp == head)
- break;
- inode = list_entry(tmp, struct inode, i_sb_list);
- if (inode->i_state & I_NEW)
+ spin_lock(&inode_lock);
+ list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
+ if (atomic_read(&inode->i_count))
continue;
- invalidate_inode_buffers(inode);
- if (!atomic_read(&inode->i_count)) {
- list_move(&inode->i_list, dispose);
- WARN_ON(inode->i_state & I_NEW);
- inode->i_state |= I_FREEING;
- count++;
+
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+ WARN_ON(1);
continue;
}
- busy = 1;
+
+ inode->i_state |= I_FREEING;
+
+ /*
+ * Move the inode off the IO lists and LRU once I_FREEING is
+ * set so that it won't get moved back on there if it is dirty.
+ */
+ list_move(&inode->i_lru, &dispose);
+ list_del_init(&inode->i_wb_list);
+ if (!(inode->i_state & (I_DIRTY | I_SYNC)))
+ percpu_counter_dec(&nr_inodes_unused);
}
- /* only unused inodes may be cached with i_count zero */
- inodes_stat.nr_unused -= count;
- return busy;
+ spin_unlock(&inode_lock);
+
+ dispose_list(&dispose);
+ up_write(&iprune_sem);
}
/**
- * invalidate_inodes - discard the inodes on a device
- * @sb: superblock
+ * invalidate_inodes - attempt to free all inodes on a superblock
+ * @sb: superblock to operate on
*
- * Discard all of the inodes for a given superblock. If the discard
- * fails because there are busy inodes then a non zero value is returned.
- * If the discard is successful all the inodes have been discarded.
+ * Attempts to free all inodes for a given superblock. If there were any
+ * busy inodes return a non-zero value, else zero.
*/
int invalidate_inodes(struct super_block *sb)
{
- int busy;
- LIST_HEAD(throw_away);
+ int busy = 0;
+ struct inode *inode, *next;
+ LIST_HEAD(dispose);
down_write(&iprune_sem);
+
spin_lock(&inode_lock);
- fsnotify_unmount_inodes(&sb->s_inodes);
- busy = invalidate_list(&sb->s_inodes, &throw_away);
+ list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
+ continue;
+ if (atomic_read(&inode->i_count)) {
+ busy = 1;
+ continue;
+ }
+
+ inode->i_state |= I_FREEING;
+
+ /*
+ * Move the inode off the IO lists and LRU once I_FREEING is
+ * set so that it won't get moved back on there if it is dirty.
+ */
+ list_move(&inode->i_lru, &dispose);
+ list_del_init(&inode->i_wb_list);
+ if (!(inode->i_state & (I_DIRTY | I_SYNC)))
+ percpu_counter_dec(&nr_inodes_unused);
+ }
spin_unlock(&inode_lock);
- dispose_list(&throw_away);
+ dispose_list(&dispose);
up_write(&iprune_sem);
return busy;
}
-EXPORT_SYMBOL(invalidate_inodes);
static int can_unuse(struct inode *inode)
{
- if (inode->i_state)
+ if (inode->i_state & ~I_REFERENCED)
return 0;
if (inode_has_buffers(inode))
return 0;
@@ -434,22 +578,24 @@ static int can_unuse(struct inode *inode)
}
/*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to
- * a temporary list and then are freed outside inode_lock by dispose_list().
+ * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
+ * temporary list and then are freed outside inode_lock by dispose_list().
*
* Any inodes which are pinned purely because of attached pagecache have their
- * pagecache removed. We expect the final iput() on that inode to add it to
- * the front of the inode_unused list. So look for it there and if the
- * inode is still freeable, proceed. The right inode is found 99.9% of the
- * time in testing on a 4-way.
+ * pagecache removed. If the inode has metadata buffers attached to
+ * mapping->private_list then try to remove them.
*
- * If the inode has metadata buffers attached to mapping->private_list then
- * try to remove them.
+ * If the inode has the I_REFERENCED flag set, then it means that it has been
+ * used recently - the flag is set in iput_final(). When we encounter such an
+ * inode, clear the flag and move it to the back of the LRU so it gets another
+ * pass through the LRU before it gets reclaimed. This is necessary because of
+ * the fact we are doing lazy LRU updates to minimise lock contention so the
+ * LRU does not have strict ordering. Hence we don't want to reclaim inodes
+ * with this flag set because they are the inodes that are out of order.
*/
static void prune_icache(int nr_to_scan)
{
LIST_HEAD(freeable);
- int nr_pruned = 0;
int nr_scanned;
unsigned long reap = 0;
@@ -458,13 +604,26 @@ static void prune_icache(int nr_to_scan)
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
- if (list_empty(&inode_unused))
+ if (list_empty(&inode_lru))
break;
- inode = list_entry(inode_unused.prev, struct inode, i_list);
+ inode = list_entry(inode_lru.prev, struct inode, i_lru);
- if (inode->i_state || atomic_read(&inode->i_count)) {
- list_move(&inode->i_list, &inode_unused);
+ /*
+ * Referenced or dirty inodes are still in use. Give them
+ * another pass through the LRU as we canot reclaim them now.
+ */
+ if (atomic_read(&inode->i_count) ||
+ (inode->i_state & ~I_REFERENCED)) {
+ list_del_init(&inode->i_lru);
+ percpu_counter_dec(&nr_inodes_unused);
+ continue;
+ }
+
+ /* recently referenced inodes get one more pass */
+ if (inode->i_state & I_REFERENCED) {
+ list_move(&inode->i_lru, &inode_lru);
+ inode->i_state &= ~I_REFERENCED;
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -476,18 +635,23 @@ static void prune_icache(int nr_to_scan)
iput(inode);
spin_lock(&inode_lock);
- if (inode != list_entry(inode_unused.next,
- struct inode, i_list))
+ if (inode != list_entry(inode_lru.next,
+ struct inode, i_lru))
continue; /* wrong inode or list_empty */
if (!can_unuse(inode))
continue;
}
- list_move(&inode->i_list, &freeable);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- nr_pruned++;
+
+ /*
+ * Move the inode off the IO lists and LRU once I_FREEING is
+ * set so that it won't get moved back on there if it is dirty.
+ */
+ list_move(&inode->i_lru, &freeable);
+ list_del_init(&inode->i_wb_list);
+ percpu_counter_dec(&nr_inodes_unused);
}
- inodes_stat.nr_unused -= nr_pruned;
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
@@ -519,7 +683,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
return -1;
prune_icache(nr);
}
- return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+ return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
}
static struct shrinker icache_shrinker = {
@@ -530,9 +694,6 @@ static struct shrinker icache_shrinker = {
static void __wait_on_freeing_inode(struct inode *inode);
/*
* Called with the inode lock held.
- * NOTE: we are not increasing the inode-refcount, you must call __iget()
- * by hand after calling find_inode now! This simplifies iunique and won't
- * add any additional branch in the common code.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
@@ -552,9 +713,10 @@ repeat:
__wait_on_freeing_inode(inode);
goto repeat;
}
- break;
+ __iget(inode);
+ return inode;
}
- return node ? inode : NULL;
+ return NULL;
}
/*
@@ -577,53 +739,49 @@ repeat:
__wait_on_freeing_inode(inode);
goto repeat;
}
- break;
+ __iget(inode);
+ return inode;
}
- return node ? inode : NULL;
-}
-
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
- unsigned long tmp;
-
- tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
- L1_CACHE_BYTES;
- tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
- return tmp & I_HASHMASK;
-}
-
-static inline void
-__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
- struct inode *inode)
-{
- inodes_stat.nr_inodes++;
- list_add(&inode->i_list, &inode_in_use);
- list_add(&inode->i_sb_list, &sb->s_inodes);
- if (head)
- hlist_add_head(&inode->i_hash, head);
+ return NULL;
}
-/**
- * inode_add_to_lists - add a new inode to relevant lists
- * @sb: superblock inode belongs to
- * @inode: inode to mark in use
+/*
+ * Each cpu owns a range of LAST_INO_BATCH numbers.
+ * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
+ * to renew the exhausted range.
*
- * When an inode is allocated it needs to be accounted for, added to the in use
- * list, the owning superblock and the inode hash. This needs to be done under
- * the inode_lock, so export a function to do this rather than the inode lock
- * itself. We calculate the hash list to add to here so it is all internal
- * which requires the caller to have already set up the inode number in the
- * inode to add.
+ * This does not significantly increase overflow rate because every CPU can
+ * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
+ * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
+ * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
+ * overflow rate by 2x, which does not seem too significant.
+ *
+ * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
+ * error if st_ino won't fit in target struct field. Use 32bit counter
+ * here to attempt to avoid that.
*/
-void inode_add_to_lists(struct super_block *sb, struct inode *inode)
+#define LAST_INO_BATCH 1024
+static DEFINE_PER_CPU(unsigned int, last_ino);
+
+unsigned int get_next_ino(void)
{
- struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+ unsigned int *p = &get_cpu_var(last_ino);
+ unsigned int res = *p;
- spin_lock(&inode_lock);
- __inode_add_to_lists(sb, head, inode);
- spin_unlock(&inode_lock);
+#ifdef CONFIG_SMP
+ if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
+ static atomic_t shared_last_ino;
+ int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
+
+ res = next - LAST_INO_BATCH;
+ }
+#endif
+
+ *p = ++res;
+ put_cpu_var(last_ino);
+ return res;
}
-EXPORT_SYMBOL_GPL(inode_add_to_lists);
+EXPORT_SYMBOL(get_next_ino);
/**
* new_inode - obtain an inode
@@ -639,12 +797,6 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
*/
struct inode *new_inode(struct super_block *sb)
{
- /*
- * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
- * error if st_ino won't fit in target struct field. Use 32bit counter
- * here to attempt to avoid that.
- */
- static unsigned int last_ino;
struct inode *inode;
spin_lock_prefetch(&inode_lock);
@@ -652,8 +804,7 @@ struct inode *new_inode(struct super_block *sb)
inode = alloc_inode(sb);
if (inode) {
spin_lock(&inode_lock);
- __inode_add_to_lists(sb, NULL, inode);
- inode->i_ino = ++last_ino;
+ __inode_sb_list_add(inode);
inode->i_state = 0;
spin_unlock(&inode_lock);
}
@@ -664,7 +815,7 @@ EXPORT_SYMBOL(new_inode);
void unlock_new_inode(struct inode *inode)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
- if (inode->i_mode & S_IFDIR) {
+ if (S_ISDIR(inode->i_mode)) {
struct file_system_type *type = inode->i_sb->s_type;
/* Set new key only if filesystem hasn't already changed it */
@@ -721,7 +872,8 @@ static struct inode *get_new_inode(struct super_block *sb,
if (set(inode, data))
goto set_failed;
- __inode_add_to_lists(sb, head, inode);
+ hlist_add_head(&inode->i_hash, head);
+ __inode_sb_list_add(inode);
inode->i_state = I_NEW;
spin_unlock(&inode_lock);
@@ -736,7 +888,6 @@ static struct inode *get_new_inode(struct super_block *sb,
* us. Use the old inode instead of the one we just
* allocated.
*/
- __iget(old);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -768,7 +919,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
old = find_inode_fast(sb, head, ino);
if (!old) {
inode->i_ino = ino;
- __inode_add_to_lists(sb, head, inode);
+ hlist_add_head(&inode->i_hash, head);
+ __inode_sb_list_add(inode);
inode->i_state = I_NEW;
spin_unlock(&inode_lock);
@@ -783,7 +935,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
* us. Use the old inode instead of the one we just
* allocated.
*/
- __iget(old);
spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
@@ -792,6 +943,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
return inode;
}
+/*
+ * search the inode cache for a matching inode number.
+ * If we find one, then the inode number we are trying to
+ * allocate is not unique and so we should not use it.
+ *
+ * Returns 1 if the inode number is unique, 0 if it is not.
+ */
+static int test_inode_iunique(struct super_block *sb, unsigned long ino)
+{
+ struct hlist_head *b = inode_hashtable + hash(sb, ino);
+ struct hlist_node *node;
+ struct inode *inode;
+
+ hlist_for_each_entry(inode, node, b, i_hash) {
+ if (inode->i_ino == ino && inode->i_sb == sb)
+ return 0;
+ }
+
+ return 1;
+}
+
/**
* iunique - get a unique inode number
* @sb: superblock
@@ -813,19 +985,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
* error if st_ino won't fit in target struct field. Use 32bit counter
* here to attempt to avoid that.
*/
+ static DEFINE_SPINLOCK(iunique_lock);
static unsigned int counter;
- struct inode *inode;
- struct hlist_head *head;
ino_t res;
spin_lock(&inode_lock);
+ spin_lock(&iunique_lock);
do {
if (counter <= max_reserved)
counter = max_reserved + 1;
res = counter++;
- head = inode_hashtable + hash(sb, res);
- inode = find_inode_fast(sb, head, res);
- } while (inode != NULL);
+ } while (!test_inode_iunique(sb, res));
+ spin_unlock(&iunique_lock);
spin_unlock(&inode_lock);
return res;
@@ -877,7 +1048,6 @@ static struct inode *ifind(struct super_block *sb,
spin_lock(&inode_lock);
inode = find_inode(sb, head, test, data);
if (inode) {
- __iget(inode);
spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
@@ -910,7 +1080,6 @@ static struct inode *ifind_fast(struct super_block *sb,
spin_lock(&inode_lock);
inode = find_inode_fast(sb, head, ino);
if (inode) {
- __iget(inode);
spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
@@ -1096,7 +1265,7 @@ int insert_inode_locked(struct inode *inode)
__iget(old);
spin_unlock(&inode_lock);
wait_on_inode(old);
- if (unlikely(!hlist_unhashed(&old->i_hash))) {
+ if (unlikely(!inode_unhashed(old))) {
iput(old);
return -EBUSY;
}
@@ -1135,7 +1304,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
__iget(old);
spin_unlock(&inode_lock);
wait_on_inode(old);
- if (unlikely(!hlist_unhashed(&old->i_hash))) {
+ if (unlikely(!inode_unhashed(old))) {
iput(old);
return -EBUSY;
}
@@ -1144,36 +1313,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
}
EXPORT_SYMBOL(insert_inode_locked4);
-/**
- * __insert_inode_hash - hash an inode
- * @inode: unhashed inode
- * @hashval: unsigned long value used to locate this object in the
- * inode_hashtable.
- *
- * Add an inode to the inode hash for this superblock.
- */
-void __insert_inode_hash(struct inode *inode, unsigned long hashval)
-{
- struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
- spin_lock(&inode_lock);
- hlist_add_head(&inode->i_hash, head);
- spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(__insert_inode_hash);
-
-/**
- * remove_inode_hash - remove an inode from the hash
- * @inode: inode to unhash
- *
- * Remove an inode from the superblock.
- */
-void remove_inode_hash(struct inode *inode)
-{
- spin_lock(&inode_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(remove_inode_hash);
int generic_delete_inode(struct inode *inode)
{
@@ -1188,7 +1327,7 @@ EXPORT_SYMBOL(generic_delete_inode);
*/
int generic_drop_inode(struct inode *inode)
{
- return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
+ return !inode->i_nlink || inode_unhashed(inode);
}
EXPORT_SYMBOL_GPL(generic_drop_inode);
@@ -1214,10 +1353,11 @@ static void iput_final(struct inode *inode)
drop = generic_drop_inode(inode);
if (!drop) {
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- list_move(&inode->i_list, &inode_unused);
- inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
+ inode->i_state |= I_REFERENCED;
+ if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+ inode_lru_list_add(inode);
+ }
spin_unlock(&inode_lock);
return;
}
@@ -1228,19 +1368,23 @@ static void iput_final(struct inode *inode)
spin_lock(&inode_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- inodes_stat.nr_unused--;
- hlist_del_init(&inode->i_hash);
+ __remove_inode_hash(inode);
}
- list_del_init(&inode->i_list);
- list_del_init(&inode->i_sb_list);
+
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- inodes_stat.nr_inodes--;
+
+ /*
+ * Move the inode off the IO lists and LRU once I_FREEING is
+ * set so that it won't get moved back on there if it is dirty.
+ */
+ inode_lru_list_del(inode);
+ list_del_init(&inode->i_wb_list);
+
+ __inode_sb_list_del(inode);
spin_unlock(&inode_lock);
evict(inode);
- spin_lock(&inode_lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&inode_lock);
+ remove_inode_hash(inode);
wake_up_inode(inode);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
destroy_inode(inode);
@@ -1504,6 +1648,8 @@ void __init inode_init(void)
SLAB_MEM_SPREAD),
init_once);
register_shrinker(&icache_shrinker);
+ percpu_counter_init(&nr_inodes, 0);
+ percpu_counter_init(&nr_inodes_unused, 0);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index a6910e91cee8..ebad3b90752d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -101,3 +101,10 @@ extern void put_super(struct super_block *sb);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+
+/*
+ * inode.c
+ */
+extern int get_nr_dirty_inodes(void);
+extern int evict_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 09ff41a752a0..60c2b944d762 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -962,25 +962,23 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
* or getblk() if they are not. Returns the number of blocks inserted
* (-ve == error.)
*/
-int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
+int isofs_get_blocks(struct inode *inode, sector_t iblock,
struct buffer_head **bh, unsigned long nblocks)
{
- unsigned long b_off;
+ unsigned long b_off = iblock;
unsigned offset, sect_size;
unsigned int firstext;
unsigned long nextblk, nextoff;
- long iblock = (long)iblock_s;
int section, rv, error;
struct iso_inode_info *ei = ISOFS_I(inode);
error = -EIO;
rv = 0;
- if (iblock < 0 || iblock != iblock_s) {
+ if (iblock != b_off) {
printk(KERN_DEBUG "%s: block number too large\n", __func__);
goto abort;
}
- b_off = iblock;
offset = 0;
firstext = ei->i_first_extent;
@@ -998,8 +996,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
* I/O errors.
*/
if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
- printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n",
- __func__, iblock, (unsigned long) inode->i_size);
+ printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
+ __func__, b_off,
+ (unsigned long long)inode->i_size);
goto abort;
}
@@ -1025,9 +1024,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
if (++section > 100) {
printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
" aborting...\n", __func__);
- printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u "
+ printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
"nextblk=%lu nextoff=%lu\n", __func__,
- iblock, firstext, (unsigned) sect_size,
+ b_off, firstext, (unsigned) sect_size,
nextblk, nextoff);
goto abort;
}
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index ed78a3cf3cb0..79121aa5858b 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
mutex_unlock(&f->sem);
d_instantiate(dentry, old_dentry->d_inode);
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
- atomic_inc(&old_dentry->d_inode->i_count);
+ ihold(old_dentry->d_inode);
}
return ret;
}
@@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
/* Might as well let the VFS know */
d_instantiate(new_dentry, old_dentry->d_inode);
- atomic_inc(&old_dentry->d_inode->i_count);
+ ihold(old_dentry->d_inode);
new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
return ret;
}
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f8332dc8eeb2..3a09423b6c22 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -497,7 +497,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
* appear hashed, but do not put on any lists. hlist_del()
* will work fine and require no locking.
*/
- ip->i_hash.pprev = &ip->i_hash.next;
+ hlist_add_fake(&ip->i_hash);
return (ip);
}
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d945ea76b445..9466957ec841 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid, /* transaction identifier */
* lazy commit thread finishes processing
*/
if (tblk->xflag & COMMIT_DELETE) {
- atomic_inc(&tblk->u.ip->i_count);
+ ihold(tblk->u.ip);
/*
* Avoid a rare deadlock
*
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a9cf8e8675be..231ca4af9bce 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_dentry,
ip->i_ctime = CURRENT_TIME;
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
mark_inode_dirty(dir);
- atomic_inc(&ip->i_count);
+ ihold(ip);
iplist[0] = ip;
iplist[1] = dir;
diff --git a/fs/libfs.c b/fs/libfs.c
index 62baa0387d6e..304a5132ca27 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -255,7 +255,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
dget(dentry);
d_instantiate(dentry, inode);
return 0;
@@ -892,10 +892,6 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent);
*/
int generic_file_fsync(struct file *file, int datasync)
{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 0, /* metadata-only; caller takes care of data */
- };
struct inode *inode = file->f_mapping->host;
int err;
int ret;
@@ -906,7 +902,7 @@ int generic_file_fsync(struct file *file, int datasync)
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
return ret;
- err = sync_inode(inode, &wbc);
+ err = sync_inode_metadata(inode, 1);
if (ret == 0)
ret = err;
return ret;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1eb4e89e045b..409dfd65e9a1 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
return -EMLINK;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- atomic_inc(&inode->i_count);
+ ihold(inode);
inode->i_nlink++;
mark_inode_dirty_sync(inode);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index f3f3578393a4..c0d35a3accef 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
return add_nondir(dentry, inode);
}
diff --git a/fs/namei.c b/fs/namei.c
index 24896e833565..f7dbc06857ab 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1121,11 +1121,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *base, struct nameidata *nd)
{
+ struct inode *inode = base->d_inode;
struct dentry *dentry;
- struct inode *inode;
int err;
- inode = base->d_inode;
+ err = exec_permission(inode);
+ if (err)
+ return ERR_PTR(err);
/*
* See if the low-level filesystem might want
@@ -1161,11 +1163,6 @@ out:
*/
static struct dentry *lookup_hash(struct nameidata *nd)
{
- int err;
-
- err = exec_permission(nd->path.dentry->d_inode);
- if (err)
- return ERR_PTR(err);
return __lookup_hash(&nd->last, nd->path.dentry, nd);
}
@@ -1213,9 +1210,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
if (err)
return ERR_PTR(err);
- err = exec_permission(base->d_inode);
- if (err)
- return ERR_PTR(err);
return __lookup_hash(&this, base, NULL);
}
@@ -2291,7 +2285,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
goto slashes;
inode = dentry->d_inode;
if (inode)
- atomic_inc(&inode->i_count);
+ ihold(inode);
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit2;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7ca5182c0bed..8a415c9c5e55 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -595,7 +595,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
goto out_free;
}
- mnt->mnt_flags = old->mnt_flags;
+ mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
atomic_inc(&sb->s_active);
mnt->mnt_sb = sb;
mnt->mnt_root = dget(root);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 257e4052492e..07ac3847e562 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1801,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
d_drop(dentry);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_add(dentry, inode);
}
return error;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a70e446e1605..ac7b814ce162 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
iput(inode);
return -ENOMEM;
}
- /* Circumvent igrab(): we know the inode is not being freed */
- atomic_inc(&inode->i_count);
+ ihold(inode);
/*
* Ensure that this dentry is invisible to d_find_alias().
* Otherwise, it may be spliced into the tree by
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 661a6cf8e826..184938fcff04 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -281,23 +281,13 @@ commit_metadata(struct svc_fh *fhp)
{
struct inode *inode = fhp->fh_dentry->d_inode;
const struct export_operations *export_ops = inode->i_sb->s_export_op;
- int error = 0;
if (!EX_ISSYNC(fhp->fh_export))
return 0;
- if (export_ops->commit_metadata) {
- error = export_ops->commit_metadata(inode);
- } else {
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 0, /* metadata only */
- };
-
- error = sync_inode(inode, &wbc);
- }
-
- return error;
+ if (export_ops->commit_metadata)
+ return export_ops->commit_metadata(inode);
+ return sync_inode_metadata(inode, 1);
}
/*
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 185d1607cb00..6e9557ecf161 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -207,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
err = nilfs_add_nondir(dentry, inode);
if (!err)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 36802420d69a..4498a208df94 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -88,8 +88,6 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
- bool send = false;
- bool should_update_children = false;
if (!dentry)
dentry = path->dentry;
@@ -97,29 +95,12 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
return;
- spin_lock(&dentry->d_lock);
- parent = dentry->d_parent;
+ parent = dget_parent(dentry);
p_inode = parent->d_inode;
- if (fsnotify_inode_watches_children(p_inode)) {
- if (p_inode->i_fsnotify_mask & mask) {
- dget(parent);
- send = true;
- }
- } else {
- /*
- * The parent doesn't care about events on it's children but
- * at least one child thought it did. We need to run all the
- * children and update their d_flags to let them know p_inode
- * doesn't care about them any more.
- */
- dget(parent);
- should_update_children = true;
- }
-
- spin_unlock(&dentry->d_lock);
-
- if (send) {
+ if (unlikely(!fsnotify_inode_watches_children(p_inode)))
+ __fsnotify_update_child_dentry_flags(p_inode);
+ else if (p_inode->i_fsnotify_mask & mask) {
/* we are notifying a parent so come up with the new mask which
* specifies these are events which came from a child. */
mask |= FS_EVENT_ON_CHILD;
@@ -130,13 +111,9 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
else
fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
dentry->d_name.name, 0);
- dput(parent);
}
- if (unlikely(should_update_children)) {
- __fsnotify_update_child_dentry_flags(p_inode);
- dput(parent);
- }
+ dput(parent);
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 33297c005060..21ed10660b80 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -240,6 +240,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
{
struct inode *inode, *next_i, *need_iput = NULL;
+ spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inode *need_iput_tmp;
@@ -297,4 +298,5 @@ void fsnotify_unmount_inodes(struct list_head *list)
spin_lock(&inode_lock);
}
+ spin_unlock(&inode_lock);
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 19c5180f8a28..d3fbe5730bfc 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2911,8 +2911,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
goto unl_upcase_iput_tmp_ino_err_out_now;
}
if ((sb->s_root = d_alloc_root(vol->root_ino))) {
- /* We increment i_count simulating an ntfs_iget(). */
- atomic_inc(&vol->root_ino->i_count);
+ /* We grab a reference, simulating an ntfs_iget(). */
+ ihold(vol->root_ino);
ntfs_debug("Exiting, status successful.");
/* Release the default upcase if it has no users. */
mutex_lock(&ntfs_lock);
@@ -3021,21 +3021,6 @@ iput_tmp_ino_err_out_now:
if (vol->mft_ino && vol->mft_ino != tmp_ino)
iput(vol->mft_ino);
vol->mft_ino = NULL;
- /*
- * This is needed to get ntfs_clear_extent_inode() called for each
- * inode we have ever called ntfs_iget()/iput() on, otherwise we A)
- * leak resources and B) a subsequent mount fails automatically due to
- * ntfs_iget() never calling down into our ntfs_read_locked_inode()
- * method again... FIXME: Do we need to do this twice now because of
- * attribute inodes? I think not, so leave as is for now... (AIA)
- */
- if (invalidate_inodes(sb)) {
- ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
- "driver bug.");
- /* Copied from fs/super.c. I just love this message. (-; */
- printk("NTFS: Busy inodes after umount. Self-destruct in 5 "
- "seconds. Have a nice day...\n");
- }
/* Errors at this stage are irrelevant. */
err_out_now:
sb->s_fs_info = NULL;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 5cfeee118158..f1e962cb3b73 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -165,7 +165,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
* ocfs2 never allocates in this function - the only time we
* need to use BH_New is when we're extending i_size on a file
* system which doesn't support holes, in which case BH_New
- * allows block_prepare_write() to zero.
+ * allows __block_write_begin() to zero.
*
* If we see this on a sparse file system, then a truncate has
* raced us and removed the cluster. In this case, we clear
@@ -407,21 +407,6 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
return ret;
}
-/*
- * This is called from ocfs2_write_zero_page() which has handled it's
- * own cluster locking and has ensured allocation exists for those
- * blocks to be written.
- */
-int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
- unsigned from, unsigned to)
-{
- int ret;
-
- ret = block_prepare_write(page, from, to, ocfs2_get_block);
-
- return ret;
-}
-
/* Taken from ext3. We don't necessarily need the full blown
* functionality yet, but IMHO it's better to cut and paste the whole
* thing so we can avoid introducing our own bugs (and easily pick up
@@ -732,7 +717,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
}
/*
- * Some of this taken from block_prepare_write(). We already have our
+ * Some of this taken from __block_write_begin(). We already have our
* mapping by now though, and the entire write will be allocating or
* it won't, so not much need to use BH_New.
*
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 7606f663da6d..76bfdfda691a 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,9 +22,6 @@
#ifndef OCFS2_AOPS_H
#define OCFS2_AOPS_H
-int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
- unsigned from, unsigned to);
-
handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
struct page *page,
unsigned from,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index a7ebd9d42dc8..75e115f1bd73 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
if (inode) {
ip = DLMFS_I(inode);
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
@@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
if (!inode)
return NULL;
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1ca6867935bb..77b4c04a2809 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -796,13 +796,12 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
block_end = block_start + (1 << inode->i_blkbits);
/*
- * block_start is block-aligned. Bump it by one to
- * force ocfs2_{prepare,commit}_write() to zero the
+ * block_start is block-aligned. Bump it by one to force
+ * __block_write_begin and block_commit_write to zero the
* whole block.
*/
- ret = ocfs2_prepare_write_nolock(inode, page,
- block_start + 1,
- block_start + 1);
+ ret = __block_write_begin(page, block_start + 1, 0,
+ ocfs2_get_block);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e7bde21149ae..ff5744e1e36f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -742,7 +742,7 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out_commit;
}
- atomic_inc(&inode->i_count);
+ ihold(inode);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
diff --git a/fs/pipe.c b/fs/pipe.c
index 37eb1ebeaa90..d2d7566ce68e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void)
if (!inode)
goto fail_inode;
+ inode->i_ino = get_next_ino();
+
pipe = alloc_pipe_info(inode);
if (!pipe)
goto fail_iput;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 53dc8ad40ae6..9b094c1c8465 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -771,6 +771,8 @@ static const struct file_operations proc_single_file_operations = {
static int mem_open(struct inode* inode, struct file* file)
{
file->private_data = (void*)((long)current->self_exec_id);
+ /* OK to pass negative loff_t, we can catch out-of-range */
+ file->f_mode |= FMODE_UNSIGNED_OFFSET;
return 0;
}
@@ -1646,6 +1648,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
/* Common stuff */
ei = PROC_I(inode);
+ inode->i_ino = get_next_ino();
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_op = &proc_def_inode_operations;
@@ -2592,6 +2595,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
/* Initialize the inode */
ei = PROC_I(inode);
+ inode->i_ino = get_next_ino();
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
/*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 2fc52552271d..b652cb00906b 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
if (!inode)
goto out;
+ inode->i_ino = get_next_ino();
+
sysctl_head_get(head);
ei = PROC_I(inode);
ei->sysctl = head;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a5ebae70dc6d..67fadb1ad2c1 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
struct inode * inode = new_inode(sb);
if (inode) {
+ inode->i_ino = get_next_ino();
inode_init_owner(inode, dir, mode);
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/read_write.c b/fs/read_write.c
index e757ef26e4ce..9cd9d148105d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -31,6 +31,20 @@ const struct file_operations generic_ro_fops = {
EXPORT_SYMBOL(generic_ro_fops);
+static int
+__negative_fpos_check(struct file *file, loff_t pos, size_t count)
+{
+ /*
+ * pos or pos+count is negative here, check overflow.
+ * too big "count" will be caught in rw_verify_area().
+ */
+ if ((pos < 0) && (pos + count < pos))
+ return -EOVERFLOW;
+ if (file->f_mode & FMODE_UNSIGNED_OFFSET)
+ return 0;
+ return -EINVAL;
+}
+
/**
* generic_file_llseek_unlocked - lockless generic llseek implementation
* @file: file structure to seek on
@@ -62,7 +76,9 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
break;
}
- if (offset < 0 || offset > inode->i_sb->s_maxbytes)
+ if (offset < 0 && __negative_fpos_check(file, offset, 0))
+ return -EINVAL;
+ if (offset > inode->i_sb->s_maxbytes)
return -EINVAL;
/* Special lock needed here? */
@@ -137,7 +153,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
offset += file->f_pos;
}
retval = -EINVAL;
- if (offset >= 0) {
+ if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) {
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
@@ -221,6 +237,7 @@ bad:
}
#endif
+
/*
* rw_verify_area doesn't like huge counts. We limit
* them to something that fits in "int" so that others
@@ -238,8 +255,11 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
if (unlikely((ssize_t) count < 0))
return retval;
pos = *ppos;
- if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
- return retval;
+ if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) {
+ retval = __negative_fpos_check(file, pos, count);
+ if (retval)
+ return retval;
+ }
if (unlikely(inode->i_flock && mandatory_lock(inode))) {
retval = locks_mandatory_area(
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index c1f93896cb53..41656d40dc5c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -22,8 +22,6 @@
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
- unsigned from, unsigned to);
void reiserfs_evict_inode(struct inode *inode)
{
@@ -165,7 +163,7 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
** but tail is still sitting in a direct item, and we can't write to
** it. So, look through this page, and check all the mapped buffers
** to make sure they have valid block numbers. Any that don't need
-** to be unmapped, so that block_prepare_write will correctly call
+** to be unmapped, so that __block_write_begin will correctly call
** reiserfs_get_block to convert the tail into an unformatted node
*/
static inline void fix_tail_page_for_writing(struct page *page)
@@ -439,13 +437,13 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
}
/* special version of get_block that is only used by grab_tail_page right
-** now. It is sent to block_prepare_write, and when you try to get a
+** now. It is sent to __block_write_begin, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
-** -ENOENT instead of a valid buffer. block_prepare_write expects to
+** -ENOENT instead of a valid buffer. __block_write_begin expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
-** So, this allows block_prepare_write to be used for reading a single block
+** So, this allows __block_write_begin to be used for reading a single block
** in a page. Where it does not produce a valid page for holes, or past the
** end of the file. This turns out to be exactly what we need for reading
** tails for conversion.
@@ -558,11 +556,12 @@ static int convert_tail_for_hole(struct inode *inode,
**
** We must fix the tail page for writing because it might have buffers
** that are mapped, but have a block number of 0. This indicates tail
- ** data that has been read directly into the page, and block_prepare_write
- ** won't trigger a get_block in this case.
+ ** data that has been read directly into the page, and
+ ** __block_write_begin won't trigger a get_block in this case.
*/
fix_tail_page_for_writing(tail_page);
- retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
+ retval = __reiserfs_write_begin(tail_page, tail_start,
+ tail_end - tail_start);
if (retval)
goto unlock;
@@ -2033,7 +2032,7 @@ static int grab_tail_page(struct inode *inode,
/* start within the page of the last block in the file */
start = (offset / blocksize) * blocksize;
- error = block_prepare_write(page, start, offset,
+ error = __block_write_begin(page, start, offset - start,
reiserfs_get_block_create_0);
if (error)
goto unlock;
@@ -2628,8 +2627,7 @@ static int reiserfs_write_begin(struct file *file,
return ret;
}
-int reiserfs_prepare_write(struct file *f, struct page *page,
- unsigned from, unsigned to)
+int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
{
struct inode *inode = page->mapping->host;
int ret;
@@ -2650,7 +2648,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
th->t_refcount++;
}
- ret = block_prepare_write(page, from, to, reiserfs_get_block);
+ ret = __block_write_begin(page, from, len, reiserfs_get_block);
if (ret && reiserfs_transaction_running(inode->i_sb)) {
struct reiserfs_transaction_handle *th = current->journal_info;
/* this gets a little ugly. If reiserfs_get_block returned an
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 5cbb81e134ac..adf22b485cea 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -160,8 +160,6 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
- unsigned from, unsigned to);
/*
** reiserfs_unpack
** Function try to convert tail from direct item into indirect.
@@ -200,7 +198,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
}
/* we unpack by finding the page with the tail, and calling
- ** reiserfs_prepare_write on that page. This will force a
+ ** __reiserfs_write_begin on that page. This will force a
** reiserfs_get_block to unpack the tail for us.
*/
index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -210,7 +208,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
if (!page) {
goto out;
}
- retval = reiserfs_prepare_write(NULL, page, write_from, write_from);
+ retval = __reiserfs_write_begin(page, write_from, 0);
if (retval)
goto out_unlock;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ee78d4a0086a..ba5f51ec3458 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
inode->i_ctime = CURRENT_TIME_SEC;
reiserfs_update_sd(&th, inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_instantiate(dentry, inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
reiserfs_write_unlock(dir->i_sb);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c4cf273c672..5d04a7828e7a 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -418,13 +418,11 @@ static inline __u32 xattr_hash(const char *msg, int len)
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
- unsigned from, unsigned to);
static void update_ctime(struct inode *inode)
{
struct timespec now = current_fs_time(inode->i_sb);
- if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink ||
+ if (inode_unhashed(inode) || !inode->i_nlink ||
timespec_equal(&inode->i_ctime, &now))
return;
@@ -532,8 +530,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
rxh->h_hash = cpu_to_le32(xahash);
}
- err = reiserfs_prepare_write(NULL, page, page_offset,
- page_offset + chunk + skip);
+ err = __reiserfs_write_begin(page, page_offset, chunk + skip);
if (!err) {
if (buffer)
memcpy(data + skip, buffer + buffer_pos, chunk);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0e7cb1395a94..05d6b0e78c95 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -462,9 +462,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
if (size) {
char *p;
- spin_lock(&dcache_lock);
p = __d_path(path, root, buf, size);
- spin_unlock(&dcache_lock);
res = PTR_ERR(p);
if (!IS_ERR(p)) {
char *end = mangle_path(buf, p, esc);
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 00a70cab1f36..f678d421e541 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -406,21 +406,15 @@ void
smb_renew_times(struct dentry * dentry)
{
dget(dentry);
- spin_lock(&dentry->d_lock);
- for (;;) {
- struct dentry *parent;
+ dentry->d_time = jiffies;
- dentry->d_time = jiffies;
- if (IS_ROOT(dentry))
- break;
- parent = dentry->d_parent;
- dget(parent);
- spin_unlock(&dentry->d_lock);
+ while (!IS_ROOT(dentry)) {
+ struct dentry *parent = dget_parent(dentry);
dput(dentry);
dentry = parent;
- spin_lock(&dentry->d_lock);
+
+ dentry->d_time = jiffies;
}
- spin_unlock(&dentry->d_lock);
dput(dentry);
}
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 8fc5e50e142f..f6e9ee59757e 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -229,7 +229,6 @@ smb_invalidate_inodes(struct smb_sb_info *server)
{
VERBOSE("\n");
shrink_dcache_sb(SB_of(server));
- invalidate_inodes(SB_of(server));
}
/*
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 71c29b6670b4..3dcf638d4d3a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -332,16 +332,15 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
* and store it in reversed order [see reverse_string()]
*/
dget(entry);
- spin_lock(&entry->d_lock);
while (!IS_ROOT(entry)) {
struct dentry *parent;
if (maxlen < (3<<unicode)) {
- spin_unlock(&entry->d_lock);
dput(entry);
return -ENAMETOOLONG;
}
+ spin_lock(&entry->d_lock);
len = server->ops->convert(path, maxlen-2,
entry->d_name.name, entry->d_name.len,
server->local_nls, server->remote_nls);
@@ -359,15 +358,12 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
}
*path++ = '\\';
maxlen -= len+1;
-
- parent = entry->d_parent;
- dget(parent);
spin_unlock(&entry->d_lock);
+
+ parent = dget_parent(entry);
dput(entry);
entry = parent;
- spin_lock(&entry->d_lock);
}
- spin_unlock(&entry->d_lock);
dput(entry);
reverse_string(buf, path-buf);
diff --git a/fs/super.c b/fs/super.c
index 8819e3a7ff20..b9c9869165db 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -273,14 +273,14 @@ void generic_shutdown_super(struct super_block *sb)
get_fs_excl();
sb->s_flags &= ~MS_ACTIVE;
- /* bad name - it should be evict_inodes() */
- invalidate_inodes(sb);
+ fsnotify_unmount_inodes(&sb->s_inodes);
+
+ evict_inodes(sb);
if (sop->put_super)
sop->put_super(sb);
- /* Forget any remaining inodes */
- if (invalidate_inodes(sb)) {
+ if (!list_empty(&sb->s_inodes)) {
printk("VFS: Busy inodes after unmount of %s. "
"Self-destruct in 5 seconds. Have a nice day...\n",
sb->s_id);
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 33e047b59b8d..11e7f7d11cd0 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
return add_nondir(dentry, inode);
}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 87ebcce72213..14f64b689d7f 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
lock_2_inodes(dir, inode);
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
inode->i_ctime = ubifs_current_time(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index bf5fc674193c..6d8dc02baebb 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
inc_nlink(inode);
inode->i_ctime = current_fs_time(inode->i_sb);
mark_inode_dirty(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_instantiate(dentry, inode);
unlock_kernel();
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index b056f02b1fb3..12f39b9e4437 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
error = ufs_add_nondir(dentry, inode);
unlock_kernel();
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ba5312802aa9..63fd2c07cb57 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1580,6 +1580,7 @@ xfs_mapping_buftarg(
XFS_BUFTARG_NAME(btp));
return ENOMEM;
}
+ inode->i_ino = get_next_ino();
inode->i_mode = S_IFBLK;
inode->i_bdev = bdev;
inode->i_rdev = bdev->bd_dev;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index ec858e09d546..96107efc0c61 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -317,7 +317,7 @@ xfs_vn_link(
if (unlikely(error))
return -error;
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_instantiate(dentry, inode);
return 0;
}
@@ -760,7 +760,9 @@ xfs_setup_inode(
inode->i_ino = ip->i_ino;
inode->i_state = I_NEW;
- inode_add_to_lists(ip->i_mount->m_super, inode);
+
+ inode_sb_list_add(inode);
+ insert_inode_hash(inode);
inode->i_mode = ip->i_d.di_mode;
inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index ab31ce5aeaf9..cf808782c065 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -576,7 +576,7 @@ xfs_max_file_offset(
/* Figure out maximum filesize, on Linux this can depend on
* the filesystem blocksize (on 32 bit platforms).
- * __block_prepare_write does this in an [unsigned] long...
+ * __block_write_begin does this in an [unsigned] long...
* page->index << (PAGE_CACHE_SHIFT - bbits)
* So, for page sized blocks (4K on 32 bit platforms),
* this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fac52290de90..fb2ca2e4cdc9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -500,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *);
#define IHOLD(ip) \
do { \
ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
- atomic_inc(&(VFS_I(ip)->i_count)); \
+ ihold(VFS_I(ip)); \
trace_xfs_ihold(ip, _THIS_IP_); \
} while (0)