summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig5
-rw-r--r--fs/9p/vfs_inode_dotl.c11
-rw-r--r--fs/Kconfig17
-rw-r--r--fs/binfmt_flat.c8
-rw-r--r--fs/block_dev.c45
-rw-r--r--fs/btrfs/acl.c5
-rw-r--r--fs/btrfs/extent-tree.c37
-rw-r--r--fs/btrfs/extent_io.c1
-rw-r--r--fs/btrfs/ioctl.c24
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/ceph/addr.c12
-rw-r--r--fs/ceph/caps.c91
-rw-r--r--fs/ceph/dir.c7
-rw-r--r--fs/ceph/export.c25
-rw-r--r--fs/ceph/file.c5
-rw-r--r--fs/ceph/inode.c7
-rw-r--r--fs/ceph/mds_client.c9
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c12
-rw-r--r--fs/cifs/Kconfig35
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README12
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifs_fs_sb.h4
-rw-r--r--fs/cifs/cifs_unicode.c14
-rw-r--r--fs/cifs/cifs_unicode.h3
-rw-r--r--fs/cifs/cifsacl.c483
-rw-r--r--fs/cifs/cifsacl.h25
-rw-r--r--fs/cifs/cifsencrypt.c12
-rw-r--r--fs/cifs/cifsfs.c119
-rw-r--r--fs/cifs/cifsfs.h20
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifspdu.h37
-rw-r--r--fs/cifs/cifsproto.h30
-rw-r--r--fs/cifs/cifssmb.c377
-rw-r--r--fs/cifs/connect.c402
-rw-r--r--fs/cifs/export.c4
-rw-r--r--fs/cifs/file.c167
-rw-r--r--fs/cifs/inode.c129
-rw-r--r--fs/cifs/misc.c12
-rw-r--r--fs/cifs/netmisc.c7
-rw-r--r--fs/cifs/sess.c28
-rw-r--r--fs/cifs/smbdes.c418
-rw-r--r--fs/cifs/smbencrypt.c124
-rw-r--r--fs/cifs/transport.c66
-rw-r--r--fs/cifs/xattr.c20
-rw-r--r--fs/compat.c235
-rw-r--r--fs/configfs/dir.c39
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/debugfs/file.c19
-rw-r--r--fs/dlm/config.c9
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/dlm_internal.h3
-rw-r--r--fs/dlm/lock.c182
-rw-r--r--fs/dlm/lock.h1
-rw-r--r--fs/dlm/lockspace.c6
-rw-r--r--fs/dlm/plock.c65
-rw-r--r--fs/dlm/user.c1
-rw-r--r--fs/drop_caches.c5
-rw-r--r--fs/exec.c139
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext3/namei.c80
-rw-r--r--fs/fat/cache.c7
-rw-r--r--fs/fat/dir.c32
-rw-r--r--fs/fat/fat.h15
-rw-r--r--fs/fat/fatent.c4
-rw-r--r--fs/fat/inode.c74
-rw-r--r--fs/fat/misc.c44
-rw-r--r--fs/fat/namei_msdos.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fscache/operation.c10
-rw-r--r--fs/fscache/page.c13
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/gfs2/Makefile4
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/dir.c197
-rw-r--r--fs/gfs2/dir.h4
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/file.c46
-rw-r--r--fs/gfs2/glock.c99
-rw-r--r--fs/gfs2/glock.h3
-rw-r--r--fs/gfs2/glops.c172
-rw-r--r--fs/gfs2/glops.h2
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/inode.c1510
-rw-r--r--fs/gfs2/inode.h8
-rw-r--r--fs/gfs2/log.c208
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c39
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/ops_fstype.c32
-rw-r--r--fs/gfs2/ops_inode.c1344
-rw-r--r--fs/gfs2/quota.c12
-rw-r--r--fs/gfs2/quota.h4
-rw-r--r--fs/gfs2/rgrp.c24
-rw-r--r--fs/gfs2/super.c138
-rw-r--r--fs/gfs2/sys.c6
-rw-r--r--fs/gfs2/trace_gfs2.h38
-rw-r--r--fs/hpfs/Kconfig1
-rw-r--r--fs/hpfs/alloc.c118
-rw-r--r--fs/hpfs/anode.c138
-rw-r--r--fs/hpfs/buffer.c24
-rw-r--r--fs/hpfs/dir.c22
-rw-r--r--fs/hpfs/dnode.c174
-rw-r--r--fs/hpfs/ea.c136
-rw-r--r--fs/hpfs/file.c31
-rw-r--r--fs/hpfs/hpfs.h439
-rw-r--r--fs/hpfs/hpfs_fn.h80
-rw-r--r--fs/hpfs/inode.c47
-rw-r--r--fs/hpfs/map.c56
-rw-r--r--fs/hpfs/name.c33
-rw-r--r--fs/hpfs/namei.c106
-rw-r--r--fs/hpfs/super.c118
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/inode.c10
-rw-r--r--fs/jbd/commit.c15
-rw-r--r--fs/jbd/journal.c16
-rw-r--r--fs/jbd/transaction.c3
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/logfs/dev_bdev.c1
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/logfs/super.c8
-rw-r--r--fs/mbcache.c10
-rw-r--r--fs/namei.c4
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4filelayout.c27
-rw-r--r--fs/nfs/nfs4filelayout.h2
-rw-r--r--fs/nfs/nfs4filelayoutdev.c34
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/nfs/pnfs.c34
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nilfs2/alloc.c14
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/btnode.c19
-rw-r--r--fs/nilfs2/btnode.h4
-rw-r--r--fs/nilfs2/btree.c38
-rw-r--r--fs/nilfs2/cpfile.c24
-rw-r--r--fs/nilfs2/dat.c4
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/nilfs2/gcinode.c25
-rw-r--r--fs/nilfs2/ifile.c4
-rw-r--r--fs/nilfs2/inode.c23
-rw-r--r--fs/nilfs2/ioctl.c61
-rw-r--r--fs/nilfs2/mdt.c8
-rw-r--r--fs/nilfs2/mdt.h9
-rw-r--r--fs/nilfs2/nilfs.h7
-rw-r--r--fs/nilfs2/page.c79
-rw-r--r--fs/nilfs2/page.h7
-rw-r--r--fs/nilfs2/recovery.c12
-rw-r--r--fs/nilfs2/segbuf.c17
-rw-r--r--fs/nilfs2/segment.c190
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/sufile.c274
-rw-r--r--fs/nilfs2/sufile.h4
-rw-r--r--fs/nilfs2/super.c131
-rw-r--r--fs/nilfs2/the_nilfs.c24
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c61
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c3
-rw-r--r--fs/ocfs2/file.c12
-rw-r--r--fs/ocfs2/journal.c3
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/partitions/check.c8
-rw-r--r--fs/partitions/efi.c6
-rw-r--r--fs/partitions/ldm.c7
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/base.c20
-rw-r--r--fs/proc/generic.c1
-rw-r--r--fs/proc/inode.c7
-rw-r--r--fs/proc/internal.h26
-rw-r--r--fs/proc/namespaces.c198
-rw-r--r--fs/proc/task_mmu.c218
-rw-r--r--fs/pstore/platform.c12
-rw-r--r--fs/quota/dquot.c5
-rw-r--r--fs/splice.c33
-rw-r--r--fs/squashfs/Kconfig4
-rw-r--r--fs/squashfs/cache.c2
-rw-r--r--fs/super.c3
-rw-r--r--fs/sysfs/file.c12
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/timerfd.c102
-rw-r--r--fs/ubifs/budget.c104
-rw-r--r--fs/ubifs/commit.c2
-rw-r--r--fs/ubifs/debug.c167
-rw-r--r--fs/ubifs/debug.h178
-rw-r--r--fs/ubifs/dir.c4
-rw-r--r--fs/ubifs/file.c28
-rw-r--r--fs/ubifs/find.c10
-rw-r--r--fs/ubifs/gc.c71
-rw-r--r--fs/ubifs/io.c33
-rw-r--r--fs/ubifs/journal.c29
-rw-r--r--fs/ubifs/log.c48
-rw-r--r--fs/ubifs/lprops.c115
-rw-r--r--fs/ubifs/lpt_commit.c55
-rw-r--r--fs/ubifs/master.c8
-rw-r--r--fs/ubifs/misc.h17
-rw-r--r--fs/ubifs/orphan.c3
-rw-r--r--fs/ubifs/recovery.c354
-rw-r--r--fs/ubifs/replay.c468
-rw-r--r--fs/ubifs/sb.c153
-rw-r--r--fs/ubifs/super.c61
-rw-r--r--fs/ubifs/tnc.c10
-rw-r--r--fs/ubifs/tnc_commit.c18
-rw-r--r--fs/ubifs/ubifs-media.h30
-rw-r--r--fs/ubifs/ubifs.h86
-rw-r--r--fs/ubifs/xattr.c8
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c26
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_message.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_message.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h76
-rw-r--r--fs/xfs/quota/xfs_qm.c6
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_alloc.c844
-rw-r--r--fs/xfs/xfs_alloc.h15
-rw-r--r--fs/xfs/xfs_alloc_btree.c13
-rw-r--r--fs/xfs/xfs_dfrag.c6
-rw-r--r--fs/xfs/xfs_inode.c4
-rw-r--r--fs/xfs/xfs_inode_item.c1
-rw-r--r--fs/xfs/xfs_log.c15
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c5
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c75
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_trans_ail.c47
-rw-r--r--fs/xfs/xfs_types.h2
247 files changed, 8024 insertions, 6537 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 814ac4e213a8..0a93dc1cb4ac 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -1,6 +1,6 @@
config 9P_FS
- tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
- depends on INET && NET_9P && EXPERIMENTAL
+ tristate "Plan 9 Resource Sharing Support (9P2000)"
+ depends on INET && NET_9P
help
If you say Y here, you will get experimental support for
Plan 9 resource sharing via the 9P2000 protocol.
@@ -10,7 +10,6 @@ config 9P_FS
If unsure, say N.
if 9P_FS
-
config 9P_FSCACHE
bool "Enable 9P client caching support (EXPERIMENTAL)"
depends on EXPERIMENTAL
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 82a7c38ddad0..691c78f58bef 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -259,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
if (IS_ERR(inode_fid)) {
err = PTR_ERR(inode_fid);
mutex_unlock(&v9inode->v_mutex);
- goto error;
+ goto err_clunk_old_fid;
}
v9inode->writeback_fid = (void *) inode_fid;
}
@@ -267,8 +267,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
/* Since we are opening a file, assign the open fid to the file */
filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
- p9_client_clunk(ofid);
- return PTR_ERR(filp);
+ err = PTR_ERR(filp);
+ goto err_clunk_old_fid;
}
filp->private_data = ofid;
#ifdef CONFIG_9P_FSCACHE
@@ -278,10 +278,11 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
return 0;
error:
- if (ofid)
- p9_client_clunk(ofid);
if (fid)
p9_client_clunk(fid);
+err_clunk_old_fid:
+ if (ofid)
+ p9_client_clunk(ofid);
return err;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index efb7d4ec6fcf..19891aab9c6e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -124,6 +124,7 @@ config TMPFS
config TMPFS_POSIX_ACL
bool "Tmpfs POSIX Access Control Lists"
depends on TMPFS
+ select TMPFS_XATTR
select GENERIC_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
@@ -134,6 +135,22 @@ config TMPFS_POSIX_ACL
If you don't know what Access Control Lists are, say N.
+config TMPFS_XATTR
+ bool "Tmpfs extended attributes"
+ depends on TMPFS
+ default n
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ Currently this enables support for the trusted.* and
+ security.* namespaces.
+
+ You need this for POSIX ACL support on tmpfs.
+
+ If unsure, say N.
+
config HUGETLBFS
bool "HugeTLB file system support"
depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 397d3057d336..1bffbe0ed778 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -820,6 +820,8 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
int res;
char buf[16];
+ memset(&bprm, 0, sizeof(bprm));
+
/* Create the file name */
sprintf(buf, "/lib/lib%d.so", id);
@@ -835,6 +837,12 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
if (!bprm.cred)
goto out;
+ /* We don't really care about recalculating credentials at this point
+ * as we're past the point of no return and are dealing with shared
+ * libraries.
+ */
+ bprm.cred_prepared = 1;
+
res = prepare_binprm(&bprm);
if (!IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5147bdd3b8e1..1f2b19978333 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (!bdev->bd_part)
goto out_clear;
+ ret = 0;
if (disk->fops->open) {
ret = disk->fops->open(bdev, mode);
if (ret == -ERESTARTSYS) {
@@ -1118,18 +1119,26 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
put_disk(disk);
goto restart;
}
- if (ret)
- goto out_clear;
}
- if (!bdev->bd_openers) {
+
+ if (!ret && !bdev->bd_openers) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
bdi = &default_backing_dev_info;
bdev_inode_switch_bdi(bdev->bd_inode, bdi);
}
- if (bdev->bd_invalidated)
+
+ /*
+ * If the device is invalidated, rescan partition
+ * if open succeeded or failed with -ENOMEDIUM.
+ * The latter is necessary to prevent ghost
+ * partitions on a removed medium.
+ */
+ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
rescan_partitions(disk, bdev);
+ if (ret)
+ goto out_clear;
} else {
struct block_device *whole;
whole = bdget_disk(disk, 0);
@@ -1153,13 +1162,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
} else {
if (bdev->bd_contains == bdev) {
- if (bdev->bd_disk->fops->open) {
+ ret = 0;
+ if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
- if (ret)
- goto out_unlock_bdev;
- }
- if (bdev->bd_invalidated)
+ /* the same as first opener case, read comment there */
+ if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
rescan_partitions(bdev->bd_disk, bdev);
+ if (ret)
+ goto out_unlock_bdev;
}
/* only one opener holds refs to the module and disk */
module_put(disk->fops->owner);
@@ -1228,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
res = __blkdev_get(bdev, mode, 0);
if (whole) {
+ struct gendisk *disk = whole->bd_disk;
+
/* finish claiming */
mutex_lock(&bdev->bd_mutex);
spin_lock(&bdev_lock);
@@ -1254,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
spin_unlock(&bdev_lock);
/*
- * Block event polling for write claims. Any write
- * holder makes the write_holder state stick until all
- * are released. This is good enough and tracking
- * individual writeable reference is too fragile given
- * the way @mode is used in blkdev_get/put().
+ * Block event polling for write claims if requested. Any
+ * write holder makes the write_holder state stick until
+ * all are released. This is good enough and tracking
+ * individual writeable reference is too fragile given the
+ * way @mode is used in blkdev_get/put().
*/
- if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+ if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+ !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
bdev->bd_write_holder = true;
- disk_block_events(bdev->bd_disk);
+ disk_block_events(disk);
}
mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 5d505aaa72fb..44ea5b92e1ba 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,12 +178,13 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
if (value) {
acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+
if (acl) {
ret = posix_acl_valid(acl);
if (ret)
goto out;
- } else if (IS_ERR(acl)) {
- return PTR_ERR(acl);
}
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cd52f7f556ef..9ee6bd55e16c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8856,23 +8856,38 @@ out:
int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *space_info;
+ struct btrfs_super_block *disk_super;
+ u64 features;
+ u64 flags;
+ int mixed = 0;
int ret;
- ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0,
- &space_info);
- if (ret)
- return ret;
+ disk_super = &fs_info->super_copy;
+ if (!btrfs_super_root(disk_super))
+ return 1;
- ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0,
- &space_info);
- if (ret)
- return ret;
+ features = btrfs_super_incompat_flags(disk_super);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = 1;
- ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0,
- &space_info);
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+ ret = update_space_info(fs_info, flags, 0, 0, &space_info);
if (ret)
- return ret;
+ goto out;
+ if (mixed) {
+ flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
+ ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+ } else {
+ flags = BTRFS_BLOCK_GROUP_METADATA;
+ ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+ if (ret)
+ goto out;
+
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+ }
+out:
return ret;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ba41da59e31b..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
+#include <linux/prefetch.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ffb48d6c5433..2616f7ed4799 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -81,6 +81,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
iflags |= FS_NOATIME_FL;
if (flags & BTRFS_INODE_DIRSYNC)
iflags |= FS_DIRSYNC_FL;
+ if (flags & BTRFS_INODE_NODATACOW)
+ iflags |= FS_NOCOW_FL;
+
+ if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
+ iflags |= FS_COMPR_FL;
+ else if (flags & BTRFS_INODE_NOCOMPRESS)
+ iflags |= FS_NOCOMP_FL;
return iflags;
}
@@ -144,16 +151,13 @@ static int check_flags(unsigned int flags)
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
FS_SYNC_FL | FS_DIRSYNC_FL | \
- FS_NOCOMP_FL | FS_COMPR_FL | \
- FS_NOCOW_FL | FS_COW_FL))
+ FS_NOCOMP_FL | FS_COMPR_FL |
+ FS_NOCOW_FL))
return -EOPNOTSUPP;
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
return -EINVAL;
- if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
- return -EINVAL;
-
return 0;
}
@@ -218,6 +222,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
ip->flags |= BTRFS_INODE_DIRSYNC;
else
ip->flags &= ~BTRFS_INODE_DIRSYNC;
+ if (flags & FS_NOCOW_FL)
+ ip->flags |= BTRFS_INODE_NODATACOW;
+ else
+ ip->flags &= ~BTRFS_INODE_NODATACOW;
/*
* The COMPRESS flag can only be changed by users, while the NOCOMPRESS
@@ -230,11 +238,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
} else if (flags & FS_COMPR_FL) {
ip->flags |= BTRFS_INODE_COMPRESS;
ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ } else {
+ ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
- if (flags & FS_NOCOW_FL)
- ip->flags |= BTRFS_INODE_NODATACOW;
- else if (flags & FS_COW_FL)
- ip->flags &= ~BTRFS_INODE_NODATACOW;
trans = btrfs_join_transaction(root, 1);
BUG_ON(IS_ERR(trans));
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 199a80134312..f340f7c99d09 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -709,7 +709,7 @@ again:
WARN_ON(cur->checked);
if (!list_empty(&cur->upper)) {
/*
- * the backref was added previously when processsing
+ * the backref was added previously when processing
* backref of type BTRFS_TREE_BLOCK_REF_KEY
*/
BUG_ON(!list_is_singular(&cur->upper));
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e159c529fd2b..33da49dc3cc6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -775,6 +775,13 @@ get_more_pages:
ci->i_truncate_seq,
ci->i_truncate_size,
&inode->i_mtime, true, 1, 0);
+
+ if (!req) {
+ rc = -ENOMEM;
+ unlock_page(page);
+ break;
+ }
+
max_pages = req->r_num_pages;
alloc_page_vec(fsc, req);
@@ -841,7 +848,8 @@ get_more_pages:
op->payload_len = cpu_to_le32(len);
req->r_request->hdr.data_len = cpu_to_le32(len);
- ceph_osdc_start_request(&fsc->client->osdc, req, true);
+ rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+ BUG_ON(rc);
req = NULL;
/* continue? */
@@ -873,8 +881,6 @@ release_pvec_pages:
out:
if (req)
ceph_osdc_put_request(req);
- if (rc > 0)
- rc = 0; /* vfs expects us to return 0 */
ceph_put_snap_context(snapc);
dout("writepages done, rc = %d\n", rc);
return rc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5323c330bbf3..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -569,7 +569,8 @@ retry:
list_add_tail(&cap->session_caps, &session->s_caps);
session->s_nr_caps++;
spin_unlock(&session->s_cap_lock);
- }
+ } else if (new_cap)
+ ceph_put_cap(mdsc, new_cap);
if (!ci->i_snap_realm) {
/*
@@ -819,7 +820,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
used |= CEPH_CAP_FILE_CACHE;
if (ci->i_wr_ref)
used |= CEPH_CAP_FILE_WR;
- if (ci->i_wrbuffer_ref)
+ if (ci->i_wb_ref || ci->i_wrbuffer_ref)
used |= CEPH_CAP_FILE_BUFFER;
return used;
}
@@ -1331,10 +1332,11 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
}
/*
- * Mark caps dirty. If inode is newly dirty, add to the global dirty
- * list.
+ * Mark caps dirty. If inode is newly dirty, return the dirty flags.
+ * Caller is then responsible for calling __mark_inode_dirty with the
+ * returned flags value.
*/
-void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
{
struct ceph_mds_client *mdsc =
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1357,7 +1359,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
spin_unlock(&mdsc->cap_dirty_lock);
if (ci->i_flushing_caps == 0) {
- igrab(inode);
+ ihold(inode);
dirty |= I_DIRTY_SYNC;
}
}
@@ -1365,9 +1367,8 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
(mask & CEPH_CAP_FILE_BUFFER))
dirty |= I_DIRTY_DATASYNC;
- if (dirty)
- __mark_inode_dirty(inode, dirty);
__cap_delay_requeue(mdsc, ci);
+ return dirty;
}
/*
@@ -1990,11 +1991,11 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
if (got & CEPH_CAP_FILE_WR)
ci->i_wr_ref++;
if (got & CEPH_CAP_FILE_BUFFER) {
- if (ci->i_wrbuffer_ref == 0)
- igrab(&ci->vfs_inode);
- ci->i_wrbuffer_ref++;
- dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n",
- &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref);
+ if (ci->i_wb_ref == 0)
+ ihold(&ci->vfs_inode);
+ ci->i_wb_ref++;
+ dout("__take_cap_refs %p wb %d -> %d (?)\n",
+ &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
}
}
@@ -2169,12 +2170,12 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
if (--ci->i_rdcache_ref == 0)
last++;
if (had & CEPH_CAP_FILE_BUFFER) {
- if (--ci->i_wrbuffer_ref == 0) {
+ if (--ci->i_wb_ref == 0) {
last++;
put++;
}
- dout("put_cap_refs %p wrbuffer %d -> %d (?)\n",
- inode, ci->i_wrbuffer_ref+1, ci->i_wrbuffer_ref);
+ dout("put_cap_refs %p wb %d -> %d (?)\n",
+ inode, ci->i_wb_ref+1, ci->i_wb_ref);
}
if (had & CEPH_CAP_FILE_WR)
if (--ci->i_wr_ref == 0) {
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
struct ceph_mds_session *session,
int *open_target_sessions)
{
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
* export targets, so that we get the matching IMPORT
*/
*open_target_sessions = 1;
+
+ /*
+ * we can't flush dirty caps that we've seen the
+ * EXPORT but no IMPORT for
+ */
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (!list_empty(&ci->i_dirty_item)) {
+ dout(" moving %p to cap_dirty_migrating\n",
+ inode);
+ list_move(&ci->i_dirty_item,
+ &mdsc->cap_dirty_migrating);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
}
__ceph_remove_cap(cap);
}
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
ci->i_cap_exporting_issued = 0;
ci->i_cap_exporting_mseq = 0;
ci->i_cap_exporting_mds = -1;
+
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (!list_empty(&ci->i_dirty_item)) {
+ dout(" moving %p back to cap_dirty\n", inode);
+ list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
} else {
dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
inode, ci, mds, mseq);
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
*/
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{
- struct ceph_inode_info *ci, *nci = NULL;
- struct inode *inode, *ninode = NULL;
- struct list_head *p, *n;
+ struct ceph_inode_info *ci;
+ struct inode *inode;
dout("flush_dirty_caps\n");
spin_lock(&mdsc->cap_dirty_lock);
- list_for_each_safe(p, n, &mdsc->cap_dirty) {
- if (nci) {
- ci = nci;
- inode = ninode;
- ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
- dout("flush_dirty_caps inode %p (was next inode)\n",
- inode);
- } else {
- ci = list_entry(p, struct ceph_inode_info,
- i_dirty_item);
- inode = igrab(&ci->vfs_inode);
- BUG_ON(!inode);
- dout("flush_dirty_caps inode %p\n", inode);
- }
- if (n != &mdsc->cap_dirty) {
- nci = list_entry(n, struct ceph_inode_info,
- i_dirty_item);
- ninode = igrab(&nci->vfs_inode);
- BUG_ON(!ninode);
- nci->i_ceph_flags |= CEPH_I_NOFLUSH;
- dout("flush_dirty_caps next inode %p, noflush\n",
- ninode);
- } else {
- nci = NULL;
- ninode = NULL;
- }
+ while (!list_empty(&mdsc->cap_dirty)) {
+ ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
+ i_dirty_item);
+ inode = igrab(&ci->vfs_inode);
+ dout("flush_dirty_caps %p\n", inode);
spin_unlock(&mdsc->cap_dirty_lock);
if (inode) {
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
spin_lock(&mdsc->cap_dirty_lock);
}
spin_unlock(&mdsc->cap_dirty_lock);
+ dout("flush_dirty_caps done\n");
}
/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1a867a3601ae..33729e822bb9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -360,7 +360,7 @@ more:
rinfo = &fi->last_readdir->r_reply_info;
dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
rinfo->dir_nr, off, fi->offset);
- while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) {
+ while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
u64 pos = ceph_make_fpos(frag, off);
struct ceph_mds_reply_inode *in =
rinfo->dir_in[off - fi->offset].in;
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
int left;
+ const int bufsize = 1024;
if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
return -EISDIR;
if (!cf->dir_info) {
- cf->dir_info = kmalloc(1024, GFP_NOFS);
+ cf->dir_info = kmalloc(bufsize, GFP_NOFS);
if (!cf->dir_info)
return -ENOMEM;
cf->dir_info_len =
- sprintf(cf->dir_info,
+ snprintf(cf->dir_info, bufsize,
"entries: %20lld\n"
" files: %20lld\n"
" subdirs: %20lld\n"
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..a610d3d67488 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
static struct dentry *__fh_to_dentry(struct super_block *sb,
struct ceph_nfs_fh *fh)
{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
struct dentry *dentry;
struct ceph_vino vino;
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
vino.ino = fh->ino;
vino.snap = CEPH_NOSNAP;
inode = ceph_find_inode(sb, vino);
- if (!inode)
- return ERR_PTR(-ESTALE);
+ if (!inode) {
+ struct ceph_mds_request *req;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ req->r_ino1 = vino;
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ igrab(inode);
+ ceph_mdsc_put_request(req);
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+ }
dentry = d_obtain_alias(inode);
if (IS_ERR(dentry)) {
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ igrab(inode);
ceph_mdsc_put_request(req);
- inode = ceph_find_inode(sb, vino);
if (!inode)
return ERR_PTR(err ? err : -ESTALE);
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 159b512d5a27..203252d88d9f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -734,9 +734,12 @@ retry_snap:
}
}
if (ret >= 0) {
+ int dirty;
spin_lock(&inode->i_lock);
- __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&inode->i_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
}
out:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index b54c97da1c43..70b6a4839c38 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -355,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_rd_ref = 0;
ci->i_rdcache_ref = 0;
ci->i_wr_ref = 0;
+ ci->i_wb_ref = 0;
ci->i_wrbuffer_ref = 0;
ci->i_wrbuffer_ref_head = 0;
ci->i_shared_gen = 0;
@@ -1567,6 +1568,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
int release = 0, dirtied = 0;
int mask = 0;
int err = 0;
+ int inode_dirty_flags = 0;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -1725,13 +1727,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
dout("setattr %p ATTR_FILE ... hrm!\n", inode);
if (dirtied) {
- __ceph_mark_dirty_caps(ci, dirtied);
+ inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
inode->i_ctime = CURRENT_TIME;
}
release &= issued;
spin_unlock(&inode->i_lock);
+ if (inode_dirty_flags)
+ __mark_inode_dirty(inode, inode_dirty_flags);
+
if (mask) {
req->r_inode = igrab(inode);
req->r_inode_drop = release;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f60b07b0feb0..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
if (dir) {
struct ceph_inode_info *ci = ceph_inode(dir);
+ ihold(dir);
spin_lock(&ci->i_unsafe_lock);
req->r_unsafe_dir = dir;
list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_dir_item);
spin_unlock(&ci->i_unsafe_lock);
+
+ iput(req->r_unsafe_dir);
+ req->r_unsafe_dir = NULL;
}
ceph_mdsc_put_request(req);
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
{
struct super_block *sb = mdsc->fsc->sb;
struct inode *inode;
- struct ceph_inode_info *ci;
struct dentry *parent, *dentry;
struct ceph_dentry_info *di;
int mds = session->s_mds;
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
}
- ci = ceph_inode(inode);
/* dentry */
parent = d_find_alias(inode);
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->cap_flush_seq = 0;
INIT_LIST_HEAD(&mdsc->cap_dirty);
+ INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
mdsc->num_cap_flushing = 0;
spin_lock_init(&mdsc->cap_dirty_lock);
init_waitqueue_head(&mdsc->cap_flushing_wq);
@@ -3304,8 +3307,8 @@ static void con_put(struct ceph_connection *con)
{
struct ceph_mds_session *s = con->private;
+ dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
ceph_put_mds_session(s);
- dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref));
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
u64 cap_flush_seq;
struct list_head cap_dirty; /* inodes with dirty caps */
+ struct list_head cap_dirty_migrating; /* ...that are migrating... */
int num_cap_flushing; /* # caps we are flushing */
spinlock_t cap_dirty_lock; /* protects above items */
wait_queue_head_t cap_flushing_wq;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e86ec1155f8f..24067d68a554 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -206,7 +206,7 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
up_write(&mdsc->snap_rwsem);
} else {
spin_lock(&mdsc->snap_empty_lock);
- list_add(&mdsc->snap_empty, &realm->empty_item);
+ list_add(&realm->empty_item, &mdsc->snap_empty);
spin_unlock(&mdsc->snap_empty_lock);
}
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 619fe719968f..f5cabefa98dc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,7 +293,7 @@ struct ceph_inode_info {
/* held references to caps */
int i_pin_ref;
- int i_rd_ref, i_rdcache_ref, i_wr_ref;
+ int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
int i_wrbuffer_ref, i_wrbuffer_ref_head;
u32 i_shared_gen; /* increment each time we get FILE_SHARED */
u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
@@ -506,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
{
return ci->i_dirty_caps | ci->i_flushing_caps;
}
-extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
extern int __ceph_caps_used(struct ceph_inode_info *ci);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c9eba6ef9df..f2b628696180 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -703,6 +703,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
struct ceph_inode_xattr *xattr = NULL;
int issued;
int required_blob_size;
+ int dirty;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -763,11 +764,12 @@ retry:
dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
err = __set_xattr(ci, newname, name_len, newval,
val_len, 1, 1, 1, &xattr);
- __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
spin_unlock(&inode->i_lock);
-
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
return err;
do_sync:
@@ -810,6 +812,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
int issued;
int err;
+ int dirty;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -833,12 +836,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
goto do_sync;
err = __remove_xattr_by_name(ceph_inode(inode), name);
- __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
spin_unlock(&inode->i_lock);
-
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
return err;
do_sync:
spin_unlock(&inode->i_lock);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..75c47cd8d086 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
select CRYPTO_MD5
select CRYPTO_HMAC
select CRYPTO_ARC4
+ select CRYPTO_DES
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,28 @@ config CIFS_ACL
Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
is handed over to the application/caller.
-config CIFS_EXPERIMENTAL
- bool "CIFS Experimental Features (EXPERIMENTAL)"
+config CIFS_SMB2
+ bool "SMB2 network file system support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && INET && BROKEN
+ select NLS
+ select KEYS
+ select FSCACHE
+ select DNS_RESOLVER
+
+ help
+ This enables experimental support for the SMB2 (Server Message Block
+ version 2) protocol. The SMB2 protocol is the successor to the
+ popular CIFS and SMB network file sharing protocols. SMB2 is the
+ native file sharing mechanism for recent versions of Windows
+ operating systems (since Vista). Enabling SMB2 will eventually
+ give users better performance, security and features than are
+ possible with CIFS. Note that SMB2 mount options are also simpler
+ (compared to CIFS) due to protocol improvements.
+
+ Unless you are a developer or tester, say N.
+
+config CIFS_NFSD_EXPORT
+ bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
help
- Enables cifs features under testing. These features are
- experimental and currently include DFS support and directory
- change notification ie fcntl(F_DNOTIFY), as well as the upcall
- mechanism which will be used for Kerberos session negotiation
- and uid remapping. Some of these features also may depend on
- setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
- (which is disabled by default). See the file fs/cifs/README
- for more details. If unsure, say N.
-
+ Allows the NFS server to export a CIFS-mounted share (nfsd over cifs).
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_CIFS) += cifs.o
cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
- link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
+ link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
readdir.o ioctl.o sess.o export.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 74ab165fc646..4a3ca0e5ca24 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -704,18 +704,6 @@ the start of smb requests and responses can be enabled via:
echo 1 > /proc/fs/cifs/traceSMB
-Two other experimental features are under development. To test these
-requires enabling CONFIG_CIFS_EXPERIMENTAL
-
- cifsacl support needed to retrieve approximated mode bits based on
- the contents on the CIFS ACL.
-
- lease support: cifs will check the oplock state before calling into
- the vfs to see if we can grant a lease on a file.
-
- DNOTIFY fcntl: needed for support of directory change
- notification and perhaps later for file leases)
-
Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
if the kernel was configured with cifs statistics enabled. The statistics
represent the number of successful (ie non-zero return code from the server)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 30d01bc90855..18f4272d9047 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
smb->Command, smb->Status.CifsError,
smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
- cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb));
+ cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
}
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..a9d5692e0c20 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -58,9 +58,7 @@ struct cifs_sb_info {
unsigned int mnt_cifs_flags;
int prepathlen;
char *prepath; /* relative path under the share to mount to */
-#ifdef CONFIG_CIFS_DFS_UPCALL
- char *mountdata; /* mount options received at mount time */
-#endif
+ char *mountdata; /* options received at mount time or via DFS refs */
struct backing_dev_info bdi;
struct delayed_work prune_tlinks;
};
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 23d43cde4306..1b2e180b018d 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -277,6 +277,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen,
for (i = 0, j = 0; i < srclen; j++) {
src_char = source[i];
+ charlen = 1;
switch (src_char) {
case 0:
put_unaligned(0, &target[j]);
@@ -316,16 +317,13 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen,
dst_char = cpu_to_le16(0x003f);
charlen = 1;
}
- /*
- * character may take more than one byte in the source
- * string, but will take exactly two bytes in the
- * target string
- */
- i += charlen;
- continue;
}
+ /*
+ * character may take more than one byte in the source string,
+ * but will take exactly two bytes in the target string
+ */
+ i += charlen;
put_unaligned(dst_char, &target[j]);
- i++; /* move to next char in source string */
}
ctoUCS_out:
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 644dd882a560..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
char *cifs_strndup_from_ucs(const char *src, const int maxlen,
const bool is_unicode,
const struct nls_table *codepage);
+extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
+ const struct nls_table *cp, int mapChars);
+
#endif
/*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..f3c6fb9942ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,24 +23,16 @@
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsacl.h"
#include "cifsproto.h"
#include "cifs_debug.h"
-
-static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
- {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
- {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
- {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
- {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
- {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
- {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
- {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
-;
-
-
/* security id for everyone/world system group */
static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -50,50 +42,385 @@ static const struct cifs_sid sid_authusers = {
/* group users */
static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
+const struct cred *root_cred;
-int match_sid(struct cifs_sid *ctsid)
+static void
+shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
+ int *nr_del)
{
- int i, j;
- int num_subauth, num_sat, num_saw;
- struct cifs_sid *cwsid;
+ struct rb_node *node;
+ struct rb_node *tmp;
+ struct cifs_sid_id *psidid;
+
+ node = rb_first(root);
+ while (node) {
+ tmp = node;
+ node = rb_next(tmp);
+ psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
+ if (nr_to_scan == 0 || *nr_del == nr_to_scan)
+ ++(*nr_rem);
+ else {
+ if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
+ && psidid->refcount == 0) {
+ rb_erase(tmp, root);
+ ++(*nr_del);
+ } else
+ ++(*nr_rem);
+ }
+ }
+}
+
+/*
+ * Run idmap cache shrinker.
+ */
+static int
+cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+{
+ int nr_del = 0;
+ int nr_rem = 0;
+ struct rb_root *root;
+
+ root = &uidtree;
+ spin_lock(&siduidlock);
+ shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+ spin_unlock(&siduidlock);
+
+ root = &gidtree;
+ spin_lock(&sidgidlock);
+ shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+ spin_unlock(&sidgidlock);
+
+ return nr_rem;
+}
+
+static struct shrinker cifs_shrinker = {
+ .shrink = cifs_idmap_shrinker,
+ .seeks = DEFAULT_SEEKS,
+};
+
+static int
+cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
+{
+ char *payload;
+
+ payload = kmalloc(datalen, GFP_KERNEL);
+ if (!payload)
+ return -ENOMEM;
+
+ memcpy(payload, data, datalen);
+ key->payload.data = payload;
+ return 0;
+}
+
+static inline void
+cifs_idmap_key_destroy(struct key *key)
+{
+ kfree(key->payload.data);
+}
- if (!ctsid)
- return -1;
+struct key_type cifs_idmap_key_type = {
+ .name = "cifs.idmap",
+ .instantiate = cifs_idmap_key_instantiate,
+ .destroy = cifs_idmap_key_destroy,
+ .describe = user_describe,
+ .match = user_match,
+};
+
+static void
+sid_to_str(struct cifs_sid *sidptr, char *sidstr)
+{
+ int i;
+ unsigned long saval;
+ char *strptr;
- for (i = 0; i < NUM_WK_SIDS; ++i) {
- cwsid = &(wksidarr[i].cifssid);
+ strptr = sidstr;
- /* compare the revision */
- if (ctsid->revision != cwsid->revision)
- continue;
+ sprintf(strptr, "%s", "S");
+ strptr = sidstr + strlen(sidstr);
- /* compare all of the six auth values */
- for (j = 0; j < 6; ++j) {
- if (ctsid->authority[j] != cwsid->authority[j])
- break;
+ sprintf(strptr, "-%d", sidptr->revision);
+ strptr = sidstr + strlen(sidstr);
+
+ for (i = 0; i < 6; ++i) {
+ if (sidptr->authority[i]) {
+ sprintf(strptr, "-%d", sidptr->authority[i]);
+ strptr = sidstr + strlen(sidstr);
}
- if (j < 6)
- continue; /* all of the auth values did not match */
-
- /* compare all of the subauth values if any */
- num_sat = ctsid->num_subauth;
- num_saw = cwsid->num_subauth;
- num_subauth = num_sat < num_saw ? num_sat : num_saw;
- if (num_subauth) {
- for (j = 0; j < num_subauth; ++j) {
- if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
- break;
- }
- if (j < num_subauth)
- continue; /* all sub_auth values do not match */
+ }
+
+ for (i = 0; i < sidptr->num_subauth; ++i) {
+ saval = le32_to_cpu(sidptr->sub_auth[i]);
+ sprintf(strptr, "-%ld", saval);
+ strptr = sidstr + strlen(sidstr);
+ }
+}
+
+static void
+id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
+ struct cifs_sid_id **psidid, char *typestr)
+{
+ int rc;
+ char *strptr;
+ struct rb_node *node = root->rb_node;
+ struct rb_node *parent = NULL;
+ struct rb_node **linkto = &(root->rb_node);
+ struct cifs_sid_id *lsidid;
+
+ while (node) {
+ lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+ parent = node;
+ rc = compare_sids(sidptr, &((lsidid)->sid));
+ if (rc > 0) {
+ linkto = &(node->rb_left);
+ node = node->rb_left;
+ } else if (rc < 0) {
+ linkto = &(node->rb_right);
+ node = node->rb_right;
+ }
+ }
+
+ memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+ (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
+ (*psidid)->refcount = 0;
+
+ sprintf((*psidid)->sidstr, "%s", typestr);
+ strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
+ sid_to_str(&(*psidid)->sid, strptr);
+
+ clear_bit(SID_ID_PENDING, &(*psidid)->state);
+ clear_bit(SID_ID_MAPPED, &(*psidid)->state);
+
+ rb_link_node(&(*psidid)->rbnode, parent, linkto);
+ rb_insert_color(&(*psidid)->rbnode, root);
+}
+
+static struct cifs_sid_id *
+id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
+{
+ int rc;
+ struct rb_node *node = root->rb_node;
+ struct cifs_sid_id *lsidid;
+
+ while (node) {
+ lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+ rc = compare_sids(sidptr, &((lsidid)->sid));
+ if (rc > 0) {
+ node = node->rb_left;
+ } else if (rc < 0) {
+ node = node->rb_right;
+ } else /* node found */
+ return lsidid;
+ }
+
+ return NULL;
+}
+
+static int
+sidid_pending_wait(void *unused)
+{
+ schedule();
+ return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+
+static int
+sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
+ struct cifs_fattr *fattr, uint sidtype)
+{
+ int rc;
+ unsigned long cid;
+ struct key *idkey;
+ const struct cred *saved_cred;
+ struct cifs_sid_id *psidid, *npsidid;
+ struct rb_root *cidtree;
+ spinlock_t *cidlock;
+
+ if (sidtype == SIDOWNER) {
+ cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
+ cidlock = &siduidlock;
+ cidtree = &uidtree;
+ } else if (sidtype == SIDGROUP) {
+ cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
+ cidlock = &sidgidlock;
+ cidtree = &gidtree;
+ } else
+ return -ENOENT;
+
+ spin_lock(cidlock);
+ psidid = id_rb_search(cidtree, psid);
+
+ if (!psidid) { /* node does not exist, allocate one & attempt adding */
+ spin_unlock(cidlock);
+ npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
+ if (!npsidid)
+ return -ENOMEM;
+
+ npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
+ if (!npsidid->sidstr) {
+ kfree(npsidid);
+ return -ENOMEM;
+ }
+
+ spin_lock(cidlock);
+ psidid = id_rb_search(cidtree, psid);
+ if (psidid) { /* node happened to get inserted meanwhile */
+ ++psidid->refcount;
+ spin_unlock(cidlock);
+ kfree(npsidid->sidstr);
+ kfree(npsidid);
+ } else {
+ psidid = npsidid;
+ id_rb_insert(cidtree, psid, &psidid,
+ sidtype == SIDOWNER ? "os:" : "gs:");
+ ++psidid->refcount;
+ spin_unlock(cidlock);
}
+ } else {
+ ++psidid->refcount;
+ spin_unlock(cidlock);
+ }
+
+ /*
+ * If we are here, it is safe to access psidid and its fields
+ * since a reference was taken earlier while holding the spinlock.
+ * A reference on the node is put without holding the spinlock
+ * and it is OK to do so in this case: the shrinker will not erase
+ * this node until all references are put, and we do not access
+ * any fields of the node after a reference is put.
+ */
+ if (test_bit(SID_ID_MAPPED, &psidid->state)) {
+ cid = psidid->id;
+ psidid->time = jiffies; /* update ts for accessing */
+ goto sid_to_id_out;
+ }
- cFYI(1, "matching sid: %s\n", wksidarr[i].sidname);
- return 0; /* sids compare/match */
+ if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
+ goto sid_to_id_out;
+
+ if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
+ saved_cred = override_creds(root_cred);
+ idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
+ if (IS_ERR(idkey))
+ cFYI(1, "%s: Can't map SID to an id", __func__);
+ else {
+ cid = *(unsigned long *)idkey->payload.value;
+ psidid->id = cid;
+ set_bit(SID_ID_MAPPED, &psidid->state);
+ key_put(idkey);
+ kfree(psidid->sidstr);
+ }
+ revert_creds(saved_cred);
+ psidid->time = jiffies; /* update ts for accessing */
+ clear_bit(SID_ID_PENDING, &psidid->state);
+ wake_up_bit(&psidid->state, SID_ID_PENDING);
+ } else {
+ rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
+ sidid_pending_wait, TASK_INTERRUPTIBLE);
+ if (rc) {
+ cFYI(1, "%s: sidid_pending_wait interrupted %d",
+ __func__, rc);
+ --psidid->refcount; /* decremented without spinlock */
+ return rc;
+ }
+ if (test_bit(SID_ID_MAPPED, &psidid->state))
+ cid = psidid->id;
}
- cFYI(1, "No matching sid");
- return -1;
+sid_to_id_out:
+ --psidid->refcount; /* decremented without spinlock */
+ if (sidtype == SIDOWNER)
+ fattr->cf_uid = cid;
+ else
+ fattr->cf_gid = cid;
+
+ return 0;
+}
+
+int
+init_cifs_idmap(void)
+{
+ struct cred *cred;
+ struct key *keyring;
+ int ret;
+
+ cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
+
+ /* create an override credential set with a special thread keyring in
+ * which requests are cached
+ *
+ * this is used to prevent malicious redirections from being installed
+ * with add_key().
+ */
+ cred = prepare_kernel_cred(NULL);
+ if (!cred)
+ return -ENOMEM;
+
+ keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ if (IS_ERR(keyring)) {
+ ret = PTR_ERR(keyring);
+ goto failed_put_cred;
+ }
+
+ ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+ if (ret < 0)
+ goto failed_put_key;
+
+ ret = register_key_type(&cifs_idmap_key_type);
+ if (ret < 0)
+ goto failed_put_key;
+
+ /* instruct request_key() to use this special keyring as a cache for
+ * the results it looks up */
+ cred->thread_keyring = keyring;
+ cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+ root_cred = cred;
+
+ spin_lock_init(&siduidlock);
+ uidtree = RB_ROOT;
+ spin_lock_init(&sidgidlock);
+ gidtree = RB_ROOT;
+
+ register_shrinker(&cifs_shrinker);
+
+ cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
+ return 0;
+
+failed_put_key:
+ key_put(keyring);
+failed_put_cred:
+ put_cred(cred);
+ return ret;
+}
+
+void
+exit_cifs_idmap(void)
+{
+ key_revoke(root_cred->thread_keyring);
+ unregister_key_type(&cifs_idmap_key_type);
+ put_cred(root_cred);
+ unregister_shrinker(&cifs_shrinker);
+ cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
+}
+
+void
+cifs_destroy_idmaptrees(void)
+{
+ struct rb_root *root;
+ struct rb_node *node;
+
+ root = &uidtree;
+ spin_lock(&siduidlock);
+ while ((node = rb_first(root)))
+ rb_erase(node, root);
+ spin_unlock(&siduidlock);
+
+ root = &gidtree;
+ spin_lock(&sidgidlock);
+ while ((node = rb_first(root)))
+ rb_erase(node, root);
+ spin_unlock(&sidgidlock);
}
/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
int num_subauth, num_sat, num_saw;
if ((!ctsid) || (!cwsid))
- return 0;
+ return 1;
/* compare the revision */
- if (ctsid->revision != cwsid->revision)
- return 0;
+ if (ctsid->revision != cwsid->revision) {
+ if (ctsid->revision > cwsid->revision)
+ return 1;
+ else
+ return -1;
+ }
/* compare all of the six auth values */
for (i = 0; i < 6; ++i) {
- if (ctsid->authority[i] != cwsid->authority[i])
- return 0;
+ if (ctsid->authority[i] != cwsid->authority[i]) {
+ if (ctsid->authority[i] > cwsid->authority[i])
+ return 1;
+ else
+ return -1;
+ }
}
/* compare all of the subauth values if any */
@@ -122,12 +457,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
num_subauth = num_sat < num_saw ? num_sat : num_saw;
if (num_subauth) {
for (i = 0; i < num_subauth; ++i) {
- if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
- return 0;
+ if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
+ if (ctsid->sub_auth[i] > cwsid->sub_auth[i])
+ return 1;
+ else
+ return -1;
+ }
}
}
- return 1; /* sids compare/match */
+ return 0; /* sids compare/match */
}
@@ -382,22 +721,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
#ifdef CONFIG_CIFS_DEBUG2
dump_ace(ppace[i], end_of_acl);
#endif
- if (compare_sids(&(ppace[i]->sid), pownersid))
+ if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
access_flags_to_mode(ppace[i]->access_req,
ppace[i]->type,
&fattr->cf_mode,
&user_mask);
- if (compare_sids(&(ppace[i]->sid), pgrpsid))
+ if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
access_flags_to_mode(ppace[i]->access_req,
ppace[i]->type,
&fattr->cf_mode,
&group_mask);
- if (compare_sids(&(ppace[i]->sid), &sid_everyone))
+ if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
access_flags_to_mode(ppace[i]->access_req,
ppace[i]->type,
&fattr->cf_mode,
&other_mask);
- if (compare_sids(&(ppace[i]->sid), &sid_authusers))
+ if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
access_flags_to_mode(ppace[i]->access_req,
ppace[i]->type,
&fattr->cf_mode,
@@ -475,10 +814,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
/* Convert CIFS ACL to POSIX form */
-static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
- struct cifs_fattr *fattr)
+static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
+ struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
{
- int rc;
+ int rc = 0;
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +839,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
le32_to_cpu(pntsd->sacloffset), dacloffset);
/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
rc = parse_sid(owner_sid_ptr, end_of_acl);
- if (rc)
+ if (rc) {
+ cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
+ return rc;
+ }
+ rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
+ if (rc) {
+ cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
return rc;
+ }
rc = parse_sid(group_sid_ptr, end_of_acl);
- if (rc)
+ if (rc) {
+ cFYI(1, "%s: Error %d parsing Group SID", __func__, rc);
return rc;
+ }
+ rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
+ if (rc) {
+ cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
+ return rc;
+ }
if (dacloffset)
parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +873,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
sizeof(struct cifs_sid)); */
- return 0;
+ return rc;
}
@@ -688,7 +1041,7 @@ out:
}
/* Set an ACL on the server */
-static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
+int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
struct inode *inode, const char *path)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1080,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
rc = PTR_ERR(pntsd);
cERROR(1, "%s: error %d getting sec desc", __func__, rc);
} else {
- rc = parse_sec_desc(pntsd, acllen, fattr);
+ rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
kfree(pntsd);
if (rc)
cERROR(1, "parse sec desc failed rc = %d", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
#define ACCESS_ALLOWED 0
#define ACCESS_DENIED 1
+#define SIDOWNER 1
+#define SIDGROUP 2
+#define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */
+
+#define SID_ID_MAPPED 0
+#define SID_ID_PENDING 1
+#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
+#define SID_MAP_RETRY (300 * HZ) /* wait 5 minutes for next attempt to map */
+
struct cifs_ntsd {
__le16 revision; /* revision level */
__le16 type;
@@ -74,7 +83,21 @@ struct cifs_wksid {
char sidname[SIDNAMELENGTH];
} __attribute__((packed));
-extern int match_sid(struct cifs_sid *);
+struct cifs_sid_id {
+ unsigned int refcount; /* increment with spinlock, decrement without */
+ unsigned long id;
+ unsigned long time;
+ unsigned long state;
+ char *sidstr;
+ struct rb_node rbnode;
+ struct cifs_sid sid;
+};
+
+#ifdef __KERNEL__
+extern struct key_type cifs_idmap_key_type;
+extern const struct cred *root_cred;
+#endif /* KERNEL */
+
extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d1a016be73ba..45c3f78c8f81 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -60,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
server->session_key.response, server->session_key.len);
crypto_shash_update(&server->secmech.sdescmd5->shash,
- cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+ cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
@@ -268,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
}
#ifdef CONFIG_CIFS_WEAK_PW_HASH
-void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
+int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
char *lnm_session_key)
{
int i;
+ int rc;
char password_with_pad[CIFS_ENCPWD_SIZE];
memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -282,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
memcpy(lnm_session_key, password_with_pad,
CIFS_ENCPWD_SIZE);
- return;
+ return 0;
}
/* calculate old style session key */
@@ -299,10 +300,9 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
password_with_pad[i] = toupper(password_with_pad[i]);
- SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
+ rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
- /* clear password before we return/free memory */
- memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
+ return rc;
}
#endif /* CIFS_WEAK_PW_HASH */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5c412b33cd7c..493b74ca5648 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -128,29 +128,22 @@ cifs_read_super(struct super_block *sb, void *data,
}
cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
-#ifdef CONFIG_CIFS_DFS_UPCALL
- /* copy mount params to sb for use in submounts */
- /* BB: should we move this after the mount so we
- * do not have to do the copy on failed mounts?
- * BB: May be it is better to do simple copy before
- * complex operation (mount), and in case of fail
- * just exit instead of doing mount and attempting
- * undo it if this copy fails?*/
+ /*
+ * Copy mount params to sb for use in submounts. Better to do
+ * the copy here and deal with the error before cleanup gets
+ * complicated post-mount.
+ */
if (data) {
- int len = strlen(data);
- cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
+ cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
if (cifs_sb->mountdata == NULL) {
bdi_destroy(&cifs_sb->bdi);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
return -ENOMEM;
}
- strncpy(cifs_sb->mountdata, data, len + 1);
- cifs_sb->mountdata[len] = '\0';
}
-#endif
- rc = cifs_mount(sb, cifs_sb, data, devname);
+ rc = cifs_mount(sb, cifs_sb, devname);
if (rc) {
if (!silent)
@@ -163,7 +156,7 @@ cifs_read_super(struct super_block *sb, void *data,
sb->s_bdi = &cifs_sb->bdi;
sb->s_blocksize = CIFS_MAX_MSGSIZE;
sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
- inode = cifs_root_iget(sb, ROOT_I);
+ inode = cifs_root_iget(sb);
if (IS_ERR(inode)) {
rc = PTR_ERR(inode);
@@ -184,12 +177,12 @@ cifs_read_super(struct super_block *sb, void *data,
else
sb->s_d_op = &cifs_dentry_ops;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_NFSD_EXPORT
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
cFYI(1, "export ops supported");
sb->s_export_op = &cifs_export_ops;
}
-#endif /* EXPERIMENTAL */
+#endif /* CONFIG_CIFS_NFSD_EXPORT */
return 0;
@@ -202,12 +195,10 @@ out_no_root:
out_mount_failed:
if (cifs_sb) {
-#ifdef CONFIG_CIFS_DFS_UPCALL
if (cifs_sb->mountdata) {
kfree(cifs_sb->mountdata);
cifs_sb->mountdata = NULL;
}
-#endif
unload_nls(cifs_sb->local_nls);
bdi_destroy(&cifs_sb->bdi);
kfree(cifs_sb);
@@ -231,12 +222,10 @@ cifs_put_super(struct super_block *sb)
rc = cifs_umount(sb, cifs_sb);
if (rc)
cERROR(1, "cifs_umount failed with return code %d", rc);
-#ifdef CONFIG_CIFS_DFS_UPCALL
if (cifs_sb->mountdata) {
kfree(cifs_sb->mountdata);
cifs_sb->mountdata = NULL;
}
-#endif
unload_nls(cifs_sb->local_nls);
bdi_destroy(&cifs_sb->bdi);
@@ -618,16 +607,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
{
/* origin == SEEK_END => we must revalidate the cached file length */
if (origin == SEEK_END) {
- int retval;
-
- /* some applications poll for the file length in this strange
- way so we must seek to end on non-oplocked files by
- setting the revalidate time to zero */
- CIFS_I(file->f_path.dentry->d_inode)->time = 0;
-
- retval = cifs_revalidate_file(file);
- if (retval < 0)
- return (loff_t)retval;
+ int rc;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ /*
+ * We need to be sure that all dirty pages are written and the
+ * server has the newest file length.
+ */
+ if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+ inode->i_mapping->nrpages != 0) {
+ rc = filemap_fdatawait(inode->i_mapping);
+ if (rc) {
+ mapping_set_error(inode->i_mapping, rc);
+ return rc;
+ }
+ }
+ /*
+ * Some applications poll for the file length in this strange
+ * way so we must seek to end on non-oplocked files by
+ * setting the revalidate time to zero.
+ */
+ CIFS_I(inode)->time = 0;
+
+ rc = cifs_revalidate_file_attr(file);
+ if (rc < 0)
+ return (loff_t)rc;
}
return generic_file_llseek_unlocked(file, offset, origin);
}
@@ -760,10 +764,11 @@ const struct file_operations cifs_file_strict_ops = {
};
const struct file_operations cifs_file_direct_ops = {
- /* no aio, no readv -
- BB reevaluate whether they can be done with directio, no cache */
- .read = cifs_user_read,
- .write = cifs_user_write,
+ /* BB reevaluate whether they can be done with directio, no cache */
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = cifs_user_readv,
+ .aio_write = cifs_user_writev,
.open = cifs_open,
.release = cifs_close,
.lock = cifs_lock,
@@ -815,10 +820,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
};
const struct file_operations cifs_file_direct_nobrl_ops = {
- /* no mmap, no aio, no readv -
- BB reevaluate whether they can be done with directio, no cache */
- .read = cifs_user_read,
- .write = cifs_user_write,
+ /* BB reevaluate whether they can be done with directio, no cache */
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = cifs_user_readv,
+ .aio_write = cifs_user_writev,
.open = cifs_open,
.release = cifs_close,
.fsync = cifs_fsync,
@@ -981,10 +987,10 @@ init_cifs(void)
int rc = 0;
cifs_proc_init();
INIT_LIST_HEAD(&cifs_tcp_ses_list);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
INIT_LIST_HEAD(&GlobalDnotifyReqList);
INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
-#endif
+#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
/*
* Initialize Global counters
*/
@@ -1033,22 +1039,33 @@ init_cifs(void)
if (rc)
goto out_destroy_mids;
- rc = register_filesystem(&cifs_fs_type);
- if (rc)
- goto out_destroy_request_bufs;
#ifdef CONFIG_CIFS_UPCALL
rc = register_key_type(&cifs_spnego_key_type);
if (rc)
- goto out_unregister_filesystem;
-#endif
+ goto out_destroy_request_bufs;
+#endif /* CONFIG_CIFS_UPCALL */
+
+#ifdef CONFIG_CIFS_ACL
+ rc = init_cifs_idmap();
+ if (rc)
+ goto out_register_key_type;
+#endif /* CONFIG_CIFS_ACL */
+
+ rc = register_filesystem(&cifs_fs_type);
+ if (rc)
+ goto out_init_cifs_idmap;
return 0;
-#ifdef CONFIG_CIFS_UPCALL
-out_unregister_filesystem:
- unregister_filesystem(&cifs_fs_type);
+out_init_cifs_idmap:
+#ifdef CONFIG_CIFS_ACL
+ exit_cifs_idmap();
+out_register_key_type:
#endif
+#ifdef CONFIG_CIFS_UPCALL
+ unregister_key_type(&cifs_spnego_key_type);
out_destroy_request_bufs:
+#endif
cifs_destroy_request_bufs();
out_destroy_mids:
cifs_destroy_mids();
@@ -1070,6 +1087,10 @@ exit_cifs(void)
#ifdef CONFIG_CIFS_DFS_UPCALL
cifs_dfs_release_automount_timer();
#endif
+#ifdef CONFIG_CIFS_ACL
+ cifs_destroy_idmaptrees();
+ exit_cifs_idmap();
+#endif
#ifdef CONFIG_CIFS_UPCALL
unregister_key_type(&cifs_spnego_key_type);
#endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
/* Functions related to inodes */
extern const struct inode_operations cifs_dir_inode_ops;
-extern struct inode *cifs_root_iget(struct super_block *, unsigned long);
+extern struct inode *cifs_root_iget(struct super_block *);
extern int cifs_create(struct inode *, struct dentry *, int,
struct nameidata *);
extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
extern int cifs_rmdir(struct inode *, struct dentry *);
extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
struct dentry *);
+extern int cifs_revalidate_file_attr(struct file *filp);
+extern int cifs_revalidate_dentry_attr(struct dentry *);
extern int cifs_revalidate_file(struct file *filp);
extern int cifs_revalidate_dentry(struct dentry *);
-extern void cifs_invalidate_mapping(struct inode *inode);
+extern int cifs_invalidate_mapping(struct inode *inode);
extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int cifs_setattr(struct dentry *, struct iattr *);
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
extern int cifs_open(struct inode *inode, struct file *file);
extern int cifs_close(struct inode *inode, struct file *file);
extern int cifs_closedir(struct inode *inode, struct file *file);
-extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
- size_t read_size, loff_t *poffset);
+extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
- size_t write_size, loff_t *poffset);
+extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
extern const struct export_operations cifs_export_ops;
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "1.71"
+#define CIFS_VERSION "1.72"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a5d1106fcbde..76b4517e74b0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -274,7 +274,8 @@ struct cifsSesInfo {
int capabilities;
char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
TCP names - will ipv6 and sctp addresses fit? */
- char *user_name;
+ char *user_name; /* must not be null except during init of sess;
+ it is filled in after mount option parsing */
char *domainName;
char *password;
struct session_key auth_key;
@@ -780,10 +781,12 @@ GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
*/
GLOBAL_EXTERN spinlock_t cifs_file_list_lock;
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
/* Outstanding dir notify requests */
GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
/* DirNotify response queue */
GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
+#endif /* was needed for dnotify, and will be needed for inotify once the VFS is fixed */
/*
* Global transaction id (XID) information
@@ -830,6 +833,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
/* reconnect after this many failed echo attempts */
GLOBAL_EXTERN unsigned short echo_retries;
+GLOBAL_EXTERN struct rb_root uidtree;
+GLOBAL_EXTERN struct rb_root gidtree;
+GLOBAL_EXTERN spinlock_t siduidlock;
+GLOBAL_EXTERN spinlock_t sidgidlock;
+
void cifs_oplock_break(struct work_struct *work);
void cifs_oplock_break_get(struct cifsFileInfo *cfile);
void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */
struct smb_hdr {
- __u32 smb_buf_length; /* big endian on wire *//* BB length is only two
- or three bytes - with one or two byte type preceding it that are
- zero - we could mask the type byte off just in case BB */
+ __be32 smb_buf_length; /* BB length is only two (rarely three) bytes,
+ with one or two byte "type" preceding it that will be
+ zero - we could mask the type byte off */
__u8 Protocol[4];
__u8 Command;
union {
@@ -428,43 +428,28 @@ struct smb_hdr {
__u8 WordCount;
} __attribute__((packed));
-/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */
-#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \
- (2 * (smb_var)->WordCount))
+/* given a pointer to an smb_hdr, retrieve a void pointer to the ByteCount */
+static inline void *
+BCC(struct smb_hdr *smb)
+{
+ return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
+}
/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
#define pByteArea(smb_var) (BCC(smb_var) + 2)
-/* get the converted ByteCount for a SMB packet and return it */
-static inline __u16
-get_bcc(struct smb_hdr *hdr)
-{
- __u16 *bc_ptr = (__u16 *)BCC(hdr);
-
- return get_unaligned(bc_ptr);
-}
-
/* get the unconverted ByteCount for a SMB packet and return it */
static inline __u16
-get_bcc_le(struct smb_hdr *hdr)
+get_bcc(struct smb_hdr *hdr)
{
__le16 *bc_ptr = (__le16 *)BCC(hdr);
return get_unaligned_le16(bc_ptr);
}
-/* set the ByteCount for a SMB packet in host-byte order */
-static inline void
-put_bcc(__u16 count, struct smb_hdr *hdr)
-{
- __u16 *bc_ptr = (__u16 *)BCC(hdr);
-
- put_unaligned(count, bc_ptr);
-}
-
/* set the ByteCount for a SMB packet in little-endian */
static inline void
-put_bcc_le(__u16 count, struct smb_hdr *hdr)
+put_bcc(__u16 count, struct smb_hdr *hdr)
{
__le16 *bc_ptr = (__le16 *)BCC(hdr);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..6e69e06a30b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,6 +53,9 @@ do { \
cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \
__func__, curr_xid, (int)rc); \
} while (0)
+extern int init_cifs_idmap(void);
+extern void exit_cifs_idmap(void);
+extern void cifs_destroy_idmaptrees(void);
extern char *build_path_from_dentry(struct dentry *);
extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
struct cifsTconInfo *tcon);
@@ -90,7 +93,6 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
extern unsigned int smbCalcSize(struct smb_hdr *ptr);
-extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -143,8 +145,10 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
const char *, u32 *);
+extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
+ const char *);
-extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
+extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
const char *);
extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
extern void cifs_dfs_release_automount_timer(void);
@@ -304,12 +308,13 @@ extern int CIFSSMBUnixQuerySymLink(const int xid,
struct cifsTconInfo *tcon,
const unsigned char *searchName, char **syminfo,
const struct nls_table *nls_codepage);
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
extern int CIFSSMBQueryReparseLinkInfo(const int xid,
struct cifsTconInfo *tcon,
const unsigned char *searchName,
char *symlinkinfo, const int buflen, __u16 fid,
const struct nls_table *nls_codepage);
-
+#endif /* temporarily unused until cifs_symlink is fixed */
extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
const char *fileName, const int disposition,
const int access_flags, const int omode,
@@ -348,8 +353,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
const unsigned char *searchName, __u64 *inode_number,
const struct nls_table *nls_codepage,
int remap_special_chars);
-extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
- const struct nls_table *cp, int mapChars);
extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
const __u16 netfid, const __u64 len,
@@ -383,9 +386,15 @@ extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
extern int calc_seckey(struct cifsSesInfo *);
#ifdef CONFIG_CIFS_WEAK_PW_HASH
-extern void calc_lanman_hash(const char *password, const char *cryptkey,
+extern int calc_lanman_hash(const char *password, const char *cryptkey,
bool encrypt, char *lnm_session_key);
#endif /* CIFS_WEAK_PW_HASH */
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
+extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
+ const int notify_subdirs, const __u16 netfid,
+ __u32 filter, struct file *file, int multishot,
+ const struct nls_table *nls_codepage);
+#endif /* was needed for dnotify, and will be needed for inotify once the VFS is fixed */
extern int CIFSSMBCopy(int xid,
struct cifsTconInfo *source_tcon,
const char *fromName,
@@ -393,10 +402,6 @@ extern int CIFSSMBCopy(int xid,
const char *toName, const int flags,
const struct nls_table *nls_codepage,
int remap_special_chars);
-extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
- const int notify_subdirs, const __u16 netfid,
- __u32 filter, struct file *file, int multishot,
- const struct nls_table *nls_codepage);
extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
const unsigned char *searchName,
const unsigned char *ea_name, char *EAData,
@@ -427,9 +432,6 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
struct cifs_sb_info *cifs_sb, int xid);
extern int mdfour(unsigned char *, unsigned char *, int);
extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
-extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
- unsigned char *p24);
-extern void E_P16(unsigned char *p14, unsigned char *p16);
-extern void E_P24(unsigned char *p21, const unsigned char *c8,
+extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
unsigned char *p24);
#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index df959bae6728..83df937b814e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -339,12 +339,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
goto vt2_err;
- /* check that bcc is at least as big as parms + data */
- /* check that bcc is less than negotiated smb buffer */
total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
if (total_size >= 512)
goto vt2_err;
+ /* check that bcc is at least as big as parms + data, and that it is
+ * less than negotiated smb buffer
+ */
total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
if (total_size > get_bcc(&pSMB->hdr) ||
total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
@@ -357,6 +358,13 @@ vt2_err:
return -EINVAL;
}
+static inline void inc_rfc1001_len(void *pSMB, int count)
+{
+ struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
+
+ be32_add_cpu(&hdr->smb_buf_length, count);
+}
+
int
CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
{
@@ -409,7 +417,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
count += strlen(protocols[i].name) + 1;
/* null at end of source and target buffers anyway */
}
- pSMB->hdr.smb_buf_length += count;
+ inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -541,10 +549,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
server->secType = RawNTLMSSP;
else if (secFlags & CIFSSEC_MAY_LANMAN)
server->secType = LANMAN;
-/* #ifdef CONFIG_CIFS_EXPERIMENTAL
- else if (secFlags & CIFSSEC_MAY_PLNTXT)
- server->secType = ??
-#endif */
else {
rc = -EOPNOTSUPP;
cERROR(1, "Invalid security type");
@@ -578,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
(server->capabilities & CAP_EXTENDED_SECURITY)) {
- count = pSMBr->ByteCount;
+ count = get_bcc(&pSMBr->hdr);
if (count < 16) {
rc = -EIO;
goto neg_err_exit;
@@ -732,9 +736,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
smb->hdr.Tid = 0xffff;
smb->hdr.WordCount = 1;
put_unaligned_le16(1, &smb->EchoCount);
- put_bcc_le(1, &smb->hdr);
+ put_bcc(1, &smb->hdr);
smb->Data[0] = 'a';
- smb->hdr.smb_buf_length += 3;
+ inc_rfc1001_len(smb, 3);
rc = cifs_call_async(server, (struct smb_hdr *)smb,
cifs_echo_callback, server);
@@ -852,7 +856,7 @@ PsxDelete:
pSMB->TotalParameterCount = pSMB->ParameterCount;
pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -898,7 +902,7 @@ DelFileRetry:
pSMB->SearchAttributes =
cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
pSMB->BufferFormat = 0x04;
- pSMB->hdr.smb_buf_length += name_len + 1;
+ inc_rfc1001_len(pSMB, name_len + 1);
pSMB->ByteCount = cpu_to_le16(name_len + 1);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -942,7 +946,7 @@ RmDirRetry:
}
pSMB->BufferFormat = 0x04;
- pSMB->hdr.smb_buf_length += name_len + 1;
+ inc_rfc1001_len(pSMB, name_len + 1);
pSMB->ByteCount = cpu_to_le16(name_len + 1);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -985,7 +989,7 @@ MkDirRetry:
}
pSMB->BufferFormat = 0x04;
- pSMB->hdr.smb_buf_length += name_len + 1;
+ inc_rfc1001_len(pSMB, name_len + 1);
pSMB->ByteCount = cpu_to_le16(name_len + 1);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1063,7 +1067,7 @@ PsxCreat:
pSMB->TotalParameterCount = pSMB->ParameterCount;
pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1079,7 @@ PsxCreat:
cFYI(1, "copying inode info");
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) {
+ if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
rc = -EIO; /* bad smb */
goto psx_create_err;
}
@@ -1096,7 +1100,7 @@ PsxCreat:
pRetData->Type = cpu_to_le32(-1); /* unknown */
cFYI(DBG2, "unknown type");
} else {
- if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
+ if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
+ sizeof(FILE_UNIX_BASIC_INFO)) {
cERROR(1, "Open response data too small");
pRetData->Type = cpu_to_le32(-1);
@@ -1228,7 +1232,7 @@ OldOpenRetry:
pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
count += name_len;
- pSMB->hdr.smb_buf_length += count;
+ inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
/* long_op set to 1 to allow for oplock break timeouts */
@@ -1341,7 +1345,7 @@ openRetry:
SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
count += name_len;
- pSMB->hdr.smb_buf_length += count;
+ inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
/* long_op set to 1 to allow for oplock break timeouts */
@@ -1426,7 +1430,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
}
iov[0].iov_base = (char *)pSMB;
- iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+ iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
&resp_buf_type, CIFS_LOG_ERROR);
cifs_stats_inc(&tcon->num_reads);
@@ -1560,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
if (wct == 14)
pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1644,11 +1648,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
- smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */
+ /* header + 1 byte pad */
+ smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
if (wct == 14)
- pSMB->hdr.smb_buf_length += count+1;
+ inc_rfc1001_len(pSMB, count + 1);
else /* wct == 12 */
- pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */
+ inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
if (wct == 14)
pSMB->ByteCount = cpu_to_le16(count + 1);
else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1748,7 +1753,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
/* oplock break */
count = 0;
}
- pSMB->hdr.smb_buf_length += count;
+ inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
if (waitFlag) {
@@ -1839,14 +1844,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
pSMB->Fid = smb_file_id;
pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
if (waitFlag) {
rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned);
} else {
iov[0].iov_base = (char *)pSMB;
- iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+ iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
&resp_buf_type, timeout);
pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +1867,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
__u16 data_count;
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) {
+ if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
rc = -EIO; /* bad smb */
goto plk_err_exit;
}
@@ -2012,7 +2017,7 @@ renameRetry:
}
count = 1 /* 1st signature byte */ + name_len + name_len2;
- pSMB->hdr.smb_buf_length += count;
+ inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2092,7 +2097,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
pSMB->InformationLevel =
cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2159,7 +2164,7 @@ copyRetry:
}
count = 1 /* 1st signature byte */ + name_len + name_len2;
- pSMB->hdr.smb_buf_length += count;
+ inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2249,7 +2254,7 @@ createSymLinkRetry:
pSMB->DataOffset = cpu_to_le16(offset);
pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2335,7 +2340,7 @@ createHardLinkRetry:
pSMB->DataOffset = cpu_to_le16(offset);
pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2406,7 +2411,7 @@ winCreateHardLinkRetry:
}
count = 1 /* string type byte */ + name_len + name_len2;
- pSMB->hdr.smb_buf_length += count;
+ inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2477,7 +2482,7 @@ querySymLinkRetry:
pSMB->ParameterCount = pSMB->TotalParameterCount;
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2494,7 @@ querySymLinkRetry:
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
/* BB also check enough total bytes returned */
- if (rc || (pSMBr->ByteCount < 2))
+ if (rc || get_bcc(&pSMBr->hdr) < 2)
rc = -EIO;
else {
bool is_unicode;
@@ -2516,7 +2521,17 @@ querySymLinkRetry:
return rc;
}
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
+/*
+ * Recent Windows versions now create symlinks more frequently
+ * and they use the "reparse point" mechanism below. We can of course
+ * do symlinks nicely to Samba and other servers which support the
+ * CIFS Unix Extensions and we can also do SFU symlinks and "client only"
+ * "MF" symlinks optionally, but for recent Windows we really need to
+ * reenable the code below and fix the cifs_symlink callers to handle this.
+ * In the interim this code has been moved to its own config option so
+ * it is not compiled in by default until the callers are fixed and tested.
+ */
int
CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
const unsigned char *searchName,
@@ -2561,14 +2576,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
} else { /* decode response */
__u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
__u32 data_count = le32_to_cpu(pSMBr->DataCount);
- if ((pSMBr->ByteCount < 2) || (data_offset > 512)) {
- /* BB also check enough total bytes returned */
+ if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
+ /* BB also check enough total bytes returned */
rc = -EIO; /* bad smb */
goto qreparse_out;
}
if (data_count && (data_count < 2048)) {
char *end_of_smb = 2 /* sizeof byte count */ +
- pSMBr->ByteCount + (char *)&pSMBr->ByteCount;
+ get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
struct reparse_data *reparse_buf =
(struct reparse_data *)
@@ -2618,7 +2633,7 @@ qreparse_out:
return rc;
}
-#endif /* CIFS_EXPERIMENTAL */
+#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
#ifdef CONFIG_CIFS_POSIX
@@ -2814,7 +2829,7 @@ queryAclRetry:
pSMB->ParameterCount = pSMB->TotalParameterCount;
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +2841,8 @@ queryAclRetry:
/* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 2))
/* BB also check enough total bytes returned */
+ if (rc || get_bcc(&pSMBr->hdr) < 2)
rc = -EIO; /* bad smb */
else {
__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2908,7 +2923,7 @@ setAclRetry:
pSMB->ParameterCount = cpu_to_le16(params);
pSMB->TotalParameterCount = pSMB->ParameterCount;
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2966,7 +2981,7 @@ GetExtAttrRetry:
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
pSMB->Pad = 0;
pSMB->Fid = netfid;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->t2.ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +2991,8 @@ GetExtAttrRetry:
} else {
/* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 2))
/* BB also check enough total bytes returned */
+ if (rc || get_bcc(&pSMBr->hdr) < 2)
/* If rc should we check for EOPNOSUPP and
disable the srvino flag? or in caller? */
rc = -EIO; /* bad smb */
@@ -3052,6 +3067,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
char *end_of_smb;
__u32 data_count, data_offset, parm_count, parm_offset;
struct smb_com_ntransact_rsp *pSMBr;
+ u16 bcc;
*pdatalen = 0;
*pparmlen = 0;
@@ -3061,8 +3077,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
pSMBr = (struct smb_com_ntransact_rsp *)buf;
- /* ByteCount was converted from little endian in SendReceive */
- end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
+ bcc = get_bcc(&pSMBr->hdr);
+ end_of_smb = 2 /* sizeof byte count */ + bcc +
(char *)&pSMBr->ByteCount;
data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3104,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
*ppdata, data_count, (data_count + *ppdata),
end_of_smb, pSMBr);
return -EINVAL;
- } else if (parm_count + data_count > pSMBr->ByteCount) {
+ } else if (parm_count + data_count > bcc) {
cFYI(1, "parm count and data count larger than SMB");
return -EINVAL;
}
@@ -3124,9 +3140,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
CIFS_ACL_DACL);
pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
- pSMB->hdr.smb_buf_length += 11;
+ inc_rfc1001_len(pSMB, 11);
iov[0].iov_base = (char *)pSMB;
- iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+ iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
0);
@@ -3235,10 +3251,9 @@ setCifsAclRetry:
memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
(char *) pntsd,
acllen);
- pSMB->hdr.smb_buf_length += (byte_count + data_count);
-
+ inc_rfc1001_len(pSMB, byte_count + data_count);
} else
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3289,7 +3304,7 @@ QInfRetry:
}
pSMB->BufferFormat = 0x04;
name_len++; /* account for buffer type byte */
- pSMB->hdr.smb_buf_length += (__u16) name_len;
+ inc_rfc1001_len(pSMB, (__u16)name_len);
pSMB->ByteCount = cpu_to_le16(name_len);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3364,7 +3379,7 @@ QFileInfoRetry:
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
pSMB->Pad = 0;
pSMB->Fid = netfid;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3390,7 @@ QFileInfoRetry:
if (rc) /* BB add auto retry on EOPNOTSUPP? */
rc = -EIO;
- else if (pSMBr->ByteCount < 40)
+ else if (get_bcc(&pSMBr->hdr) < 40)
rc = -EIO; /* bad smb */
else if (pFindData) {
__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3451,7 +3466,7 @@ QPathInfoRetry:
else
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3478,9 @@ QPathInfoRetry:
if (rc) /* BB add auto retry on EOPNOTSUPP? */
rc = -EIO;
- else if (!legacy && (pSMBr->ByteCount < 40))
+ else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
rc = -EIO; /* bad smb */
- else if (legacy && (pSMBr->ByteCount < 24))
+ else if (legacy && get_bcc(&pSMBr->hdr) < 24)
rc = -EIO; /* 24 or 26 expected but we do not read
last field */
else if (pFindData) {
@@ -3532,7 +3547,7 @@ UnixQFileInfoRetry:
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
pSMB->Pad = 0;
pSMB->Fid = netfid;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3556,7 @@ UnixQFileInfoRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+ if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
"Unix Extensions can be disabled on mount "
"by specifying the nosfu mount option.");
@@ -3617,7 +3632,7 @@ UnixQPathInfoRetry:
pSMB->ParameterCount = pSMB->TotalParameterCount;
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3642,7 @@ UnixQPathInfoRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+ if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
"Unix Extensions can be disabled on mount "
"by specifying the nosfu mount option.");
@@ -3731,7 +3746,7 @@ findFirstRetry:
/* BB what should we set StorageType to? Does it matter? BB */
pSMB->SearchStorageType = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3860,7 +3875,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
byte_count = params + 1 /* pad */ ;
pSMB->TotalParameterCount = cpu_to_le16(params);
pSMB->ParameterCount = pSMB->TotalParameterCount;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4022,7 +4037,7 @@ GetInodeNumberRetry:
pSMB->ParameterCount = pSMB->TotalParameterCount;
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4047,8 @@ GetInodeNumberRetry:
} else {
/* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 2))
/* BB also check enough total bytes returned */
+ if (rc || get_bcc(&pSMBr->hdr) < 2)
/* If rc should we check for EOPNOSUPP and
disable the srvino flag? or in caller? */
rc = -EIO; /* bad smb */
@@ -4246,7 +4261,7 @@ getDFSRetry:
pSMB->ParameterCount = cpu_to_le16(params);
pSMB->TotalParameterCount = pSMB->ParameterCount;
pSMB->MaxReferralLevel = cpu_to_le16(3);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4273,13 @@ getDFSRetry:
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
/* BB Also check if enough total bytes returned? */
- if (rc || (pSMBr->ByteCount < 17)) {
+ if (rc || get_bcc(&pSMBr->hdr) < 17) {
rc = -EIO; /* bad smb */
goto GetDFSRefExit;
}
cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
- pSMBr->ByteCount,
+ get_bcc(&pSMBr->hdr),
le16_to_cpu(pSMBr->t2.DataOffset));
/* parse returned result into more usable form */
@@ -4320,7 +4335,7 @@ oldQFSInfoRetry:
pSMB->Reserved3 = 0;
pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4345,12 @@ oldQFSInfoRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 18))
+ if (rc || get_bcc(&pSMBr->hdr) < 18)
rc = -EIO; /* bad smb */
else {
__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
cFYI(1, "qfsinf resp BCC: %d Offset %d",
- pSMBr->ByteCount, data_offset);
+ get_bcc(&pSMBr->hdr), data_offset);
response_data = (FILE_SYSTEM_ALLOC_INFO *)
(((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4399,7 +4414,7 @@ QFSInfoRetry:
pSMB->Reserved3 = 0;
pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4424,7 @@ QFSInfoRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 24))
+ if (rc || get_bcc(&pSMBr->hdr) < 24)
rc = -EIO; /* bad smb */
else {
__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4479,7 +4494,7 @@ QFSAttributeRetry:
pSMB->Reserved3 = 0;
pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4504,7 @@ QFSAttributeRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 13)) {
+ if (rc || get_bcc(&pSMBr->hdr) < 13) {
/* BB also check if enough bytes returned */
rc = -EIO; /* bad smb */
} else {
@@ -4550,7 +4565,7 @@ QFSDeviceRetry:
pSMB->Reserved3 = 0;
pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4575,8 @@ QFSDeviceRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO)))
+ if (rc || get_bcc(&pSMBr->hdr) <
+ sizeof(FILE_SYSTEM_DEVICE_INFO))
rc = -EIO; /* bad smb */
else {
__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4619,7 +4635,7 @@ QFSUnixRetry:
pSMB->Reserved3 = 0;
pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4645,7 @@ QFSUnixRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 13)) {
+ if (rc || get_bcc(&pSMBr->hdr) < 13) {
rc = -EIO; /* bad smb */
} else {
__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4702,7 +4718,7 @@ SETFSUnixRetry:
pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
pSMB->ClientUnixCap = cpu_to_le64(cap);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4764,7 +4780,7 @@ QFSPosixRetry:
pSMB->Reserved3 = 0;
pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +4790,7 @@ QFSPosixRetry:
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 13)) {
+ if (rc || get_bcc(&pSMBr->hdr) < 13) {
rc = -EIO; /* bad smb */
} else {
__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4890,7 +4906,7 @@ SetEOFRetry:
pSMB->ParameterCount = cpu_to_le16(params);
pSMB->TotalParameterCount = pSMB->ParameterCount;
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
parm_data->FileSize = cpu_to_le64(size);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4969,7 +4985,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
}
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
if (rc) {
@@ -5037,7 +5053,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
else
pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5096,7 +5112,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
pSMB->Fid = fid;
pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
*data_offset = delete_file ? 1 : 0;
rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5169,7 +5185,7 @@ SetTimesRetry:
else
pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5221,7 +5237,7 @@ SetAttrLgcyRetry:
}
pSMB->attr = cpu_to_le16(dos_attrs);
pSMB->BufferFormat = 0x04;
- pSMB->hdr.smb_buf_length += name_len + 1;
+ inc_rfc1001_len(pSMB, name_len + 1);
pSMB->ByteCount = cpu_to_le16(name_len + 1);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5326,7 +5342,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
pSMB->Fid = fid;
pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
cifs_fill_unix_set_info(data_offset, args);
@@ -5402,7 +5418,7 @@ setPermsRetry:
pSMB->TotalDataCount = pSMB->DataCount;
pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
cifs_fill_unix_set_info(data_offset, args);
@@ -5418,79 +5434,6 @@ setPermsRetry:
return rc;
}
-int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
- const int notify_subdirs, const __u16 netfid,
- __u32 filter, struct file *pfile, int multishot,
- const struct nls_table *nls_codepage)
-{
- int rc = 0;
- struct smb_com_transaction_change_notify_req *pSMB = NULL;
- struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
- struct dir_notify_req *dnotify_req;
- int bytes_returned;
-
- cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
- rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
- (void **) &pSMBr);
- if (rc)
- return rc;
-
- pSMB->TotalParameterCount = 0 ;
- pSMB->TotalDataCount = 0;
- pSMB->MaxParameterCount = cpu_to_le32(2);
- /* BB find exact data count max from sess structure BB */
- pSMB->MaxDataCount = 0; /* same in little endian or be */
-/* BB VERIFY verify which is correct for above BB */
- pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
- MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
-
- pSMB->MaxSetupCount = 4;
- pSMB->Reserved = 0;
- pSMB->ParameterOffset = 0;
- pSMB->DataCount = 0;
- pSMB->DataOffset = 0;
- pSMB->SetupCount = 4; /* single byte does not need le conversion */
- pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
- pSMB->ParameterCount = pSMB->TotalParameterCount;
- if (notify_subdirs)
- pSMB->WatchTree = 1; /* one byte - no le conversion needed */
- pSMB->Reserved2 = 0;
- pSMB->CompletionFilter = cpu_to_le32(filter);
- pSMB->Fid = netfid; /* file handle always le */
- pSMB->ByteCount = 0;
-
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *)pSMBr, &bytes_returned,
- CIFS_ASYNC_OP);
- if (rc) {
- cFYI(1, "Error in Notify = %d", rc);
- } else {
- /* Add file to outstanding requests */
- /* BB change to kmem cache alloc */
- dnotify_req = kmalloc(
- sizeof(struct dir_notify_req),
- GFP_KERNEL);
- if (dnotify_req) {
- dnotify_req->Pid = pSMB->hdr.Pid;
- dnotify_req->PidHigh = pSMB->hdr.PidHigh;
- dnotify_req->Mid = pSMB->hdr.Mid;
- dnotify_req->Tid = pSMB->hdr.Tid;
- dnotify_req->Uid = pSMB->hdr.Uid;
- dnotify_req->netfid = netfid;
- dnotify_req->pfile = pfile;
- dnotify_req->filter = filter;
- dnotify_req->multishot = multishot;
- spin_lock(&GlobalMid_Lock);
- list_add_tail(&dnotify_req->lhead,
- &GlobalDnotifyReqList);
- spin_unlock(&GlobalMid_Lock);
- } else
- rc = -ENOMEM;
- }
- cifs_buf_release(pSMB);
- return rc;
-}
-
#ifdef CONFIG_CIFS_XATTR
/*
* Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5560,7 +5503,7 @@ QAllEAsRetry:
pSMB->ParameterCount = pSMB->TotalParameterCount;
pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5519,7 @@ QAllEAsRetry:
of these trans2 responses */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
- if (rc || (pSMBr->ByteCount < 4)) {
+ if (rc || get_bcc(&pSMBr->hdr) < 4) {
rc = -EIO; /* bad smb */
goto QAllEAsOut;
}
@@ -5773,7 +5716,7 @@ SetEARetry:
pSMB->ParameterCount = cpu_to_le16(params);
pSMB->TotalParameterCount = pSMB->ParameterCount;
pSMB->Reserved4 = 0;
- pSMB->hdr.smb_buf_length += byte_count;
+ inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5730,99 @@ SetEARetry:
return rc;
}
-
#endif
+
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
+/*
+ * Years ago the kernel added a "dnotify" function for Samba server,
+ * to allow network clients (such as Windows) to display updated
+ * lists of files in directory listings automatically when
+ * files are added by one user when another user has the
+ * same directory open on their desktop. The Linux cifs kernel
+ * client hooked into the kernel side of this interface for
+ * the same reason, but ironically when the VFS moved from
+ * "dnotify" to "inotify" it became harder to plug in Linux
+ * network file system clients (the most obvious use case
+ * for notify interfaces is when multiple users can update
+ * the contents of the same directory - exactly what network
+ * file systems can do) although the server (Samba) could
+ * still use it. For the short term we leave the worker
+ * function ifdeffed out (below) until inotify is fixed
+ * in the VFS to make it easier to plug in network file
+ * system clients. If inotify turns out to be permanently
+ * incompatible for network fs clients, we could instead simply
+ * expose this config flag by adding a future cifs (and smb2) notify ioctl.
+ */
+int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
+ const int notify_subdirs, const __u16 netfid,
+ __u32 filter, struct file *pfile, int multishot,
+ const struct nls_table *nls_codepage)
+{
+ int rc = 0;
+ struct smb_com_transaction_change_notify_req *pSMB = NULL;
+ struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
+ struct dir_notify_req *dnotify_req;
+ int bytes_returned;
+
+ cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
+ rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
+ (void **) &pSMBr);
+ if (rc)
+ return rc;
+
+ pSMB->TotalParameterCount = 0 ;
+ pSMB->TotalDataCount = 0;
+ pSMB->MaxParameterCount = cpu_to_le32(2);
+ /* BB find exact data count max from sess structure BB */
+ pSMB->MaxDataCount = 0; /* same in little endian or be */
+/* BB VERIFY verify which is correct for above BB */
+ pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+ MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
+
+ pSMB->MaxSetupCount = 4;
+ pSMB->Reserved = 0;
+ pSMB->ParameterOffset = 0;
+ pSMB->DataCount = 0;
+ pSMB->DataOffset = 0;
+ pSMB->SetupCount = 4; /* single byte does not need le conversion */
+ pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
+ pSMB->ParameterCount = pSMB->TotalParameterCount;
+ if (notify_subdirs)
+ pSMB->WatchTree = 1; /* one byte - no le conversion needed */
+ pSMB->Reserved2 = 0;
+ pSMB->CompletionFilter = cpu_to_le32(filter);
+ pSMB->Fid = netfid; /* file handle always le */
+ pSMB->ByteCount = 0;
+
+ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+ (struct smb_hdr *)pSMBr, &bytes_returned,
+ CIFS_ASYNC_OP);
+ if (rc) {
+ cFYI(1, "Error in Notify = %d", rc);
+ } else {
+ /* Add file to outstanding requests */
+ /* BB change to kmem cache alloc */
+ dnotify_req = kmalloc(
+ sizeof(struct dir_notify_req),
+ GFP_KERNEL);
+ if (dnotify_req) {
+ dnotify_req->Pid = pSMB->hdr.Pid;
+ dnotify_req->PidHigh = pSMB->hdr.PidHigh;
+ dnotify_req->Mid = pSMB->hdr.Mid;
+ dnotify_req->Tid = pSMB->hdr.Tid;
+ dnotify_req->Uid = pSMB->hdr.Uid;
+ dnotify_req->netfid = netfid;
+ dnotify_req->pfile = pfile;
+ dnotify_req->filter = filter;
+ dnotify_req->multishot = multishot;
+ spin_lock(&GlobalMid_Lock);
+ list_add_tail(&dnotify_req->lhead,
+ &GlobalDnotifyReqList);
+ spin_unlock(&GlobalMid_Lock);
+ } else
+ rc = -ENOMEM;
+ }
+ cifs_buf_release(pSMB);
+ return rc;
+}
+#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4bc862a80efa..da284e3cb653 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
bool fsc:1; /* enable fscache */
bool mfsymlinks:1; /* use Minshall+French Symlinks */
bool multiuser:1;
+ bool use_smb2:1; /* force smb2 use on mount instead of cifs */
unsigned int rsize;
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
@@ -274,7 +275,8 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
char *data_area_of_target;
char *data_area_of_buf2;
int remaining;
- __u16 byte_count, total_data_size, total_in_buf, total_in_buf2;
+ unsigned int byte_count, total_in_buf;
+ __u16 total_data_size, total_in_buf2;
total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
@@ -287,7 +289,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
remaining = total_data_size - total_in_buf;
if (remaining < 0)
- return -EINVAL;
+ return -EPROTO;
if (remaining == 0) /* nothing to do, ignore */
return 0;
@@ -308,19 +310,28 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
data_area_of_target += total_in_buf;
/* copy second buffer into end of first buffer */
- memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
total_in_buf += total_in_buf2;
+ /* is the result too big for the field? */
+ if (total_in_buf > USHRT_MAX)
+ return -EPROTO;
put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
- byte_count = get_bcc_le(pTargetSMB);
- byte_count += total_in_buf2;
- put_bcc_le(byte_count, pTargetSMB);
- byte_count = pTargetSMB->smb_buf_length;
+ /* fix up the BCC */
+ byte_count = get_bcc(pTargetSMB);
byte_count += total_in_buf2;
+ /* is the result too big for the field? */
+ if (byte_count > USHRT_MAX)
+ return -EPROTO;
+ put_bcc(byte_count, pTargetSMB);
- /* BB also add check that we are not beyond maximum buffer size */
+ byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
+ byte_count += total_in_buf2;
+ /* don't allow buffer to overflow */
+ if (byte_count > CIFSMaxBufSize)
+ return -ENOBUFS;
+ pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
- pTargetSMB->smb_buf_length = byte_count;
+ memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
if (remaining == total_in_buf2) {
cFYI(1, "found the last secondary response");
@@ -485,8 +496,7 @@ incomplete_rcv:
/* Note that FC 1001 length is big endian on the wire,
but we convert it here so it is always manipulated
as host byte order */
- pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
- smb_buffer->smb_buf_length = pdu_length;
+ pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
@@ -607,59 +617,63 @@ incomplete_rcv:
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
- if ((mid_entry->mid == smb_buffer->Mid) &&
- (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
- (mid_entry->command == smb_buffer->Command)) {
- if (length == 0 &&
- check2ndT2(smb_buffer, server->maxBuf) > 0) {
- /* We have a multipart transact2 resp */
- isMultiRsp = true;
- if (mid_entry->resp_buf) {
- /* merge response - fix up 1st*/
- if (coalesce_t2(smb_buffer,
- mid_entry->resp_buf)) {
- mid_entry->multiRsp =
- true;
- break;
- } else {
- /* all parts received */
- mid_entry->multiEnd =
- true;
- goto multi_t2_fnd;
- }
+ if (mid_entry->mid != smb_buffer->Mid ||
+ mid_entry->midState != MID_REQUEST_SUBMITTED ||
+ mid_entry->command != smb_buffer->Command) {
+ mid_entry = NULL;
+ continue;
+ }
+
+ if (length == 0 &&
+ check2ndT2(smb_buffer, server->maxBuf) > 0) {
+ /* We have a multipart transact2 resp */
+ isMultiRsp = true;
+ if (mid_entry->resp_buf) {
+ /* merge response - fix up 1st*/
+ length = coalesce_t2(smb_buffer,
+ mid_entry->resp_buf);
+ if (length > 0) {
+ length = 0;
+ mid_entry->multiRsp = true;
+ break;
} else {
- if (!isLargeBuf) {
- cERROR(1, "1st trans2 resp needs bigbuf");
- /* BB maybe we can fix this up, switch
- to already allocated large buffer? */
- } else {
- /* Have first buffer */
- mid_entry->resp_buf =
- smb_buffer;
- mid_entry->largeBuf =
- true;
- bigbuf = NULL;
- }
+ /* all parts received or
+ * packet is malformed
+ */
+ mid_entry->multiEnd = true;
+ goto multi_t2_fnd;
+ }
+ } else {
+ if (!isLargeBuf) {
+ /*
+ * FIXME: switch to already
+ * allocated largebuf?
+ */
+ cERROR(1, "1st trans2 resp "
+ "needs bigbuf");
+ } else {
+ /* Have first buffer */
+ mid_entry->resp_buf =
+ smb_buffer;
+ mid_entry->largeBuf = true;
+ bigbuf = NULL;
}
- break;
}
- mid_entry->resp_buf = smb_buffer;
- mid_entry->largeBuf = isLargeBuf;
+ break;
+ }
+ mid_entry->resp_buf = smb_buffer;
+ mid_entry->largeBuf = isLargeBuf;
multi_t2_fnd:
- if (length == 0)
- mid_entry->midState =
- MID_RESPONSE_RECEIVED;
- else
- mid_entry->midState =
- MID_RESPONSE_MALFORMED;
+ if (length == 0)
+ mid_entry->midState = MID_RESPONSE_RECEIVED;
+ else
+ mid_entry->midState = MID_RESPONSE_MALFORMED;
#ifdef CONFIG_CIFS_STATS2
- mid_entry->when_received = jiffies;
+ mid_entry->when_received = jiffies;
#endif
- list_del_init(&mid_entry->qhead);
- mid_entry->callback(mid_entry);
- break;
- }
- mid_entry = NULL;
+ list_del_init(&mid_entry->qhead);
+ mid_entry->callback(mid_entry);
+ break;
}
spin_unlock(&GlobalMid_Lock);
@@ -721,7 +735,7 @@ multi_t2_fnd:
sock_release(csocket);
server->ssocket = NULL;
}
- /* buffer usuallly freed in free_mid - need to free it here on exit */
+ /* buffer usually freed in free_mid - need to free it here on exit */
cifs_buf_release(bigbuf);
if (smallbuf) /* no sense logging a debug message if NULL */
cifs_small_buf_release(smallbuf);
@@ -804,10 +818,11 @@ extract_hostname(const char *unc)
}
static int
-cifs_parse_mount_options(char *options, const char *devname,
+cifs_parse_mount_options(const char *mountdata, const char *devname,
struct smb_vol *vol)
{
char *value, *data, *end;
+ char *mountdata_copy, *options;
unsigned int temp_len, i, j;
char separator[2];
short int override_uid = -1;
@@ -847,9 +862,14 @@ cifs_parse_mount_options(char *options, const char *devname,
vol->actimeo = CIFS_DEF_ACTIMEO;
- if (!options)
- return 1;
+ if (!mountdata)
+ goto cifs_parse_mount_err;
+ mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
+ if (!mountdata_copy)
+ goto cifs_parse_mount_err;
+
+ options = mountdata_copy;
end = options + strlen(options);
if (strncmp(options, "sep=", 4) == 0) {
if (options[4] != 0) {
@@ -875,17 +895,22 @@ cifs_parse_mount_options(char *options, const char *devname,
if (!value) {
printk(KERN_WARNING
"CIFS: invalid or missing username\n");
- return 1; /* needs_arg; */
+ goto cifs_parse_mount_err;
} else if (!*value) {
/* null user, ie anonymous, authentication */
vol->nullauth = 1;
}
if (strnlen(value, MAX_USERNAME_SIZE) <
MAX_USERNAME_SIZE) {
- vol->username = value;
+ vol->username = kstrdup(value, GFP_KERNEL);
+ if (!vol->username) {
+ printk(KERN_WARNING "CIFS: no memory "
+ "for username\n");
+ goto cifs_parse_mount_err;
+ }
} else {
printk(KERN_WARNING "CIFS: username too long\n");
- return 1;
+ goto cifs_parse_mount_err;
}
} else if (strnicmp(data, "pass", 4) == 0) {
if (!value) {
@@ -949,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,
if (vol->password == NULL) {
printk(KERN_WARNING "CIFS: no memory "
"for password\n");
- return 1;
+ goto cifs_parse_mount_err;
}
for (i = 0, j = 0; i < temp_len; i++, j++) {
vol->password[j] = value[i];
@@ -965,7 +990,7 @@ cifs_parse_mount_options(char *options, const char *devname,
if (vol->password == NULL) {
printk(KERN_WARNING "CIFS: no memory "
"for password\n");
- return 1;
+ goto cifs_parse_mount_err;
}
strcpy(vol->password, value);
}
@@ -975,11 +1000,16 @@ cifs_parse_mount_options(char *options, const char *devname,
vol->UNCip = NULL;
} else if (strnlen(value, INET6_ADDRSTRLEN) <
INET6_ADDRSTRLEN) {
- vol->UNCip = value;
+ vol->UNCip = kstrdup(value, GFP_KERNEL);
+ if (!vol->UNCip) {
+ printk(KERN_WARNING "CIFS: no memory "
+ "for UNC IP\n");
+ goto cifs_parse_mount_err;
+ }
} else {
printk(KERN_WARNING "CIFS: ip address "
"too long\n");
- return 1;
+ goto cifs_parse_mount_err;
}
} else if (strnicmp(data, "sec", 3) == 0) {
if (!value || !*value) {
@@ -992,7 +1022,7 @@ cifs_parse_mount_options(char *options, const char *devname,
/* vol->secFlg |= CIFSSEC_MUST_SEAL |
CIFSSEC_MAY_KRB5; */
cERROR(1, "Krb5 cifs privacy not supported");
- return 1;
+ goto cifs_parse_mount_err;
} else if (strnicmp(value, "krb5", 4) == 0) {
vol->secFlg |= CIFSSEC_MAY_KRB5;
} else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1022,7 +1052,23 @@ cifs_parse_mount_options(char *options, const char *devname,
vol->nullauth = 1;
} else {
cERROR(1, "bad security option: %s", value);
- return 1;
+ goto cifs_parse_mount_err;
+ }
+ } else if (strnicmp(data, "vers", 3) == 0) {
+ if (!value || !*value) {
+ cERROR(1, "no protocol version specified"
+ " after vers= mount option");
+ } else if ((strnicmp(value, "cifs", 4) == 0) ||
+ (strnicmp(value, "1", 1) == 0)) {
+ /* this is the default */
+ continue;
+ } else if ((strnicmp(value, "smb2", 4) == 0) ||
+ (strnicmp(value, "2", 1) == 0)) {
+#ifdef CONFIG_CIFS_SMB2
+ vol->use_smb2 = true;
+#else
+ cERROR(1, "smb2 support not enabled");
+#endif /* CONFIG_CIFS_SMB2 */
}
} else if ((strnicmp(data, "unc", 3) == 0)
|| (strnicmp(data, "target", 6) == 0)
@@ -1030,12 +1076,12 @@ cifs_parse_mount_options(char *options, const char *devname,
if (!value || !*value) {
printk(KERN_WARNING "CIFS: invalid path to "
"network resource\n");
- return 1; /* needs_arg; */
+ goto cifs_parse_mount_err;
}
if ((temp_len = strnlen(value, 300)) < 300) {
vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
if (vol->UNC == NULL)
- return 1;
+ goto cifs_parse_mount_err;
strcpy(vol->UNC, value);
if (strncmp(vol->UNC, "//", 2) == 0) {
vol->UNC[0] = '\\';
@@ -1044,27 +1090,32 @@ cifs_parse_mount_options(char *options, const char *devname,
printk(KERN_WARNING
"CIFS: UNC Path does not begin "
"with // or \\\\ \n");
- return 1;
+ goto cifs_parse_mount_err;
}
} else {
printk(KERN_WARNING "CIFS: UNC name too long\n");
- return 1;
+ goto cifs_parse_mount_err;
}
} else if ((strnicmp(data, "domain", 3) == 0)
|| (strnicmp(data, "workgroup", 5) == 0)) {
if (!value || !*value) {
printk(KERN_WARNING "CIFS: invalid domain name\n");
- return 1; /* needs_arg; */
+ goto cifs_parse_mount_err;
}
/* BB are there cases in which a comma can be valid in
a domain name and need special handling? */
if (strnlen(value, 256) < 256) {
- vol->domainname = value;
+ vol->domainname = kstrdup(value, GFP_KERNEL);
+ if (!vol->domainname) {
+ printk(KERN_WARNING "CIFS: no memory "
+ "for domainname\n");
+ goto cifs_parse_mount_err;
+ }
cFYI(1, "Domain name set");
} else {
printk(KERN_WARNING "CIFS: domain name too "
"long\n");
- return 1;
+ goto cifs_parse_mount_err;
}
} else if (strnicmp(data, "srcaddr", 7) == 0) {
vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1072,7 +1123,7 @@ cifs_parse_mount_options(char *options, const char *devname,
if (!value || !*value) {
printk(KERN_WARNING "CIFS: srcaddr value"
" not specified.\n");
- return 1; /* needs_arg; */
+ goto cifs_parse_mount_err;
}
i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
value, strlen(value));
@@ -1080,20 +1131,20 @@ cifs_parse_mount_options(char *options, const char *devname,
printk(KERN_WARNING "CIFS: Could not parse"
" srcaddr: %s\n",
value);
- return 1;
+ goto cifs_parse_mount_err;
}
} else if (strnicmp(data, "prefixpath", 10) == 0) {
if (!value || !*value) {
printk(KERN_WARNING
"CIFS: invalid path prefix\n");
- return 1; /* needs_argument */
+ goto cifs_parse_mount_err;
}
if ((temp_len = strnlen(value, 1024)) < 1024) {
if (value[0] != '/')
temp_len++; /* missing leading slash */
vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
if (vol->prepath == NULL)
- return 1;
+ goto cifs_parse_mount_err;
if (value[0] != '/') {
vol->prepath[0] = '/';
strcpy(vol->prepath+1, value);
@@ -1102,24 +1153,33 @@ cifs_parse_mount_options(char *options, const char *devname,
cFYI(1, "prefix path %s", vol->prepath);
} else {
printk(KERN_WARNING "CIFS: prefix too long\n");
- return 1;
+ goto cifs_parse_mount_err;
}
} else if (strnicmp(data, "iocharset", 9) == 0) {
if (!value || !*value) {
printk(KERN_WARNING "CIFS: invalid iocharset "
"specified\n");
- return 1; /* needs_arg; */
+ goto cifs_parse_mount_err;
}
if (strnlen(value, 65) < 65) {
- if (strnicmp(value, "default", 7))
- vol->iocharset = value;
+ if (strnicmp(value, "default", 7)) {
+ vol->iocharset = kstrdup(value,
+ GFP_KERNEL);
+
+ if (!vol->iocharset) {
+ printk(KERN_WARNING "CIFS: no "
+ "memory for"
+ "charset\n");
+ goto cifs_parse_mount_err;
+ }
+ }
/* if iocharset not set then load_nls_default
is used by caller */
cFYI(1, "iocharset set to %s", value);
} else {
printk(KERN_WARNING "CIFS: iocharset name "
"too long.\n");
- return 1;
+ goto cifs_parse_mount_err;
}
} else if (!strnicmp(data, "uid", 3) && value && *value) {
vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1232,7 +1292,7 @@ cifs_parse_mount_options(char *options, const char *devname,
if (vol->actimeo > CIFS_MAX_ACTIMEO) {
cERROR(1, "CIFS: attribute cache"
"timeout too large");
- return 1;
+ goto cifs_parse_mount_err;
}
}
} else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1376,7 +1436,7 @@ cifs_parse_mount_options(char *options, const char *devname,
#ifndef CONFIG_CIFS_FSCACHE
cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
"kernel config option set");
- return 1;
+ goto cifs_parse_mount_err;
#endif
vol->fsc = true;
} else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1391,12 +1451,12 @@ cifs_parse_mount_options(char *options, const char *devname,
if (devname == NULL) {
printk(KERN_WARNING "CIFS: Missing UNC name for mount "
"target\n");
- return 1;
+ goto cifs_parse_mount_err;
}
if ((temp_len = strnlen(devname, 300)) < 300) {
vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
if (vol->UNC == NULL)
- return 1;
+ goto cifs_parse_mount_err;
strcpy(vol->UNC, devname);
if (strncmp(vol->UNC, "//", 2) == 0) {
vol->UNC[0] = '\\';
@@ -1404,21 +1464,21 @@ cifs_parse_mount_options(char *options, const char *devname,
} else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
printk(KERN_WARNING "CIFS: UNC Path does not "
"begin with // or \\\\ \n");
- return 1;
+ goto cifs_parse_mount_err;
}
value = strpbrk(vol->UNC+2, "/\\");
if (value)
*value = '\\';
} else {
printk(KERN_WARNING "CIFS: UNC name too long\n");
- return 1;
+ goto cifs_parse_mount_err;
}
}
if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
cERROR(1, "Multiuser mounts currently require krb5 "
"authentication!");
- return 1;
+ goto cifs_parse_mount_err;
}
if (vol->UNCip == NULL)
@@ -1436,7 +1496,12 @@ cifs_parse_mount_options(char *options, const char *devname,
printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
"specified with no gid= option.\n");
+ kfree(mountdata_copy);
return 0;
+
+cifs_parse_mount_err:
+ kfree(mountdata_copy);
+ return 1;
}
/** Returns true if srcaddr isn't specified and rhs isn't
@@ -2266,7 +2331,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
smb_buf = (struct smb_hdr *)ses_init_buf;
/* sizeof RFC1002_SESSION_REQUEST with no scope */
- smb_buf->smb_buf_length = 0x81000044;
+ smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
rc = smb_send(server, smb_buf, 0x44);
kfree(ses_init_buf);
/*
@@ -2659,6 +2724,11 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
0 /* not legacy */, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (rc == -EOPNOTSUPP || rc == -EINVAL)
+ rc = SMBQueryInformation(xid, tcon, full_path, pfile_info,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
kfree(pfile_info);
return rc;
}
@@ -2672,8 +2742,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
return;
volume_info = *pvolume_info;
+ kfree(volume_info->username);
kzfree(volume_info->password);
kfree(volume_info->UNC);
+ kfree(volume_info->UNCip);
+ kfree(volume_info->domainname);
+ kfree(volume_info->iocharset);
kfree(volume_info->prepath);
kfree(volume_info);
*pvolume_info = NULL;
@@ -2710,11 +2784,65 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
return full_path;
}
+
+/*
+ * Perform a dfs referral query for a share and (optionally) prefix
+ *
+ * When check_prefix is nonzero the query is made for the full UNC path
+ * including the prefix path; otherwise only volume_info->UNC is queried.
+ *
+ * If a referral is found, cifs_sb->mountdata is freed and replaced with a
+ * newly composed option string for the submount. If composing that string
+ * fails, cifs_sb->mountdata is left NULL and the error is returned. When no
+ * referral is found, cifs_sb->mountdata is left untouched.
+ *
+ * Returns the rc from get_dfs_path to the caller, which can be used to
+ * determine whether there were referrals, or a negative error if the UNC
+ * path or the new mount options could not be built.
+ */
+static int
+expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
+		    struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
+		    int check_prefix)
+{
+	int rc;
+	unsigned int num_referrals = 0;
+	struct dfs_info3_param *referrals = NULL;
+	char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
+
+	full_path = build_unc_path_to_root(volume_info, cifs_sb);
+	if (IS_ERR(full_path))
+		return PTR_ERR(full_path);
+
+	/* For DFS paths, skip the first '\' of the UNC */
+	ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
+
+	rc = get_dfs_path(xid, pSesInfo, ref_path, cifs_sb->local_nls,
+			  &num_referrals, &referrals,
+			  cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	if (!rc && num_referrals > 0) {
+		char *fake_devname = NULL;
+
+		mdata = cifs_compose_mount_options(cifs_sb->mountdata,
+						   full_path + 1, referrals,
+						   &fake_devname);
+
+		free_dfs_info_array(referrals, num_referrals);
+		kfree(fake_devname);
+
+		/* kfree(NULL) is a no-op, so the old options need no guard */
+		kfree(cifs_sb->mountdata);
+
+		if (IS_ERR(mdata)) {
+			rc = PTR_ERR(mdata);
+			mdata = NULL;
+		}
+		cifs_sb->mountdata = mdata;
+	}
+	kfree(full_path);
+	return rc;
+}
#endif
int
cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
- char *mount_data_global, const char *devname)
+ const char *devname)
{
int rc;
int xid;
@@ -2723,13 +2851,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
struct cifsTconInfo *tcon;
struct TCP_Server_Info *srvTcp;
char *full_path;
- char *mount_data = mount_data_global;
struct tcon_link *tlink;
#ifdef CONFIG_CIFS_DFS_UPCALL
- struct dfs_info3_param *referrals = NULL;
- unsigned int num_referrals = 0;
int referral_walks_count = 0;
try_mount_again:
+ /* cleanup activities if we're chasing a referral */
+ if (referral_walks_count) {
+ if (tcon)
+ cifs_put_tcon(tcon);
+ else if (pSesInfo)
+ cifs_put_smb_ses(pSesInfo);
+
+ cleanup_volume_info(&volume_info);
+ FreeXid(xid);
+ }
#endif
rc = 0;
tcon = NULL;
@@ -2746,7 +2881,8 @@ try_mount_again:
goto out;
}
- if (cifs_parse_mount_options(mount_data, devname, volume_info)) {
+ if (cifs_parse_mount_options(cifs_sb->mountdata, devname,
+ volume_info)) {
rc = -EINVAL;
goto out;
}
@@ -2842,6 +2978,24 @@ try_mount_again:
(tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
remote_path_check:
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ /*
+ * Perform an unconditional check for whether there are DFS
+ * referrals for this path without prefix, to provide support
+ * for DFS referrals from w2k8 servers which don't seem to respond
+ * with PATH_NOT_COVERED to requests that include the prefix.
+ * Chase the referral if found, otherwise continue normally.
+ */
+ if (referral_walks_count == 0) {
+ int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
+ cifs_sb, false);
+ if (!refrc) {
+ referral_walks_count++;
+ goto try_mount_again;
+ }
+ }
+#endif
+
/* check if a whole path (including prepath) is not remote */
if (!rc && tcon) {
/* build_path_to_root works only when we have a valid tcon */
@@ -2875,46 +3029,15 @@ remote_path_check:
if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
convert_delimiter(cifs_sb->prepath,
CIFS_DIR_SEP(cifs_sb));
- full_path = build_unc_path_to_root(volume_info, cifs_sb);
- if (IS_ERR(full_path)) {
- rc = PTR_ERR(full_path);
- goto mount_fail_check;
- }
-
- cFYI(1, "Getting referral for: %s", full_path);
- rc = get_dfs_path(xid, pSesInfo , full_path + 1,
- cifs_sb->local_nls, &num_referrals, &referrals,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (!rc && num_referrals > 0) {
- char *fake_devname = NULL;
-
- if (mount_data != mount_data_global)
- kfree(mount_data);
- mount_data = cifs_compose_mount_options(
- cifs_sb->mountdata, full_path + 1,
- referrals, &fake_devname);
+ rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
+ true);
- free_dfs_info_array(referrals, num_referrals);
- kfree(fake_devname);
- kfree(full_path);
-
- if (IS_ERR(mount_data)) {
- rc = PTR_ERR(mount_data);
- mount_data = NULL;
- goto mount_fail_check;
- }
-
- if (tcon)
- cifs_put_tcon(tcon);
- else if (pSesInfo)
- cifs_put_smb_ses(pSesInfo);
-
- cleanup_volume_info(&volume_info);
+ if (!rc) {
referral_walks_count++;
- FreeXid(xid);
goto try_mount_again;
}
+ goto mount_fail_check;
#else /* No DFS support, return error on mount */
rc = -EOPNOTSUPP;
#endif
@@ -2947,8 +3070,6 @@ remote_path_check:
mount_fail_check:
/* on error free sesinfo and tcon struct if needed */
if (rc) {
- if (mount_data != mount_data_global)
- kfree(mount_data);
/* If find_unc succeeded then rc == 0 so we can not end */
/* up accidentally freeing someone elses tcon struct */
if (tcon)
@@ -3064,7 +3185,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
bcc_ptr += strlen("?????");
bcc_ptr += 1;
count = bcc_ptr - &pSMB->Password[0];
- pSMB->hdr.smb_buf_length += count;
+ pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
+ pSMB->hdr.smb_buf_length) + count);
pSMB->ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
@@ -3239,7 +3361,9 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
struct cifsSesInfo *ses;
struct cifsTconInfo *tcon = NULL;
struct smb_vol *vol_info;
- char username[MAX_USERNAME_SIZE + 1];
+ char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
+ /* We used to have this as MAX_USERNAME which is */
+ /* way too big now (256 instead of 32) */
vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
if (vol_info == NULL) {
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
#include "cifs_debug.h"
#include "cifsfs.h"
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
static struct dentry *cifs_get_parent(struct dentry *dentry)
{
/* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
.encode_fs = */
};
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index faf59529e847..c672afef0c09 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -857,95 +857,6 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
cifsi->server_eof = end_of_write;
}
-ssize_t cifs_user_write(struct file *file, const char __user *write_data,
- size_t write_size, loff_t *poffset)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- int rc = 0;
- unsigned int bytes_written = 0;
- unsigned int total_written;
- struct cifs_sb_info *cifs_sb;
- struct cifsTconInfo *pTcon;
- int xid;
- struct cifsFileInfo *open_file;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
- cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-
- /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
- *poffset, file->f_path.dentry->d_name.name); */
-
- if (file->private_data == NULL)
- return -EBADF;
-
- open_file = file->private_data;
- pTcon = tlink_tcon(open_file->tlink);
-
- rc = generic_write_checks(file, poffset, &write_size, 0);
- if (rc)
- return rc;
-
- xid = GetXid();
-
- for (total_written = 0; write_size > total_written;
- total_written += bytes_written) {
- rc = -EAGAIN;
- while (rc == -EAGAIN) {
- if (file->private_data == NULL) {
- /* file has been closed on us */
- FreeXid(xid);
- /* if we have gotten here we have written some data
- and blocked, and the file has been freed on us while
- we blocked so return what we managed to write */
- return total_written;
- }
- if (open_file->invalidHandle) {
- /* we could deadlock if we called
- filemap_fdatawait from here so tell
- reopen_file not to flush data to server
- now */
- rc = cifs_reopen_file(open_file, false);
- if (rc != 0)
- break;
- }
-
- rc = CIFSSMBWrite(xid, pTcon,
- open_file->netfid,
- min_t(const int, cifs_sb->wsize,
- write_size - total_written),
- *poffset, &bytes_written,
- NULL, write_data + total_written, 0);
- }
- if (rc || (bytes_written == 0)) {
- if (total_written)
- break;
- else {
- FreeXid(xid);
- return rc;
- }
- } else {
- cifs_update_eof(cifsi, *poffset, bytes_written);
- *poffset += bytes_written;
- }
- }
-
- cifs_stats_bytes_written(pTcon, total_written);
-
-/* Do not update local mtime - server will set its actual value on write
- * inode->i_ctime = inode->i_mtime =
- * current_fs_time(inode->i_sb);*/
- if (total_written > 0) {
- spin_lock(&inode->i_lock);
- if (*poffset > inode->i_size)
- i_size_write(inode, *poffset);
- spin_unlock(&inode->i_lock);
- }
- mark_inode_dirty_sync(inode);
-
- FreeXid(xid);
- return total_written;
-}
-
static ssize_t cifs_write(struct cifsFileInfo *open_file,
const char *write_data, size_t write_size,
loff_t *poffset)
@@ -1420,9 +1331,10 @@ retry_write:
return rc;
}
-static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+static int
+cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
- int rc = -EFAULT;
+ int rc;
int xid;
xid = GetXid();
@@ -1442,15 +1354,29 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
* to fail to update with the state of the page correctly.
*/
set_page_writeback(page);
+retry_write:
rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
- SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
- unlock_page(page);
+ if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
+ goto retry_write;
+ else if (rc == -EAGAIN)
+ redirty_page_for_writepage(wbc, page);
+ else if (rc != 0)
+ SetPageError(page);
+ else
+ SetPageUptodate(page);
end_page_writeback(page);
page_cache_release(page);
FreeXid(xid);
return rc;
}
+/*
+ * Write the page out through cifs_writepage_locked(), then unlock it and
+ * hand the write result back to the caller.
+ */
+static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	const int status = cifs_writepage_locked(page, wbc);
+
+	unlock_page(page);
+	return status;
+}
+
static int cifs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
@@ -1519,8 +1445,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
cFYI(1, "Sync file - name: %s datasync: 0x%x",
file->f_path.dentry->d_name.name, datasync);
- if (!CIFS_I(inode)->clientCanCacheRead)
- cifs_invalidate_mapping(inode);
+ if (!CIFS_I(inode)->clientCanCacheRead) {
+ rc = cifs_invalidate_mapping(inode);
+ if (rc) {
+ cFYI(1, "rc: %d during invalidate phase", rc);
+ rc = 0; /* don't care about it in fsync */
+ }
+ }
tcon = tlink_tcon(smbfile->tlink);
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
@@ -1726,7 +1657,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
return total_written;
}
-static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
ssize_t written;
@@ -1849,17 +1780,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
return total_read;
}
-ssize_t cifs_user_read(struct file *file, char __user *read_data,
- size_t read_size, loff_t *poffset)
-{
- struct iovec iov;
- iov.iov_base = read_data;
- iov.iov_len = read_size;
-
- return cifs_iovec_read(file, &iov, 1, poffset);
-}
-
-static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
ssize_t read;
@@ -1987,8 +1908,11 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
xid = GetXid();
- if (!CIFS_I(inode)->clientCanCacheRead)
- cifs_invalidate_mapping(inode);
+ if (!CIFS_I(inode)->clientCanCacheRead) {
+ rc = cifs_invalidate_mapping(inode);
+ if (rc)
+ return rc;
+ }
rc = generic_file_mmap(file, vma);
if (rc == 0)
@@ -2415,6 +2339,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}
+/*
+ * Write a single page back synchronously if it is still dirty, then
+ * invalidate its fscache copy. The writeback range is limited to this one
+ * page. Returns 0 if the page was clean, otherwise the result of
+ * cifs_writepage_locked().
+ */
+static int cifs_launder_page(struct page *page)
+{
+	int rc = 0;
+	loff_t first_byte = page_offset(page);
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0,
+		.range_start = first_byte,
+		.range_end = first_byte + (loff_t)(PAGE_CACHE_SIZE - 1),
+	};
+
+	cFYI(1, "Launder page: %p", page);
+
+	/* only pages that were actually dirty need to go to the server */
+	if (clear_page_dirty_for_io(page))
+		rc = cifs_writepage_locked(page, &wbc);
+
+	cifs_fscache_invalidate_page(page, page->mapping->host);
+	return rc;
+}
+
void cifs_oplock_break(struct work_struct *work)
{
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -2486,7 +2431,7 @@ const struct address_space_operations cifs_addr_ops = {
.set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = cifs_release_page,
.invalidatepage = cifs_invalidate_page,
- /* .direct_IO = */
+ .launder_page = cifs_launder_page,
};
/*
@@ -2503,5 +2448,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = cifs_release_page,
.invalidatepage = cifs_invalidate_page,
- /* .direct_IO = */
+ .launder_page = cifs_launder_page,
};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..de02ed5e25c2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -878,7 +878,7 @@ retry_iget5_locked:
}
/* gets root inode */
-struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
+struct inode *cifs_root_iget(struct super_block *sb)
{
int xid;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1683,71 +1683,70 @@ cifs_inode_needs_reval(struct inode *inode)
/*
* Zap the cache. Called when invalid_mapping flag is set.
*/
-void
+int
cifs_invalidate_mapping(struct inode *inode)
{
- int rc;
+ int rc = 0;
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
cifs_i->invalid_mapping = false;
- /* write back any cached data */
if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
- rc = filemap_write_and_wait(inode->i_mapping);
- mapping_set_error(inode->i_mapping, rc);
+ rc = invalidate_inode_pages2(inode->i_mapping);
+ if (rc) {
+ cERROR(1, "%s: could not invalidate inode %p", __func__,
+ inode);
+ cifs_i->invalid_mapping = true;
+ }
}
- invalidate_remote_inode(inode);
+
cifs_fscache_reset_inode_cookie(inode);
+ return rc;
}
-int cifs_revalidate_file(struct file *filp)
+int cifs_revalidate_file_attr(struct file *filp)
{
int rc = 0;
struct inode *inode = filp->f_path.dentry->d_inode;
struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
if (!cifs_inode_needs_reval(inode))
- goto check_inval;
+ return rc;
if (tlink_tcon(cfile->tlink)->unix_ext)
rc = cifs_get_file_info_unix(filp);
else
rc = cifs_get_file_info(filp);
-check_inval:
- if (CIFS_I(inode)->invalid_mapping)
- cifs_invalidate_mapping(inode);
-
return rc;
}
-/* revalidate a dentry's inode attributes */
-int cifs_revalidate_dentry(struct dentry *dentry)
+int cifs_revalidate_dentry_attr(struct dentry *dentry)
{
int xid;
int rc = 0;
- char *full_path = NULL;
struct inode *inode = dentry->d_inode;
struct super_block *sb = dentry->d_sb;
+ char *full_path = NULL;
if (inode == NULL)
return -ENOENT;
- xid = GetXid();
-
if (!cifs_inode_needs_reval(inode))
- goto check_inval;
+ return rc;
+
+ xid = GetXid();
/* can not safely grab the rename sem here if rename calls revalidate
since that would deadlock */
full_path = build_path_from_dentry(dentry);
if (full_path == NULL) {
rc = -ENOMEM;
- goto check_inval;
+ goto out;
}
- cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
- "jiffies %ld", full_path, inode, inode->i_count.counter,
+ cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
+ "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
dentry, dentry->d_time, jiffies);
if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1755,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
xid, NULL);
-check_inval:
- if (CIFS_I(inode)->invalid_mapping)
- cifs_invalidate_mapping(inode);
-
+out:
kfree(full_path);
FreeXid(xid);
return rc;
}
+/*
+ * Refresh the attributes of an open file's inode and, if that succeeds and
+ * the inode is flagged invalid_mapping, invalidate its cached pages too.
+ * Returns the first error encountered, or 0.
+ */
+int cifs_revalidate_file(struct file *filp)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int rc = cifs_revalidate_file_attr(filp);
+
+	if (!rc && CIFS_I(inode)->invalid_mapping)
+		rc = cifs_invalidate_mapping(inode);
+	return rc;
+}
+
+/*
+ * Revalidate a dentry's inode attributes and, if that succeeds and the
+ * inode is flagged invalid_mapping, invalidate its cached pages as well.
+ * Returns the first error encountered, or 0.
+ */
+int cifs_revalidate_dentry(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	int rc = cifs_revalidate_dentry_attr(dentry);
+
+	if (!rc && CIFS_I(inode)->invalid_mapping)
+		rc = cifs_invalidate_mapping(inode);
+	return rc;
+}
+
int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
- int err = cifs_revalidate_dentry(dentry);
-
- if (!err) {
- generic_fillattr(dentry->d_inode, stat);
- stat->blksize = CIFS_MAX_MSGSIZE;
- stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
+ struct inode *inode = dentry->d_inode;
+ int rc;
- /*
- * If on a multiuser mount without unix extensions, and the
- * admin hasn't overridden them, set the ownership to the
- * fsuid/fsgid of the current process.
- */
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
- !tcon->unix_ext) {
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
- stat->uid = current_fsuid();
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
- stat->gid = current_fsgid();
+ /*
+ * We need to be sure that all dirty pages are written and the server
+ * has actual ctime, mtime and file length.
+ */
+ if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+ inode->i_mapping->nrpages != 0) {
+ rc = filemap_fdatawait(inode->i_mapping);
+ if (rc) {
+ mapping_set_error(inode->i_mapping, rc);
+ return rc;
}
}
- return err;
+
+ rc = cifs_revalidate_dentry_attr(dentry);
+ if (rc)
+ return rc;
+
+ generic_fillattr(inode, stat);
+ stat->blksize = CIFS_MAX_MSGSIZE;
+ stat->ino = CIFS_I(inode)->uniqueid;
+
+ /*
+ * If on a multiuser mount without unix extensions, and the admin hasn't
+ * overridden them, set the ownership to the fsuid/fsgid of the current
+ * process.
+ */
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
+ !tcon->unix_ext) {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
+ stat->uid = current_fsuid();
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
+ stat->gid = current_fsgid();
+ }
+ return rc;
}
static int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0c684ae4c071..907531ac5888 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -304,12 +304,10 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
- buffer->smb_buf_length =
+ buffer->smb_buf_length = cpu_to_be32(
(2 * word_count) + sizeof(struct smb_hdr) -
4 /* RFC 1001 length field does not count */ +
- 2 /* for bcc field itself */ ;
- /* Note that this is the only network field that has to be converted
- to big endian and it is done just before we send it */
+ 2 /* for bcc field itself */) ;
buffer->Protocol[0] = 0xFF;
buffer->Protocol[1] = 'S';
@@ -424,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
int
checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
{
- __u32 len = smb->smb_buf_length;
+ __u32 len = be32_to_cpu(smb->smb_buf_length);
__u32 clc_len; /* calculated length */
cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
@@ -464,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
if (check_smb_hdr(smb, mid))
return 1;
- clc_len = smbCalcSize_LE(smb);
+ clc_len = smbCalcSize(smb);
if (4 + len != length) {
cERROR(1, "Length read does not match RFC1001 length %d",
@@ -521,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
(struct smb_com_transaction_change_notify_rsp *)buf;
struct file_notify_information *pnotify;
__u32 data_offset = 0;
- if (get_bcc_le(buf) > sizeof(struct file_notify_information)) {
+ if (get_bcc(buf) > sizeof(struct file_notify_information)) {
data_offset = le32_to_cpu(pSMBr->DataOffset);
pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..79b71c2c7c9d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
2 /* size of the bcc field */ + get_bcc(ptr));
}
-unsigned int
-smbCalcSize_LE(struct smb_hdr *ptr)
-{
- return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
- 2 /* size of the bcc field */ + get_bcc_le(ptr));
-}
-
/* The following are taken from fs/ntfs/util.c */
#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index f6728eb6f4b9..7dd462100378 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -276,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
}
static void
-decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
+decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
const struct nls_table *nls_cp)
{
int len;
@@ -284,19 +284,6 @@ decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
cFYI(1, "bleft %d", bleft);
- /*
- * Windows servers do not always double null terminate their final
- * Unicode string. Check to see if there are an uneven number of bytes
- * left. If so, then add an extra NULL pad byte to the end of the
- * response.
- *
- * See section 2.7.2 in "Implementing CIFS" for details
- */
- if (bleft % 2) {
- data[bleft] = 0;
- ++bleft;
- }
-
kfree(ses->serverOS);
ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
cFYI(1, "serverOS=%s", ses->serverOS);
@@ -634,7 +621,7 @@ ssetup_ntlmssp_authenticate:
and rest of bcc area. This allows us to avoid
a large buffer 17K allocation */
iov[0].iov_base = (char *)pSMB;
- iov[0].iov_len = smb_buf->smb_buf_length + 4;
+ iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
/* setting this here allows the code at the end of the function
to free the request buffer if there's an error */
@@ -669,7 +656,7 @@ ssetup_ntlmssp_authenticate:
* to use challenge/response method (i.e. Password bit is 1).
*/
- calc_lanman_hash(ses->password, ses->server->cryptkey,
+ rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
ses->server->secMode & SECMODE_PW_ENCRYPT ?
true : false, lnm_session_key);
@@ -872,9 +859,10 @@ ssetup_ntlmssp_authenticate:
iov[2].iov_len = (long) bcc_ptr - (long) str_area;
count = iov[1].iov_len + iov[2].iov_len;
- smb_buf->smb_buf_length += count;
+ smb_buf->smb_buf_length =
+ cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
- put_bcc_le(count, smb_buf);
+ put_bcc(count, smb_buf);
rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
CIFS_LOG_ERROR);
@@ -929,7 +917,9 @@ ssetup_ntlmssp_authenticate:
}
/* BB check if Unicode and decode strings */
- if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
+ if (bytes_remaining == 0) {
+ /* no string area to decode, do nothing */
+ } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
/* unicode string area must be word-aligned */
if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
++bcc_ptr;
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- Unix SMB/Netbios implementation.
- Version 1.9.
-
- a partial implementation of DES designed for use in the
- SMB authentication protocol
-
- Copyright (C) Andrew Tridgell 1998
- Modified by Steve French (sfrench@us.ibm.com) 2002,2004
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/* NOTES:
-
- This code makes no attempt to be fast! In fact, it is a very
- slow implementation
-
- This code is NOT a complete DES implementation. It implements only
- the minimum necessary for SMB authentication, as used by all SMB
- products (including every copy of Microsoft Windows95 ever sold)
-
- In particular, it can only do a unchained forward DES pass. This
- means it is not possible to use this code for encryption/decryption
- of data, instead it is only useful as a "hash" algorithm.
-
- There is no entry point into this code that allows normal DES operation.
-
- I believe this means that this code does not come under ITAR
- regulations but this is NOT a legal opinion. If you are concerned
- about the applicability of ITAR regulations to this code then you
- should confirm it for yourself (and maybe let me know if you come
- up with a different answer to the one above)
-*/
-#include <linux/slab.h>
-#define uchar unsigned char
-
-static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
- 1, 58, 50, 42, 34, 26, 18,
- 10, 2, 59, 51, 43, 35, 27,
- 19, 11, 3, 60, 52, 44, 36,
- 63, 55, 47, 39, 31, 23, 15,
- 7, 62, 54, 46, 38, 30, 22,
- 14, 6, 61, 53, 45, 37, 29,
- 21, 13, 5, 28, 20, 12, 4
-};
-
-static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
- 3, 28, 15, 6, 21, 10,
- 23, 19, 12, 4, 26, 8,
- 16, 7, 27, 20, 13, 2,
- 41, 52, 31, 37, 47, 55,
- 30, 40, 51, 45, 33, 48,
- 44, 49, 39, 56, 34, 53,
- 46, 42, 50, 36, 29, 32
-};
-
-static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
- 60, 52, 44, 36, 28, 20, 12, 4,
- 62, 54, 46, 38, 30, 22, 14, 6,
- 64, 56, 48, 40, 32, 24, 16, 8,
- 57, 49, 41, 33, 25, 17, 9, 1,
- 59, 51, 43, 35, 27, 19, 11, 3,
- 61, 53, 45, 37, 29, 21, 13, 5,
- 63, 55, 47, 39, 31, 23, 15, 7
-};
-
-static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
- 4, 5, 6, 7, 8, 9,
- 8, 9, 10, 11, 12, 13,
- 12, 13, 14, 15, 16, 17,
- 16, 17, 18, 19, 20, 21,
- 20, 21, 22, 23, 24, 25,
- 24, 25, 26, 27, 28, 29,
- 28, 29, 30, 31, 32, 1
-};
-
-static uchar perm5[32] = { 16, 7, 20, 21,
- 29, 12, 28, 17,
- 1, 15, 23, 26,
- 5, 18, 31, 10,
- 2, 8, 24, 14,
- 32, 27, 3, 9,
- 19, 13, 30, 6,
- 22, 11, 4, 25
-};
-
-static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
- 39, 7, 47, 15, 55, 23, 63, 31,
- 38, 6, 46, 14, 54, 22, 62, 30,
- 37, 5, 45, 13, 53, 21, 61, 29,
- 36, 4, 44, 12, 52, 20, 60, 28,
- 35, 3, 43, 11, 51, 19, 59, 27,
- 34, 2, 42, 10, 50, 18, 58, 26,
- 33, 1, 41, 9, 49, 17, 57, 25
-};
-
-static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
-
-static uchar sbox[8][4][16] = {
- {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
- {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
- {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
- {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
-
- {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
- {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
- {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
- {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
-
- {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
- {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
- {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
- {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
-
- {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
- {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
- {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
- {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
-
- {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
- {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
- {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
- {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
-
- {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
- {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
- {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
- {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
-
- {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
- {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
- {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
- {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
-
- {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
- {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
- {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
- {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
-};
-
-static void
-permute(char *out, char *in, uchar *p, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- out[i] = in[p[i] - 1];
-}
-
-static void
-lshift(char *d, int count, int n)
-{
- char out[64];
- int i;
- for (i = 0; i < n; i++)
- out[i] = d[(i + count) % n];
- for (i = 0; i < n; i++)
- d[i] = out[i];
-}
-
-static void
-concat(char *out, char *in1, char *in2, int l1, int l2)
-{
- while (l1--)
- *out++ = *in1++;
- while (l2--)
- *out++ = *in2++;
-}
-
-static void
-xor(char *out, char *in1, char *in2, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- out[i] = in1[i] ^ in2[i];
-}
-
-static void
-dohash(char *out, char *in, char *key, int forw)
-{
- int i, j, k;
- char *pk1;
- char c[28];
- char d[28];
- char *cd;
- char (*ki)[48];
- char *pd1;
- char l[32], r[32];
- char *rl;
-
- /* Have to reduce stack usage */
- pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
- if (pk1 == NULL)
- return;
-
- ki = kmalloc(16*48, GFP_KERNEL);
- if (ki == NULL) {
- kfree(pk1);
- return;
- }
-
- cd = pk1 + 56;
- pd1 = cd + 56;
- rl = pd1 + 64;
-
- permute(pk1, key, perm1, 56);
-
- for (i = 0; i < 28; i++)
- c[i] = pk1[i];
- for (i = 0; i < 28; i++)
- d[i] = pk1[i + 28];
-
- for (i = 0; i < 16; i++) {
- lshift(c, sc[i], 28);
- lshift(d, sc[i], 28);
-
- concat(cd, c, d, 28, 28);
- permute(ki[i], cd, perm2, 48);
- }
-
- permute(pd1, in, perm3, 64);
-
- for (j = 0; j < 32; j++) {
- l[j] = pd1[j];
- r[j] = pd1[j + 32];
- }
-
- for (i = 0; i < 16; i++) {
- char *er; /* er[48] */
- char *erk; /* erk[48] */
- char b[8][6];
- char *cb; /* cb[32] */
- char *pcb; /* pcb[32] */
- char *r2; /* r2[32] */
-
- er = kmalloc(48+48+32+32+32, GFP_KERNEL);
- if (er == NULL) {
- kfree(pk1);
- kfree(ki);
- return;
- }
- erk = er+48;
- cb = erk+48;
- pcb = cb+32;
- r2 = pcb+32;
-
- permute(er, r, perm4, 48);
-
- xor(erk, er, ki[forw ? i : 15 - i], 48);
-
- for (j = 0; j < 8; j++)
- for (k = 0; k < 6; k++)
- b[j][k] = erk[j * 6 + k];
-
- for (j = 0; j < 8; j++) {
- int m, n;
- m = (b[j][0] << 1) | b[j][5];
-
- n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
- 1) | b[j][4];
-
- for (k = 0; k < 4; k++)
- b[j][k] =
- (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
- }
-
- for (j = 0; j < 8; j++)
- for (k = 0; k < 4; k++)
- cb[j * 4 + k] = b[j][k];
- permute(pcb, cb, perm5, 32);
-
- xor(r2, l, pcb, 32);
-
- for (j = 0; j < 32; j++)
- l[j] = r[j];
-
- for (j = 0; j < 32; j++)
- r[j] = r2[j];
-
- kfree(er);
- }
-
- concat(rl, r, l, 32, 32);
-
- permute(out, rl, perm6, 64);
- kfree(pk1);
- kfree(ki);
-}
-
-static void
-str_to_key(unsigned char *str, unsigned char *key)
-{
- int i;
-
- key[0] = str[0] >> 1;
- key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
- key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
- key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
- key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
- key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
- key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
- key[7] = str[6] & 0x7F;
- for (i = 0; i < 8; i++)
- key[i] = (key[i] << 1);
-}
-
-static void
-smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
- int forw)
-{
- int i;
- char *outb; /* outb[64] */
- char *inb; /* inb[64] */
- char *keyb; /* keyb[64] */
- unsigned char key2[8];
-
- outb = kmalloc(64 * 3, GFP_KERNEL);
- if (outb == NULL)
- return;
-
- inb = outb + 64;
- keyb = inb + 64;
-
- str_to_key(key, key2);
-
- for (i = 0; i < 64; i++) {
- inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
- keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
- outb[i] = 0;
- }
-
- dohash(outb, inb, keyb, forw);
-
- for (i = 0; i < 8; i++)
- out[i] = 0;
-
- for (i = 0; i < 64; i++) {
- if (outb[i])
- out[i / 8] |= (1 << (7 - (i % 8)));
- }
- kfree(outb);
-}
-
-void
-E_P16(unsigned char *p14, unsigned char *p16)
-{
- unsigned char sp8[8] =
- { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
- smbhash(p16, sp8, p14, 1);
- smbhash(p16 + 8, sp8, p14 + 7, 1);
-}
-
-void
-E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
-{
- smbhash(p24, c8, p21, 1);
- smbhash(p24 + 8, c8, p21 + 7, 1);
- smbhash(p24 + 16, c8, p21 + 14, 1);
-}
-
-#if 0 /* currently unused */
-static void
-D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
-{
- smbhash(out, in, p14, 0);
- smbhash(out + 8, in + 8, p14 + 7, 0);
-}
-
-static void
-E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
-{
- smbhash(out, in, p14, 1);
- smbhash(out + 8, in + 8, p14 + 7, 1);
-}
-/* these routines are currently unneeded, but may be
- needed later */
-void
-cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
-{
- unsigned char buf[8];
-
- smbhash(buf, in, key, 1);
- smbhash(out, buf, key + 9, 1);
-}
-
-void
-cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
-{
- unsigned char buf[8];
- static unsigned char key2[8];
-
- smbhash(buf, in, key, 1);
- key2[0] = key[7];
- smbhash(out, buf, key2, 1);
-}
-
-void
-cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
-{
- static unsigned char key2[8];
-
- smbhash(out, in, key, forw);
- key2[0] = key[7];
- smbhash(out + 8, in + 8, key2, forw);
-}
-#endif /* unneeded routines */
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
+/*
+ * Spread the 56 bits held in str[0..6] over the eight bytes of key[],
+ * seven bits per byte, then shift every byte left by one so that bit 0
+ * of each output byte ends up clear.
+ */
+static void
+str_to_key(unsigned char *str, unsigned char *key)
+{
+	int n;
+
+	key[0] = str[0] >> 1;
+	/*
+	 * Bytes 1..6 each join the low n bits of the previous input byte
+	 * with the high (7 - n) bits of the current one.
+	 */
+	for (n = 1; n < 7; n++)
+		key[n] = ((str[n - 1] & ((1 << n) - 1)) << (7 - n)) |
+			 (str[n] >> (n + 1));
+	key[7] = str[6] & 0x7F;
+
+	for (n = 0; n < 8; n++)
+		key[n] <<= 1;
+}
+
+/*
+ * DES-encrypt the 8 bytes at 'in' into the 8 bytes at 'out' with the
+ * "ecb(des)" cipher, keyed with the 8-byte key that str_to_key() builds
+ * from 'key'.
+ *
+ * Returns 0 on success, the PTR_ERR() value when the cipher cannot be
+ * allocated, or the non-zero result of crypto_blkcipher_encrypt().
+ */
+static int
+smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
+{
+	int rc;
+	unsigned char key2[8];
+	struct crypto_blkcipher *tfm_des;
+	struct scatterlist sgin, sgout;
+	struct blkcipher_desc desc;
+
+	str_to_key(key, key2);
+
+	tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm_des)) {
+		rc = PTR_ERR(tfm_des);
+		cERROR(1, "could not allocate des crypto API\n");
+		goto smbhash_err;
+	}
+
+	desc.tfm = tfm_des;
+
+	crypto_blkcipher_setkey(tfm_des, key2, 8);
+
+	sg_init_one(&sgin, in, 8);
+	sg_init_one(&sgout, out, 8);
+
+	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
+	if (rc)
+		cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
+
+	/*
+	 * The transform is allocated per call, so it must be released on
+	 * the success path as well as on encrypt failure.
+	 */
+	crypto_free_blkcipher(tfm_des);
+smbhash_err:
+	return rc;
+}
+
+/*
+ * Fill p16 with two 8-byte smbhash() results computed over the same fixed
+ * 8-byte block: the first keyed from p14, the second keyed from p14 + 7.
+ *
+ * Returns 0, or the first non-zero smbhash() result (the second call is
+ * skipped when the first one fails).
+ */
+static int
+E_P16(unsigned char *p14, unsigned char *p16)
+{
+	unsigned char sp8[8] =
+		{ 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
+	int err = smbhash(p16, sp8, p14);
+
+	if (!err)
+		err = smbhash(p16 + 8, sp8, p14 + 7);
+	return err;
+}
+
+/*
+ * Fill p24 with three 8-byte smbhash() results over the block c8, keyed
+ * from p21, p21 + 7 and p21 + 14 respectively.
+ *
+ * Returns 0, or the first non-zero smbhash() result; later blocks are not
+ * computed once one call has failed.
+ */
+static int
+E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
+{
+	int blk;
+	int err = 0;
+
+	for (blk = 0; blk < 3 && !err; blk++)
+		err = smbhash(p24 + 8 * blk, c8, p21 + 7 * blk);
+
+	return err;
+}
+
+
/* produce a md4 message digest from data of length n bytes */
int
mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
return rc;
}
-/* Does the des encryption from the NT or LM MD4 hash. */
-static void
-SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
- unsigned char p24[24])
-{
- unsigned char p21[21];
-
- memset(p21, '\0', 21);
-
- memcpy(p21, passwd, 16);
- E_P24(p21, c8, p24);
-}
-
/*
This implements the X/Open SMB password encryption
It takes a password, a 8 byte "crypt key" and puts 24 bytes of
encrypted password into p24 */
/* Note that password must be uppercased and null terminated */
-void
+int
SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
{
- unsigned char p14[15], p21[21];
+ int rc;
+ unsigned char p14[14], p16[16], p21[21];
- memset(p21, '\0', 21);
memset(p14, '\0', 14);
- strncpy((char *) p14, (char *) passwd, 14);
+ memset(p16, '\0', 16);
+ memset(p21, '\0', 21);
-/* strupper((char *)p14); *//* BB at least uppercase the easy range */
- E_P16(p14, p21);
+ memcpy(p14, passwd, 14);
+ rc = E_P16(p14, p16);
+ if (rc)
+ return rc;
- SMBOWFencrypt(p21, c8, p24);
+ memcpy(p21, p16, 16);
+ rc = E_P24(p21, c8, p24);
- memset(p14, 0, 15);
- memset(p21, 0, 21);
+ return rc;
}
/* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
{
int rc;
- unsigned char p21[21];
+ unsigned char p16[16], p21[21];
+ memset(p16, '\0', 16);
memset(p21, '\0', 21);
- rc = E_md4hash(passwd, p21);
+ rc = E_md4hash(passwd, p16);
if (rc) {
cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
return rc;
}
- SMBOWFencrypt(p21, c8, p24);
+ memcpy(p21, p16, 16);
+ rc = E_P24(p21, c8, p24);
return rc;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..f2513fb8c391 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
unsigned int len = iov[0].iov_len;
unsigned int total_len;
int first_vec = 0;
- unsigned int smb_buf_length = smb_buffer->smb_buf_length;
+ unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
struct socket *ssocket = server->ssocket;
if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
else
smb_msg.msg_flags = MSG_NOSIGNAL;
- /* smb header is converted in header_assemble. bcc and rest of SMB word
- area, and byte area if necessary, is converted to littleendian in
- cifssmb.c and RFC1001 len is converted to bigendian in smb_send
- Flags2 is converted in SendReceive */
-
-
total_len = 0;
for (i = 0; i < n_vec; i++)
total_len += iov[i].iov_len;
- smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
cFYI(1, "Sending smb: total_len %d", total_len);
dump_smb(smb_buffer, len);
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
/* Don't want to modify the buffer as a
side effect of this call. */
- smb_buffer->smb_buf_length = smb_buf_length;
+ smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
return rc;
}
@@ -387,7 +380,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
#ifdef CONFIG_CIFS_STATS2
atomic_inc(&server->inSend);
#endif
- rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+ rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
#ifdef CONFIG_CIFS_STATS2
atomic_dec(&server->inSend);
mid->when_sent = jiffies;
@@ -422,7 +415,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
int resp_buf_type;
iov[0].iov_base = (char *)in_buf;
- iov[0].iov_len = in_buf->smb_buf_length + 4;
+ iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
flags |= CIFS_NO_RESP;
rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -488,10 +481,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
int rc = 0;
/* -4 for RFC1001 length and +2 for BCC field */
- in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2;
+ in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
in_buf->Command = SMB_COM_NT_CANCEL;
in_buf->WordCount = 0;
- put_bcc_le(0, in_buf);
+ put_bcc(0, in_buf);
mutex_lock(&server->srv_mutex);
rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +492,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
mutex_unlock(&server->srv_mutex);
return rc;
}
- rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+ rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
mutex_unlock(&server->srv_mutex);
cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -612,7 +605,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
return rc;
}
- receive_len = midQ->resp_buf->smb_buf_length;
+ receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -651,11 +644,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
rc = map_smb_to_linux_error(midQ->resp_buf,
flags & CIFS_LOG_ERROR);
- /* convert ByteCount if necessary */
- if (receive_len >= sizeof(struct smb_hdr) - 4
- /* do not count RFC1001 header */ +
- (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
- put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
if ((flags & CIFS_NO_RESP) == 0)
midQ->resp_buf = NULL; /* mark it so buf will
not be freed by
@@ -698,9 +686,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
to the same server. We may make this configurable later or
use ses->maxReq */
- if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+ if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+ MAX_CIFS_HDR_SIZE - 4) {
cERROR(1, "Illegal length, greater than maximum frame, %d",
- in_buf->smb_buf_length);
+ be32_to_cpu(in_buf->smb_buf_length));
return -EIO;
}
@@ -733,7 +722,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
#ifdef CONFIG_CIFS_STATS2
atomic_inc(&ses->server->inSend);
#endif
- rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+ rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
#ifdef CONFIG_CIFS_STATS2
atomic_dec(&ses->server->inSend);
midQ->when_sent = jiffies;
@@ -768,7 +757,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
return rc;
}
- receive_len = midQ->resp_buf->smb_buf_length;
+ receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -781,7 +770,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
if (midQ->resp_buf && out_buf
&& (midQ->midState == MID_RESPONSE_RECEIVED)) {
- out_buf->smb_buf_length = receive_len;
+ out_buf->smb_buf_length = cpu_to_be32(receive_len);
memcpy((char *)out_buf + 4,
(char *)midQ->resp_buf + 4,
receive_len);
@@ -800,16 +789,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
}
}
- *pbytes_returned = out_buf->smb_buf_length;
+ *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
/* BB special case reconnect tid and uid here? */
rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
-
- /* convert ByteCount if necessary */
- if (receive_len >= sizeof(struct smb_hdr) - 4
- /* do not count RFC1001 header */ +
- (2 * out_buf->WordCount) + 2 /* bcc */ )
- put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
} else {
rc = -EIO;
cERROR(1, "Bad MID state?");
@@ -877,9 +860,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
to the same server. We may make this configurable later or
use ses->maxReq */
- if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+ if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+ MAX_CIFS_HDR_SIZE - 4) {
cERROR(1, "Illegal length, greater than maximum frame, %d",
- in_buf->smb_buf_length);
+ be32_to_cpu(in_buf->smb_buf_length));
return -EIO;
}
@@ -910,7 +894,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
#ifdef CONFIG_CIFS_STATS2
atomic_inc(&ses->server->inSend);
#endif
- rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+ rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
#ifdef CONFIG_CIFS_STATS2
atomic_dec(&ses->server->inSend);
midQ->when_sent = jiffies;
@@ -977,7 +961,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
if (rc != 0)
return rc;
- receive_len = midQ->resp_buf->smb_buf_length;
+ receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
cERROR(1, "Frame too large received. Length: %d Xid: %d",
receive_len, xid);
@@ -993,7 +977,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
goto out;
}
- out_buf->smb_buf_length = receive_len;
+ out_buf->smb_buf_length = cpu_to_be32(receive_len);
memcpy((char *)out_buf + 4,
(char *)midQ->resp_buf + 4,
receive_len);
@@ -1012,17 +996,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
}
}
- *pbytes_returned = out_buf->smb_buf_length;
+ *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
/* BB special case reconnect tid and uid here? */
rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
- /* convert ByteCount if necessary */
- if (receive_len >= sizeof(struct smb_hdr) - 4
- /* do not count RFC1001 header */ +
- (2 * out_buf->WordCount) + 2 /* bcc */ )
- put_bcc(get_bcc_le(out_buf), out_buf);
-
out:
delete_mid(midQ);
if (rstart && rc == -EACCES)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..912995e013ec 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -112,6 +112,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
struct cifsTconInfo *pTcon;
struct super_block *sb;
char *full_path;
+ struct cifs_ntsd *pacl;
if (direntry == NULL)
return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
(__u16)value_size, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
+ strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+ pacl = kmalloc(value_size, GFP_KERNEL);
+ if (!pacl) {
+ cFYI(1, "%s: Can't allocate memory for ACL",
+ __func__);
+ rc = -ENOMEM;
+ } else {
+#ifdef CONFIG_CIFS_ACL
+ memcpy(pacl, ea_value, value_size);
+ rc = set_cifs_acl(pacl, value_size,
+ direntry->d_inode, full_path);
+ if (rc == 0) /* force revalidate of the inode */
+ CIFS_I(direntry->d_inode)->time = 0;
+ kfree(pacl);
+#else
+ cFYI(1, "Set CIFS ACL not supported yet");
+#endif /* CONFIG_CIFS_ACL */
+ }
} else {
int temp;
temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cda9108..0ea00832de23 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
return do_sys_open(dfd, filename, flags, mode);
}
-/*
- * compat_count() counts the number of arguments/envelopes. It is basically
- * a copy of count() from fs/exec.c, except that it works with 32 bit argv
- * and envp pointers.
- */
-static int compat_count(compat_uptr_t __user *argv, int max)
-{
- int i = 0;
-
- if (argv != NULL) {
- for (;;) {
- compat_uptr_t p;
-
- if (get_user(p, argv))
- return -EFAULT;
- if (!p)
- break;
- argv++;
- if (i++ >= max)
- return -E2BIG;
-
- if (fatal_signal_pending(current))
- return -ERESTARTNOHAND;
- cond_resched();
- }
- }
- return i;
-}
-
-/*
- * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
- * except that it works with 32 bit argv and envp pointers.
- */
-static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
- struct linux_binprm *bprm)
-{
- struct page *kmapped_page = NULL;
- char *kaddr = NULL;
- unsigned long kpos = 0;
- int ret;
-
- while (argc-- > 0) {
- compat_uptr_t str;
- int len;
- unsigned long pos;
-
- if (get_user(str, argv+argc) ||
- !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
- ret = -EFAULT;
- goto out;
- }
-
- if (len > MAX_ARG_STRLEN) {
- ret = -E2BIG;
- goto out;
- }
-
- /* We're going to work our way backwords. */
- pos = bprm->p;
- str += len;
- bprm->p -= len;
-
- while (len > 0) {
- int offset, bytes_to_copy;
-
- if (fatal_signal_pending(current)) {
- ret = -ERESTARTNOHAND;
- goto out;
- }
- cond_resched();
-
- offset = pos % PAGE_SIZE;
- if (offset == 0)
- offset = PAGE_SIZE;
-
- bytes_to_copy = offset;
- if (bytes_to_copy > len)
- bytes_to_copy = len;
-
- offset -= bytes_to_copy;
- pos -= bytes_to_copy;
- str -= bytes_to_copy;
- len -= bytes_to_copy;
-
- if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
- struct page *page;
-
- page = get_arg_page(bprm, pos, 1);
- if (!page) {
- ret = -E2BIG;
- goto out;
- }
-
- if (kmapped_page) {
- flush_kernel_dcache_page(kmapped_page);
- kunmap(kmapped_page);
- put_page(kmapped_page);
- }
- kmapped_page = page;
- kaddr = kmap(kmapped_page);
- kpos = pos & PAGE_MASK;
- flush_cache_page(bprm->vma, kpos,
- page_to_pfn(kmapped_page));
- }
- if (copy_from_user(kaddr+offset, compat_ptr(str),
- bytes_to_copy)) {
- ret = -EFAULT;
- goto out;
- }
- }
- }
- ret = 0;
-out:
- if (kmapped_page) {
- flush_kernel_dcache_page(kmapped_page);
- kunmap(kmapped_page);
- put_page(kmapped_page);
- }
- return ret;
-}
-
-/*
- * compat_do_execve() is mostly a copy of do_execve(), with the exception
- * that it processes 32 bit argv and envp pointers.
- */
-int compat_do_execve(char * filename,
- compat_uptr_t __user *argv,
- compat_uptr_t __user *envp,
- struct pt_regs * regs)
-{
- struct linux_binprm *bprm;
- struct file *file;
- struct files_struct *displaced;
- bool clear_in_exec;
- int retval;
-
- retval = unshare_files(&displaced);
- if (retval)
- goto out_ret;
-
- retval = -ENOMEM;
- bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
- if (!bprm)
- goto out_files;
-
- retval = prepare_bprm_creds(bprm);
- if (retval)
- goto out_free;
-
- retval = check_unsafe_exec(bprm);
- if (retval < 0)
- goto out_free;
- clear_in_exec = retval;
- current->in_execve = 1;
-
- file = open_exec(filename);
- retval = PTR_ERR(file);
- if (IS_ERR(file))
- goto out_unmark;
-
- sched_exec();
-
- bprm->file = file;
- bprm->filename = filename;
- bprm->interp = filename;
-
- retval = bprm_mm_init(bprm);
- if (retval)
- goto out_file;
-
- bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
- if ((retval = bprm->argc) < 0)
- goto out;
-
- bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
- if ((retval = bprm->envc) < 0)
- goto out;
-
- retval = prepare_binprm(bprm);
- if (retval < 0)
- goto out;
-
- retval = copy_strings_kernel(1, &bprm->filename, bprm);
- if (retval < 0)
- goto out;
-
- bprm->exec = bprm->p;
- retval = compat_copy_strings(bprm->envc, envp, bprm);
- if (retval < 0)
- goto out;
-
- retval = compat_copy_strings(bprm->argc, argv, bprm);
- if (retval < 0)
- goto out;
-
- retval = search_binary_handler(bprm, regs);
- if (retval < 0)
- goto out;
-
- /* execve succeeded */
- current->fs->in_exec = 0;
- current->in_execve = 0;
- acct_update_integrals(current);
- free_bprm(bprm);
- if (displaced)
- put_files_struct(displaced);
- return retval;
-
-out:
- if (bprm->mm) {
- acct_arg_size(bprm, 0);
- mmput(bprm->mm);
- }
-
-out_file:
- if (bprm->file) {
- allow_write_access(bprm->file);
- fput(bprm->file);
- }
-
-out_unmark:
- if (clear_in_exec)
- current->fs->in_exec = 0;
- current->in_execve = 0;
-
-out_free:
- free_bprm(bprm);
-
-out_files:
- if (displaced)
- reset_files_struct(displaced);
-out_ret:
- return retval;
-}
-
#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9908c20bb1a5..9d17d350abc5 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock);
static void configfs_d_iput(struct dentry * dentry,
struct inode * inode)
{
- struct configfs_dirent * sd = dentry->d_fsdata;
+ struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
BUG_ON(sd->s_dentry != dentry);
+ /* Coordinate with configfs_readdir */
+ spin_lock(&configfs_dirent_lock);
sd->s_dentry = NULL;
+ spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
iput(inode);
@@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group,
sd = child->d_fsdata;
sd->s_type |= CONFIGFS_USET_DEFAULT;
} else {
- d_delete(child);
+ BUG_ON(child->d_inode);
+ d_drop(child);
dput(child);
}
}
@@ -1547,7 +1551,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
struct configfs_dirent * parent_sd = dentry->d_fsdata;
struct configfs_dirent *cursor = filp->private_data;
struct list_head *p, *q = &cursor->s_sibling;
- ino_t ino;
+ ino_t ino = 0;
int i = filp->f_pos;
switch (i) {
@@ -1575,6 +1579,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
struct configfs_dirent *next;
const char * name;
int len;
+ struct inode *inode = NULL;
next = list_entry(p, struct configfs_dirent,
s_sibling);
@@ -1583,9 +1588,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
name = configfs_get_name(next);
len = strlen(name);
- if (next->s_dentry)
- ino = next->s_dentry->d_inode->i_ino;
- else
+
+ /*
+ * We'll have a dentry and an inode for
+ * PINNED items and for open attribute
+ * files. We lock here to prevent a race
+ * with configfs_d_iput() clearing
+ * s_dentry before calling iput().
+ *
+ * Why do we go to the trouble? If
+ * someone has an attribute file open,
+ * the inode number should match until
+ * they close it. Beyond that, we don't
+ * care.
+ */
+ spin_lock(&configfs_dirent_lock);
+ dentry = next->s_dentry;
+ if (dentry)
+ inode = dentry->d_inode;
+ if (inode)
+ ino = inode->i_ino;
+ spin_unlock(&configfs_dirent_lock);
+ if (!inode)
ino = iunique(configfs_sb, 2);
if (filldir(dirent, name, len, filp->f_pos, ino,
@@ -1685,7 +1709,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
err = configfs_attach_group(sd->s_element, &group->cg_item,
dentry);
if (err) {
- d_delete(dentry);
+ BUG_ON(dentry->d_inode);
+ d_drop(dentry);
dput(dentry);
} else {
spin_lock(&configfs_dirent_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index 22a0ef41bad1..37f72ee5bf7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
+#include <linux/prefetch.h>
#include "internal.h"
/*
@@ -1219,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
EXPORT_SYMBOL(shrink_dcache_parent);
/*
- * Scan `nr' dentries and return the number which remain.
+ * Scan `sc->nr_to_scan' dentries and return the number which remain.
*
* We need to avoid reentering the filesystem if the caller is performing a
* GFP_NOFS allocation attempt. One example deadlock is:
@@ -1230,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink,
+ struct shrink_control *sc)
{
+ int nr = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
+
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,17 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
char buf[32];
- int buf_size;
+ size_t buf_size;
+ bool bv;
u32 *val = file->private_data;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
- switch (buf[0]) {
- case 'y':
- case 'Y':
- case '1':
- *val = 1;
- break;
- case 'n':
- case 'N':
- case '0':
- *val = 0;
- break;
- }
-
+ if (strtobool(buf, &bv) == 0)
+ *val = bv;
+
return count;
}
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0d329ff8ed4c..9b026ea8baa9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -100,6 +100,7 @@ struct dlm_cluster {
unsigned int cl_log_debug;
unsigned int cl_protocol;
unsigned int cl_timewarn_cs;
+ unsigned int cl_waitwarn_us;
};
enum {
@@ -114,6 +115,7 @@ enum {
CLUSTER_ATTR_LOG_DEBUG,
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_TIMEWARN_CS,
+ CLUSTER_ATTR_WAITWARN_US,
};
struct cluster_attribute {
@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(log_debug, 0);
CLUSTER_ATTR(protocol, 0);
CLUSTER_ATTR(timewarn_cs, 1);
+CLUSTER_ATTR(waitwarn_us, 0);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
+ [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
NULL,
};
@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_log_debug = dlm_config.ci_log_debug;
cl->cl_protocol = dlm_config.ci_protocol;
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
+ cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_LOG_DEBUG 0
#define DEFAULT_PROTOCOL 0
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
+#define DEFAULT_WAITWARN_US 0
struct dlm_config_info dlm_config = {
.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
.ci_scan_secs = DEFAULT_SCAN_SECS,
.ci_log_debug = DEFAULT_LOG_DEBUG,
.ci_protocol = DEFAULT_PROTOCOL,
- .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
+ .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
+ .ci_waitwarn_us = DEFAULT_WAITWARN_US
};
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 4f1d6fce58c5..dd0ce24d5a80 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -28,6 +28,7 @@ struct dlm_config_info {
int ci_log_debug;
int ci_protocol;
int ci_timewarn_cs;
+ int ci_waitwarn_us;
};
extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b94204913011..0262451eb9c6 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -209,6 +209,7 @@ struct dlm_args {
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
+#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
@@ -245,6 +246,7 @@ struct dlm_lkb {
int8_t lkb_wait_type; /* type of reply waiting for */
int8_t lkb_wait_count;
+ int lkb_wait_nodeid; /* for debugging */
struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
struct list_head lkb_statequeue; /* rsb g/c/w list */
@@ -254,6 +256,7 @@ struct dlm_lkb {
struct list_head lkb_ownqueue; /* list of locks for a process */
struct list_head lkb_time_list;
ktime_t lkb_timestamp;
+ ktime_t lkb_wait_time;
unsigned long lkb_timeout_cs;
struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 56d6bfcc1e48..f71d0b5abd95 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
return -1;
}
+/*
+ * Look for nodeid in warned[0..num_nodes-1], scanning from the front.
+ *
+ * Returns 1 if nodeid is found before the first zero slot.  Otherwise the
+ * first zero slot is set to nodeid and 0 is returned.  When every slot is
+ * non-zero and none matches, 0 is returned and nothing is recorded.
+ */
+static int nodeid_warned(int nodeid, int num_nodes, int *warned)
+{
+	int slot;
+
+	for (slot = 0; slot < num_nodes; slot++) {
+		int seen = warned[slot];
+
+		if (seen == 0) {
+			/* first free slot: nodeid not recorded yet, claim it */
+			warned[slot] = nodeid;
+			return 0;
+		}
+		if (seen == nodeid)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk ls->ls_waiters under ls_waiters_mutex and report lkbs whose wait
+ * time has reached dlm_config.ci_waitwarn_us microseconds.
+ *
+ * Does nothing when ci_waitwarn_us is 0.  Entries whose lkb_wait_time is
+ * zero are skipped; an entry that is found expired has its lkb_wait_time
+ * zeroed here.  Within one scan, log_error() is issued at most once per
+ * lkb_wait_nodeid (tracked in the 'warned' table); if that table cannot
+ * be allocated no per-node errors are logged, but the debug summary is
+ * still produced.
+ */
+void dlm_scan_waiters(struct dlm_ls *ls)
+{
+	struct dlm_lkb *lkb;
+	ktime_t zero = ktime_set(0, 0);
+	s64 us;
+	s64 debug_maxus = 0;
+	u32 debug_scanned = 0;
+	u32 debug_expired = 0;
+	int num_nodes = 0;
+	int *warned = NULL;
+
+	if (!dlm_config.ci_waitwarn_us)
+		return;
+
+	mutex_lock(&ls->ls_waiters_mutex);
+
+	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+		if (ktime_equal(lkb->lkb_wait_time, zero))
+			continue;
+
+		debug_scanned++;
+
+		us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
+
+		if (us < dlm_config.ci_waitwarn_us)
+			continue;
+
+		/* a zero timestamp makes the check above skip this lkb */
+		lkb->lkb_wait_time = zero;
+
+		debug_expired++;
+		if (us > debug_maxus)
+			debug_maxus = us;
+
+		/*
+		 * Allocate the zeroed per-scan table on the first expired
+		 * lkb only.  kcalloc() takes (count, size, flags); the
+		 * size must come before the gfp flags.
+		 */
+		if (!num_nodes) {
+			num_nodes = ls->ls_num_nodes;
+			warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
+		}
+		if (!warned)
+			continue;
+		if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
+			continue;
+
+		log_error(ls, "waitwarn %x %lld %d us check connection to "
+			  "node %d", lkb->lkb_id, (long long)us,
+			  dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
+	}
+	mutex_unlock(&ls->ls_waiters_mutex);
+
+	/* kfree(NULL) is a no-op, so no guard is needed */
+	kfree(warned);
+
+	if (debug_expired)
+		log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
+			  debug_scanned, debug_expired,
+			  dlm_config.ci_waitwarn_us, (long long)debug_maxus);
+}
+
/* add/remove lkb from global waiters list of lkb's waiting for
a reply from a remote node */
-static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
+static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int error = 0;
@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
+ lkb->lkb_wait_time = ktime_get();
+ lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
out:
@@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int error;
- if (ms != &ls->ls_stub_ms)
+ if (ms->m_flags != DLM_IFL_STUB_MS)
mutex_lock(&ls->ls_waiters_mutex);
error = _remove_from_waiters(lkb, ms->m_type, ms);
- if (ms != &ls->ls_stub_ms)
+ if (ms->m_flags != DLM_IFL_STUB_MS)
mutex_unlock(&ls->ls_waiters_mutex);
return error;
}
@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
mutex_unlock(&ls->ls_timeout_mutex);
+
+ if (!dlm_config.ci_waitwarn_us)
+ return;
+
+ mutex_lock(&ls->ls_waiters_mutex);
+ list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+ if (ktime_to_us(lkb->lkb_wait_time))
+ lkb->lkb_wait_time = ktime_get();
+ }
+ mutex_unlock(&ls->ls_waiters_mutex);
}
/* lkb is master or local copy */
@@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
compatible with other granted locks */
-static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms)
+static void munge_demoted(struct dlm_lkb *lkb)
{
- if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
- log_print("munge_demoted %x invalid reply type %d",
- lkb->lkb_id, ms->m_type);
- return;
- }
-
if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
log_print("munge_demoted %x invalid modes gr %d rq %d",
lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
@@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
struct dlm_mhandle *mh;
int to_nodeid, error;
- error = add_to_waiters(lkb, mstype);
+ to_nodeid = r->res_nodeid;
+
+ error = add_to_waiters(lkb, mstype, to_nodeid);
if (error)
return error;
- to_nodeid = r->res_nodeid;
-
error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
if (error)
goto fail;
@@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
/* down conversions go without a reply from the master */
if (!error && down_conversion(lkb)) {
remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
+ r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
r->res_ls->ls_stub_ms.m_result = 0;
- r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
__receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
}
@@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
struct dlm_mhandle *mh;
int to_nodeid, error;
- error = add_to_waiters(lkb, DLM_MSG_LOOKUP);
+ to_nodeid = dlm_dir_nodeid(r);
+
+ error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
if (error)
return error;
- to_nodeid = dlm_dir_nodeid(r);
-
error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
if (error)
goto fail;
@@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
{
+ if (ms->m_flags == DLM_IFL_STUB_MS)
+ return;
+
lkb->lkb_sbflags = ms->m_sbflags;
lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
(ms->m_flags & 0x0000FFFF);
@@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
/* convert was queued on remote master */
receive_flags_reply(lkb, ms);
if (is_demoted(lkb))
- munge_demoted(lkb, ms);
+ munge_demoted(lkb);
del_lkb(r, lkb);
add_lkb(r, lkb, DLM_LKSTS_CONVERT);
add_timeout(lkb);
@@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
/* convert was granted on remote master */
receive_flags_reply(lkb, ms);
if (is_demoted(lkb))
- munge_demoted(lkb, ms);
+ munge_demoted(lkb);
grant_lock_pc(r, lkb, ms);
queue_cast(r, lkb, 0);
break;
@@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
dlm_put_lockspace(ls);
}
-static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
+static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
+ struct dlm_message *ms_stub)
{
if (middle_conversion(lkb)) {
hold_lkb(lkb);
- ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
- ls->ls_stub_ms.m_result = -EINPROGRESS;
- ls->ls_stub_ms.m_flags = lkb->lkb_flags;
- ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
- _receive_convert_reply(lkb, &ls->ls_stub_ms);
+ memset(ms_stub, 0, sizeof(struct dlm_message));
+ ms_stub->m_flags = DLM_IFL_STUB_MS;
+ ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
+ ms_stub->m_result = -EINPROGRESS;
+ ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+ _receive_convert_reply(lkb, ms_stub);
/* Same special case as in receive_rcom_lock_args() */
lkb->lkb_grmode = DLM_LOCK_IV;
@@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
void dlm_recover_waiters_pre(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
+ struct dlm_message *ms_stub;
int wait_type, stub_unlock_result, stub_cancel_result;
+ ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
+ if (!ms_stub) {
+ log_error(ls, "dlm_recover_waiters_pre no mem");
+ return;
+ }
+
mutex_lock(&ls->ls_waiters_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
- log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
- lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);
+
+ /* exclude debug messages about unlocks because there can be so
+ many and they aren't very interesting */
+
+ if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
+ log_debug(ls, "recover_waiter %x nodeid %d "
+ "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
+ lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
+ }
/* all outstanding lookups, regardless of destination will be
resent after recovery is done */
@@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
break;
case DLM_MSG_CONVERT:
- recover_convert_waiter(ls, lkb);
+ recover_convert_waiter(ls, lkb, ms_stub);
break;
case DLM_MSG_UNLOCK:
hold_lkb(lkb);
- ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
- ls->ls_stub_ms.m_result = stub_unlock_result;
- ls->ls_stub_ms.m_flags = lkb->lkb_flags;
- ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
- _receive_unlock_reply(lkb, &ls->ls_stub_ms);
+ memset(ms_stub, 0, sizeof(struct dlm_message));
+ ms_stub->m_flags = DLM_IFL_STUB_MS;
+ ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
+ ms_stub->m_result = stub_unlock_result;
+ ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+ _receive_unlock_reply(lkb, ms_stub);
dlm_put_lkb(lkb);
break;
case DLM_MSG_CANCEL:
hold_lkb(lkb);
- ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
- ls->ls_stub_ms.m_result = stub_cancel_result;
- ls->ls_stub_ms.m_flags = lkb->lkb_flags;
- ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
- _receive_cancel_reply(lkb, &ls->ls_stub_ms);
+ memset(ms_stub, 0, sizeof(struct dlm_message));
+ ms_stub->m_flags = DLM_IFL_STUB_MS;
+ ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
+ ms_stub->m_result = stub_cancel_result;
+ ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+ _receive_cancel_reply(lkb, ms_stub);
dlm_put_lkb(lkb);
break;
@@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
schedule();
}
mutex_unlock(&ls->ls_waiters_mutex);
+ kfree(ms_stub);
}
static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
@@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
ou = is_overlap_unlock(lkb);
err = 0;
- log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
- lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
+ log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
+ lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
/* At this point we assume that we won't get a reply to any
previous op or overlap op on this lock. First, do a big
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 88e93c80cc22..265017a7c3e7 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_lock_recovery_try(struct dlm_ls *ls);
void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_waiters(struct dlm_ls *ls);
void dlm_scan_timeout(struct dlm_ls *ls);
void dlm_adjust_timeouts(struct dlm_ls *ls);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f994a7dfda85..14cbf4099753 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
static int dlm_scand(void *data)
{
struct dlm_ls *ls;
- int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
while (!kthread_should_stop()) {
ls = find_ls_to_scan();
@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
+ dlm_scan_waiters(ls);
dlm_unlock_recovery(ls);
} else {
ls->ls_scan_time += HZ;
}
- } else {
- schedule_timeout_interruptible(timeout_jiffies);
+ continue;
}
+ schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 30d8b85febbf..e2b878004364 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
wake_up(&send_wq);
}
+/* If a process was killed while waiting for the only plock on a file,
+ locks_remove_posix will not see any lock on the file so it won't
+ send an unlock-close to us to pass on to userspace to clean up the
+ abandoned waiter. So, we have to insert the unlock-close when the
+ lock call is interrupted. */
+
+static void do_unlock_close(struct dlm_ls *ls, u64 number,
+ struct file *file, struct file_lock *fl)
+{
+ struct plock_op *op;
+
+ op = kzalloc(sizeof(*op), GFP_NOFS);
+ if (!op)
+ return;
+
+ op->info.optype = DLM_PLOCK_OP_UNLOCK;
+ op->info.pid = fl->fl_pid;
+ op->info.fsid = ls->ls_global_id;
+ op->info.number = number;
+ op->info.start = 0;
+ op->info.end = OFFSET_MAX;
+ if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+ op->info.owner = (__u64) fl->fl_pid;
+ else
+ op->info.owner = (__u64)(long) fl->fl_owner;
+
+ op->info.flags |= DLM_PLOCK_FL_CLOSE;
+ send_op(op);
+}
+
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
int cmd, struct file_lock *fl)
{
@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
send_op(op);
- if (xop->callback == NULL)
- wait_event(recv_wq, (op->done != 0));
- else {
+ if (xop->callback == NULL) {
+ rv = wait_event_killable(recv_wq, (op->done != 0));
+ if (rv == -ERESTARTSYS) {
+ log_debug(ls, "dlm_posix_lock: wait killed %llx",
+ (unsigned long long)number);
+ spin_lock(&ops_lock);
+ list_del(&op->list);
+ spin_unlock(&ops_lock);
+ kfree(xop);
+ do_unlock_close(ls, number, file, fl);
+ goto out;
+ }
+ } else {
rv = FILE_LOCK_DEFERRED;
goto out;
}
@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
else
op->info.owner = (__u64)(long) fl->fl_owner;
+ if (fl->fl_flags & FL_CLOSE) {
+ op->info.flags |= DLM_PLOCK_FL_CLOSE;
+ send_op(op);
+ rv = 0;
+ goto out;
+ }
+
send_op(op);
wait_event(recv_wq, (op->done != 0));
@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
op = list_entry(send_list.next, struct plock_op, list);
- list_move(&op->list, &recv_list);
+ if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+ list_del(&op->list);
+ else
+ list_move(&op->list, &recv_list);
memcpy(&info, &op->info, sizeof(info));
}
spin_unlock(&ops_lock);
@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
if (!op)
return -EAGAIN;
+ /* there is no need to get a reply from userspace for unlocks
+ that were generated by the vfs cleaning up for a close
+ (the process did not make an unlock call). */
+
+ if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+ kfree(op);
+
if (copy_to_user(u, &info, sizeof(info)))
return -EFAULT;
return sizeof(info);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d5ab3fe7c198..e96bf3e9be88 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
out_sig:
sigprocmask(SIG_SETMASK, &tmpsig, NULL);
- recalc_sigpending();
out_free:
kfree(kbuf);
return error;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c89494c..c00e055b6282 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
static void drop_slab(void)
{
int nr_objects;
+ struct shrink_control shrink = {
+ .gfp_mask = GFP_KERNEL,
+ };
do {
- nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+ nr_objects = shrink_slab(&shrink, 1000, 1000);
} while (nr_objects > 10);
}
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..936f5776655c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -166,8 +167,13 @@ out:
}
#ifdef CONFIG_MMU
-
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+/*
+ * The nascent bprm->mm is not visible until exec_mmap() but it can
+ * use a lot of memory, account these pages in current->mm temporarily
+ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
+ * change the counter back via acct_arg_size(0).
+ */
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
struct mm_struct *mm = current->mm;
long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
#endif
}
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -194,7 +200,7 @@ struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
#ifdef CONFIG_STACK_GROWSUP
if (write) {
- ret = expand_stack_downwards(bprm->vma, pos);
+ ret = expand_downwards(bprm->vma, pos);
if (ret < 0)
return NULL;
}
@@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
#else
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -398,22 +404,56 @@ err:
return err;
}
+struct user_arg_ptr {
+#ifdef CONFIG_COMPAT
+ bool is_compat;
+#endif
+ union {
+ const char __user *const __user *native;
+#ifdef CONFIG_COMPAT
+ compat_uptr_t __user *compat;
+#endif
+ } ptr;
+};
+
+static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+{
+ const char __user *native;
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(argv.is_compat)) {
+ compat_uptr_t compat;
+
+ if (get_user(compat, argv.ptr.compat + nr))
+ return ERR_PTR(-EFAULT);
+
+ return compat_ptr(compat);
+ }
+#endif
+
+ if (get_user(native, argv.ptr.native + nr))
+ return ERR_PTR(-EFAULT);
+
+ return native;
+}
+
/*
* count() counts the number of strings in array ARGV.
*/
-static int count(const char __user * const __user * argv, int max)
+static int count(struct user_arg_ptr argv, int max)
{
int i = 0;
- if (argv != NULL) {
+ if (argv.ptr.native != NULL) {
for (;;) {
- const char __user * p;
+ const char __user *p = get_user_arg_ptr(argv, i);
- if (get_user(p, argv))
- return -EFAULT;
if (!p)
break;
- argv++;
+
+ if (IS_ERR(p))
+ return -EFAULT;
+
if (i++ >= max)
return -E2BIG;
@@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max)
* processes's memory to the new process's stack. The call to get_user_pages()
* ensures the destination page is created and not swapped out.
*/
-static int copy_strings(int argc, const char __user *const __user *argv,
+static int copy_strings(int argc, struct user_arg_ptr argv,
struct linux_binprm *bprm)
{
struct page *kmapped_page = NULL;
@@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
int len;
unsigned long pos;
- if (get_user(str, argv+argc) ||
- !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
- ret = -EFAULT;
+ ret = -EFAULT;
+ str = get_user_arg_ptr(argv, argc);
+ if (IS_ERR(str))
goto out;
- }
- if (!valid_arg_len(bprm, len)) {
- ret = -E2BIG;
+ len = strnlen_user(str, MAX_ARG_STRLEN);
+ if (!len)
+ goto out;
+
+ ret = -E2BIG;
+ if (!valid_arg_len(bprm, len))
goto out;
- }
/* We're going to work our way backwords. */
pos = bprm->p;
@@ -519,14 +561,19 @@ out:
/*
* Like copy_strings, but get argv and its values from kernel memory.
*/
-int copy_strings_kernel(int argc, const char *const *argv,
+int copy_strings_kernel(int argc, const char *const *__argv,
struct linux_binprm *bprm)
{
int r;
mm_segment_t oldfs = get_fs();
+ struct user_arg_ptr argv = {
+ .ptr.native = (const char __user *const __user *)__argv,
+ };
+
set_fs(KERNEL_DS);
- r = copy_strings(argc, (const char __user *const __user *)argv, bprm);
+ r = copy_strings(argc, argv, bprm);
set_fs(oldfs);
+
return r;
}
EXPORT_SYMBOL(copy_strings_kernel);
@@ -553,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -579,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
return -ENOMEM;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
- free_pgd_range(tlb, new_end, old_end, new_end,
+ free_pgd_range(&tlb, new_end, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
} else {
/*
@@ -593,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
- free_pgd_range(tlb, old_start, old_end, new_end,
+ free_pgd_range(&tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
}
- tlb_finish_mmu(tlb, new_end, old_end);
+ tlb_finish_mmu(&tlb, new_end, old_end);
/*
* Shrink the vma to just the new range. Always succeeds.
@@ -1004,6 +1051,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
task_unlock(tsk);
return buf;
}
+EXPORT_SYMBOL_GPL(get_task_comm);
void set_task_comm(struct task_struct *tsk, char *buf)
{
@@ -1379,10 +1427,10 @@ EXPORT_SYMBOL(search_binary_handler);
/*
* sys_execve() executes a new program.
*/
-int do_execve(const char * filename,
- const char __user *const __user *argv,
- const char __user *const __user *envp,
- struct pt_regs * regs)
+static int do_execve_common(const char *filename,
+ struct user_arg_ptr argv,
+ struct user_arg_ptr envp,
+ struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
@@ -1489,6 +1537,34 @@ out_ret:
return retval;
}
+int do_execve(const char *filename,
+ const char __user *const __user *__argv,
+ const char __user *const __user *__envp,
+ struct pt_regs *regs)
+{
+ struct user_arg_ptr argv = { .ptr.native = __argv };
+ struct user_arg_ptr envp = { .ptr.native = __envp };
+ return do_execve_common(filename, argv, envp, regs);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_do_execve(char *filename,
+ compat_uptr_t __user *__argv,
+ compat_uptr_t __user *__envp,
+ struct pt_regs *regs)
+{
+ struct user_arg_ptr argv = {
+ .is_compat = true,
+ .ptr.compat = __argv,
+ };
+ struct user_arg_ptr envp = {
+ .is_compat = true,
+ .ptr.compat = __envp,
+ };
+ return do_execve_common(filename, argv, envp, regs);
+}
+#endif
+
void set_binfmt(struct linux_binfmt *new)
{
struct mm_struct *mm = current->mm;
@@ -1659,6 +1735,7 @@ static int zap_process(struct task_struct *start, int exit_code)
t = start;
do {
+ task_clear_group_stop_pending(t);
if (t != current && t->mm) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0a78dae7e2cb..1dd62ed35b85 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -898,7 +898,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
brelse(bh);
if (!sb_set_blocksize(sb, blocksize)) {
- ext2_msg(sb, KERN_ERR, "error: blocksize is too small");
+ ext2_msg(sb, KERN_ERR,
+ "error: bad blocksize %d", blocksize);
goto failed_sbi;
}
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 32f3b8695859..34b6d9bfc48a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1416,10 +1416,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
frame->at = entries;
frame->bh = bh;
bh = bh2;
+ /*
+ * Mark buffers dirty here so that if do_split() fails we write a
+ * consistent set of buffers to disk.
+ */
+ ext3_journal_dirty_metadata(handle, frame->bh);
+ ext3_journal_dirty_metadata(handle, bh);
de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
- dx_release (frames);
- if (!(de))
+ if (!de) {
+ ext3_mark_inode_dirty(handle, dir);
+ dx_release(frames);
return retval;
+ }
+ dx_release(frames);
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
@@ -2189,6 +2198,7 @@ static int ext3_symlink (struct inode * dir,
handle_t *handle;
struct inode * inode;
int l, err, retries = 0;
+ int credits;
l = strlen(symname)+1;
if (l > dir->i_sb->s_blocksize)
@@ -2196,10 +2206,26 @@ static int ext3_symlink (struct inode * dir,
dquot_initialize(dir);
+ if (l > EXT3_N_BLOCKS * 4) {
+ /*
+ * For non-fast symlinks, we just allocate inode and put it on
+ * orphan list in the first transaction => we need bitmap,
+ * group descriptor, sb, inode block, quota blocks.
+ */
+ credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+ } else {
+ /*
+ * Fast symlink. We have to add entry to directory
+ * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS),
+ * allocate new inode (bitmap, group descriptor, inode block,
+ * quota blocks, sb is already counted in previous macros).
+ */
+ credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+ }
retry:
- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
- EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ handle = ext3_journal_start(dir, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -2211,21 +2237,45 @@ retry:
if (IS_ERR(inode))
goto out_stop;
- if (l > sizeof (EXT3_I(inode)->i_data)) {
+ if (l > EXT3_N_BLOCKS * 4) {
inode->i_op = &ext3_symlink_inode_operations;
ext3_set_aops(inode);
/*
- * page_symlink() calls into ext3_prepare/commit_write.
- * We have a transaction open. All is sweetness. It also sets
- * i_size in generic_commit_write().
+ * We cannot call page_symlink() with transaction started
+ * because it calls into ext3_write_begin() which acquires page
+ * lock which ranks below transaction start (and it can also
+ * wait for journal commit if we are running out of space). So
+ * we have to stop transaction now and restart it when symlink
+ * contents is written.
+ *
+ * To keep the fs consistent in case of a crash, we have to put the
+ * inode on the orphan list in the meantime.
*/
+ drop_nlink(inode);
+ err = ext3_orphan_add(handle, inode);
+ ext3_journal_stop(handle);
+ if (err)
+ goto err_drop_inode;
err = __page_symlink(inode, symname, l, 1);
+ if (err)
+ goto err_drop_inode;
+ /*
+ * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS
+ * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
+ */
+ handle = ext3_journal_start(dir,
+ EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto err_drop_inode;
+ }
+ inc_nlink(inode);
+ err = ext3_orphan_del(handle, inode);
if (err) {
+ ext3_journal_stop(handle);
drop_nlink(inode);
- unlock_new_inode(inode);
- ext3_mark_inode_dirty(handle, inode);
- iput (inode);
- goto out_stop;
+ goto err_drop_inode;
}
} else {
inode->i_op = &ext3_fast_symlink_inode_operations;
@@ -2239,6 +2289,10 @@ out_stop:
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
+err_drop_inode:
+ unlock_new_inode(inode);
+ iput(inode);
+ return err;
}
static int ext3_link (struct dentry * old_dentry,
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index ae8200f84e39..1cc7038e273d 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -151,6 +151,13 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new)
spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
tmp = fat_cache_alloc(inode);
+ if (!tmp) {
+ spin_lock(&MSDOS_I(inode)->cache_lru_lock);
+ MSDOS_I(inode)->nr_caches--;
+ spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
+ return;
+ }
+
spin_lock(&MSDOS_I(inode)->cache_lru_lock);
cache = fat_cache_merge(inode, new);
if (cache != NULL) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ee42b9e0b16a..4ad64732cbce 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -98,7 +98,7 @@ next:
*bh = sb_bread(sb, phys);
if (*bh == NULL) {
- printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
+ fat_msg(sb, KERN_ERR, "Directory bread(block %llu) failed",
(llu)phys);
/* skip this block */
*pos = (iblock + 1) << sb->s_blocksize_bits;
@@ -136,9 +136,10 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
* but ignore that right now.
* Ahem... Stack smashing in ring 0 isn't fun. Fixed.
*/
-static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
- int uni_xlate, struct nls_table *nls)
+static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
+ const wchar_t *uni, int len, struct nls_table *nls)
{
+ int uni_xlate = MSDOS_SB(sb)->options.unicode_xlate;
const wchar_t *ip;
wchar_t ec;
unsigned char *op;
@@ -166,23 +167,23 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
}
if (unlikely(*ip)) {
- printk(KERN_WARNING "FAT: filename was truncated while "
- "converting.");
+ fat_msg(sb, KERN_WARNING, "filename was truncated while "
+ "converting.");
}
*op = 0;
return (op - ascii);
}
-static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni,
unsigned char *buf, int size)
{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
if (sbi->options.utf8)
return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
UTF16_HOST_ENDIAN, buf, size);
else
- return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
- sbi->nls_io);
+ return uni16_to_x8(sb, buf, uni, size, sbi->nls_io);
}
static inline int
@@ -419,7 +420,7 @@ parse_record:
/* Compare shortname */
bufuname[last_u] = 0x0000;
- len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
if (fat_name_match(sbi, name, name_len, bufname, len))
goto found;
@@ -428,7 +429,7 @@ parse_record:
int size = PATH_MAX - FAT_MAX_UNI_SIZE;
/* Compare longname */
- len = fat_uni_to_x8(sbi, unicode, longname, size);
+ len = fat_uni_to_x8(sb, unicode, longname, size);
if (fat_name_match(sbi, name, name_len, longname, len))
goto found;
}
@@ -545,7 +546,7 @@ parse_record:
if (nr_slots) {
void *longname = unicode + FAT_MAX_UNI_CHARS;
int size = PATH_MAX - FAT_MAX_UNI_SIZE;
- int len = fat_uni_to_x8(sbi, unicode, longname, size);
+ int len = fat_uni_to_x8(sb, unicode, longname, size);
fill_name = longname;
fill_len = len;
@@ -621,7 +622,7 @@ parse_record:
if (isvfat) {
bufuname[j] = 0x0000;
- i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
}
if (nr_slots) {
/* hack for fat_ioctl_filldir() */
@@ -979,6 +980,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
{
+ struct super_block *sb = dir->i_sb;
struct msdos_dir_entry *de;
struct buffer_head *bh;
int err = 0, nr_slots;
@@ -1013,8 +1015,8 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
*/
err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots);
if (err) {
- printk(KERN_WARNING
- "FAT: Couldn't remove the long name slots\n");
+ fat_msg(sb, KERN_WARNING,
+ "Couldn't remove the long name slots");
}
}
@@ -1265,7 +1267,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
if (sbi->fat_bits != 32)
goto error;
} else if (MSDOS_I(dir)->i_start == 0) {
- printk(KERN_ERR "FAT: Corrupted directory (i_pos %lld)\n",
+ fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
MSDOS_I(dir)->i_pos);
err = -EIO;
goto error;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index f50408901f7e..8276cc282dec 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -319,19 +319,20 @@ extern struct inode *fat_build_inode(struct super_block *sb,
struct msdos_dir_entry *de, loff_t i_pos);
extern int fat_sync_inode(struct inode *inode);
extern int fat_fill_super(struct super_block *sb, void *data, int silent,
- const struct inode_operations *fs_dir_inode_ops,
- int isvfat, void (*setup)(struct super_block *));
+ int isvfat, void (*setup)(struct super_block *));
extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
/* fat/misc.c */
extern void
-__fat_fs_error(struct super_block *s, int report, const char *fmt, ...)
+__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
+ __attribute__ ((format (printf, 3, 4))) __cold;
+#define fat_fs_error(sb, fmt, args...) \
+ __fat_fs_error(sb, 1, fmt , ## args)
+#define fat_fs_error_ratelimit(sb, fmt, args...) \
+ __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
+void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
__attribute__ ((format (printf, 3, 4))) __cold;
-#define fat_fs_error(s, fmt, args...) \
- __fat_fs_error(s, 1, fmt , ## args)
-#define fat_fs_error_ratelimit(s, fmt, args...) \
- __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args)
extern int fat_clusters_flush(struct super_block *sb);
extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index b47d2c9f4fa1..2e81ac0df7e2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -95,7 +95,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
err_brelse:
brelse(bhs[0]);
err:
- printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr);
+ fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
return -EIO;
}
@@ -108,7 +108,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
fatent->bhs[0] = sb_bread(sb, blocknr);
if (!fatent->bhs[0]) {
- printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
+ fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
(llu)blocknr);
return -EIO;
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8d68690bdcf1..cb8d8391ac0b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -581,7 +581,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = sbi->free_clusters;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
- buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12;
+ buf->f_namelen =
+ (sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
return 0;
}
@@ -619,8 +620,8 @@ retry:
bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
if (!bh) {
- printk(KERN_ERR "FAT: unable to read inode block "
- "for updating (i_pos %lld)\n", i_pos);
+ fat_msg(sb, KERN_ERR, "unable to read inode block "
+ "for updating (i_pos %lld)", i_pos);
return -EIO;
}
spin_lock(&sbi->inode_hash_lock);
@@ -976,8 +977,8 @@ static const match_table_t vfat_tokens = {
{Opt_err, NULL}
};
-static int parse_options(char *options, int is_vfat, int silent, int *debug,
- struct fat_mount_options *opts)
+static int parse_options(struct super_block *sb, char *options, int is_vfat,
+ int silent, int *debug, struct fat_mount_options *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
@@ -1168,15 +1169,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
/* obsolete mount options */
case Opt_obsolate:
- printk(KERN_INFO "FAT: \"%s\" option is obsolete, "
- "not supported now\n", p);
+ fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
+ "not supported now", p);
break;
/* unknown option */
default:
if (!silent) {
- printk(KERN_ERR
- "FAT: Unrecognized mount option \"%s\" "
- "or missing value\n", p);
+ fat_msg(sb, KERN_ERR,
+ "Unrecognized mount option \"%s\" "
+ "or missing value", p);
}
return -EINVAL;
}
@@ -1185,7 +1186,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
out:
/* UTF-8 doesn't provide FAT semantics */
if (!strcmp(opts->iocharset, "utf8")) {
- printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
+ fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
" for FAT filesystems, filesystem will be "
- "case sensitive!\n");
+ "case sensitive!");
}
@@ -1238,8 +1239,7 @@ static int fat_read_root(struct inode *inode)
/*
* Read the super block of an MS-DOS FS.
*/
-int fat_fill_super(struct super_block *sb, void *data, int silent,
- const struct inode_operations *fs_dir_inode_ops, int isvfat,
+int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
void (*setup)(struct super_block *))
{
struct inode *root_inode = NULL, *fat_inode = NULL;
@@ -1268,11 +1268,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
sb->s_magic = MSDOS_SUPER_MAGIC;
sb->s_op = &fat_sops;
sb->s_export_op = &fat_export_ops;
- sbi->dir_ops = fs_dir_inode_ops;
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- error = parse_options(data, isvfat, silent, &debug, &sbi->options);
+ error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
if (error)
goto out_fail;
@@ -1282,20 +1281,20 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
sb_min_blocksize(sb, 512);
bh = sb_bread(sb, 0);
if (bh == NULL) {
- printk(KERN_ERR "FAT: unable to read boot sector\n");
+ fat_msg(sb, KERN_ERR, "unable to read boot sector");
goto out_fail;
}
b = (struct fat_boot_sector *) bh->b_data;
if (!b->reserved) {
if (!silent)
- printk(KERN_ERR "FAT: bogus number of reserved sectors\n");
+ fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
brelse(bh);
goto out_invalid;
}
if (!b->fats) {
if (!silent)
- printk(KERN_ERR "FAT: bogus number of FAT structure\n");
+ fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
brelse(bh);
goto out_invalid;
}
@@ -1308,7 +1307,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
media = b->media;
if (!fat_valid_media(media)) {
if (!silent)
- printk(KERN_ERR "FAT: invalid media value (0x%02x)\n",
+ fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
media);
brelse(bh);
goto out_invalid;
@@ -1318,7 +1317,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
|| (logical_sector_size < 512)
|| (logical_sector_size > 4096)) {
if (!silent)
- printk(KERN_ERR "FAT: bogus logical sector size %u\n",
+ fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
logical_sector_size);
brelse(bh);
goto out_invalid;
@@ -1326,15 +1325,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
sbi->sec_per_clus = b->sec_per_clus;
if (!is_power_of_2(sbi->sec_per_clus)) {
if (!silent)
- printk(KERN_ERR "FAT: bogus sectors per cluster %u\n",
+ fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
sbi->sec_per_clus);
brelse(bh);
goto out_invalid;
}
if (logical_sector_size < sb->s_blocksize) {
- printk(KERN_ERR "FAT: logical sector size too small for device"
- " (logical sector size = %u)\n", logical_sector_size);
+ fat_msg(sb, KERN_ERR, "logical sector size too small for device"
+ " (logical sector size = %u)", logical_sector_size);
brelse(bh);
goto out_fail;
}
@@ -1342,14 +1341,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
brelse(bh);
if (!sb_set_blocksize(sb, logical_sector_size)) {
- printk(KERN_ERR "FAT: unable to set blocksize %u\n",
+ fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
logical_sector_size);
goto out_fail;
}
bh = sb_bread(sb, 0);
if (bh == NULL) {
- printk(KERN_ERR "FAT: unable to read boot sector"
- " (logical sector size = %lu)\n",
+ fat_msg(sb, KERN_ERR, "unable to read boot sector"
+ " (logical sector size = %lu)",
sb->s_blocksize);
goto out_fail;
}
@@ -1385,16 +1384,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
if (fsinfo_bh == NULL) {
- printk(KERN_ERR "FAT: bread failed, FSINFO block"
- " (sector = %lu)\n", sbi->fsinfo_sector);
+ fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
+ " (sector = %lu)", sbi->fsinfo_sector);
brelse(bh);
goto out_fail;
}
fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
if (!IS_FSINFO(fsinfo)) {
- printk(KERN_WARNING "FAT: Invalid FSINFO signature: "
- "0x%08x, 0x%08x (sector = %lu)\n",
+ fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
+ "0x%08x, 0x%08x (sector = %lu)",
le32_to_cpu(fsinfo->signature1),
le32_to_cpu(fsinfo->signature2),
sbi->fsinfo_sector);
@@ -1415,8 +1414,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
if (!silent)
- printk(KERN_ERR "FAT: bogus directroy-entries per block"
- " (%u)\n", sbi->dir_entries);
+ fat_msg(sb, KERN_ERR, "bogus directroy-entries per block"
+ " (%u)", sbi->dir_entries);
brelse(bh);
goto out_invalid;
}
@@ -1438,7 +1437,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
if (total_clusters > MAX_FAT(sb)) {
if (!silent)
- printk(KERN_ERR "FAT: count of clusters too big (%u)\n",
+ fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
total_clusters);
brelse(bh);
goto out_invalid;
@@ -1471,7 +1470,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
sprintf(buf, "cp%d", sbi->options.codepage);
sbi->nls_disk = load_nls(buf);
if (!sbi->nls_disk) {
- printk(KERN_ERR "FAT: codepage %s not found\n", buf);
+ fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
goto out_fail;
}
@@ -1479,7 +1478,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
if (sbi->options.isvfat) {
sbi->nls_io = load_nls(sbi->options.iocharset);
if (!sbi->nls_io) {
- printk(KERN_ERR "FAT: IO charset %s not found\n",
+ fat_msg(sb, KERN_ERR, "IO charset %s not found",
sbi->options.iocharset);
goto out_fail;
}
@@ -1503,7 +1502,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
insert_inode_hash(root_inode);
sb->s_root = d_alloc_root(root_inode);
if (!sb->s_root) {
- printk(KERN_ERR "FAT: get root inode failed\n");
+ fat_msg(sb, KERN_ERR, "get root inode failed");
goto out_fail;
}
@@ -1512,8 +1511,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
out_invalid:
error = -EINVAL;
if (!silent)
- printk(KERN_INFO "VFS: Can't find a valid FAT filesystem"
- " on dev %s.\n", sb->s_id);
+ fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
out_fail:
if (fat_inode)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 970e682ea754..6d93360ca0cc 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -20,30 +20,46 @@
* In case the file system is remounted read-only, it can be made writable
* again by remounting it.
*/
-void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...)
+void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
{
- struct fat_mount_options *opts = &MSDOS_SB(s)->options;
+ struct fat_mount_options *opts = &MSDOS_SB(sb)->options;
va_list args;
+ struct va_format vaf;
if (report) {
- printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
-
- printk(KERN_ERR " ");
va_start(args, fmt);
- vprintk(fmt, args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
va_end(args);
- printk("\n");
}
if (opts->errors == FAT_ERRORS_PANIC)
- panic("FAT: fs panic from previous error\n");
- else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) {
- s->s_flags |= MS_RDONLY;
- printk(KERN_ERR "FAT: Filesystem has been set read-only\n");
+ panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
+ else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
+ sb->s_flags |= MS_RDONLY;
+ printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
+ "set read-only\n", sb->s_id);
}
}
EXPORT_SYMBOL_GPL(__fat_fs_error);
+/**
+ * fat_msg() - print preformatted FAT-specific messages. Everything that is
+ * not fat_fs_error() should be fat_msg().
+ */
+void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ va_end(args);
+}
+
/* Flushes the number of free clusters on FAT32 */
/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
int fat_clusters_flush(struct super_block *sb)
@@ -57,15 +73,15 @@ int fat_clusters_flush(struct super_block *sb)
bh = sb_bread(sb, sbi->fsinfo_sector);
if (bh == NULL) {
- printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n");
+ fat_msg(sb, KERN_ERR, "bread failed in fat_clusters_flush");
return -EIO;
}
fsinfo = (struct fat_boot_fsinfo *)bh->b_data;
/* Sanity check */
if (!IS_FSINFO(fsinfo)) {
- printk(KERN_ERR "FAT: Invalid FSINFO signature: "
- "0x%08x, 0x%08x (sector = %lu)\n",
+ fat_msg(sb, KERN_ERR, "Invalid FSINFO signature: "
+ "0x%08x, 0x%08x (sector = %lu)",
le32_to_cpu(fsinfo->signature1),
le32_to_cpu(fsinfo->signature2),
sbi->fsinfo_sector);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index c3eccbd02037..be15437c272e 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -664,14 +664,14 @@ static const struct inode_operations msdos_dir_inode_operations = {
static void setup(struct super_block *sb)
{
+ MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
sb->s_d_op = &msdos_dentry_operations;
sb->s_flags |= MS_NOATIME;
}
static int msdos_fill_super(struct super_block *sb, void *data, int silent)
{
- return fat_fill_super(sb, data, silent, &msdos_dir_inode_operations,
- 0, setup);
+ return fat_fill_super(sb, data, silent, 0, setup);
}
static struct dentry *msdos_mount(struct file_system_type *fs_type,
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index e2466b2f8cf2..c61a6789f36c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1070,6 +1070,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
static void setup(struct super_block *sb)
{
+ MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
if (MSDOS_SB(sb)->options.name_check != 's')
sb->s_d_op = &vfat_ci_dentry_ops;
else
@@ -1078,8 +1079,7 @@ static void setup(struct super_block *sb)
static int vfat_fill_super(struct super_block *sb, void *data, int silent)
{
- return fat_fill_super(sb, data, silent, &vfat_dir_inode_operations,
- 1, setup);
+ return fat_fill_super(sb, data, silent, 1, setup);
}
static struct dentry *vfat_mount(struct file_system_type *fs_type,
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 2ba6719ac612..1a4311437a8b 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
* *ip: VFS inode
*
* Description:
- * vxfs_put_fake_inode frees all data asssociated with @ip.
+ * vxfs_put_fake_inode frees all data associated with @ip.
*/
void
vxfs_put_fake_inode(struct inode *ip)
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 48a18f184d50..30afdfa7aec7 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op)
_enter("{OBJ%x OP%x,%u}",
op->object->debug_id, op->debug_id, atomic_read(&op->usage));
- fscache_set_op_state(op, "EnQ");
-
ASSERT(list_empty(&op->pend_link));
ASSERT(op->processor != NULL);
ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
@@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
static void fscache_run_op(struct fscache_object *object,
struct fscache_operation *op)
{
- fscache_set_op_state(op, "Run");
-
object->n_in_progress++;
if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
- fscache_set_op_state(op, "SubmitX");
-
spin_lock(&object->lock);
ASSERTCMP(object->n_ops, >=, object->n_in_progress);
ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object,
ASSERTCMP(atomic_read(&op->usage), >, 0);
- fscache_set_op_state(op, "Submit");
-
spin_lock(&object->lock);
ASSERTCMP(object->n_ops, >=, object->n_in_progress);
ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op)
if (!atomic_dec_and_test(&op->usage))
return;
- fscache_set_op_state(op, "Put");
-
_debug("PUT OP");
if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
BUG();
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 41c441c2058d..a2a5d19ece6a 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
fscache_stat(&fscache_n_attr_changed_calls);
if (fscache_object_is_active(object)) {
- fscache_set_op_state(op, "CallFS");
fscache_stat(&fscache_n_cop_attr_changed);
ret = object->cache->ops->attr_changed(object);
fscache_stat_d(&fscache_n_cop_attr_changed);
- fscache_set_op_state(op, "Done");
if (ret < 0)
fscache_abort_object(object);
}
@@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
fscache_operation_init(op, fscache_attr_changed_op, NULL);
op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
- fscache_set_op_name(op, "Attr");
spin_lock(&cookie->lock);
@@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
op->context = context;
op->start_time = jiffies;
INIT_LIST_HEAD(&op->to_do);
- fscache_set_op_name(&op->op, "Retr");
return op;
}
@@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
_leave(" = -ENOMEM");
return -ENOMEM;
}
- fscache_set_op_name(&op->op, "RetrRA1");
spin_lock(&cookie->lock);
@@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
op = fscache_alloc_retrieval(mapping, end_io_func, context);
if (!op)
return -ENOMEM;
- fscache_set_op_name(&op->op, "RetrRAN");
spin_lock(&cookie->lock);
@@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
if (!op)
return -ENOMEM;
- fscache_set_op_name(&op->op, "RetrAL1");
spin_lock(&cookie->lock);
@@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op)
_enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
- fscache_set_op_state(&op->op, "GetPage");
-
spin_lock(&object->lock);
cookie = object->cookie;
@@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op)
spin_unlock(&cookie->stores_lock);
spin_unlock(&object->lock);
- fscache_set_op_state(&op->op, "Store");
fscache_stat(&fscache_n_store_pages);
fscache_stat(&fscache_n_cop_write_page);
ret = object->cache->ops->write_page(op, page);
fscache_stat_d(&fscache_n_cop_write_page);
- fscache_set_op_state(&op->op, "EndWrite");
fscache_end_page_write(object, page);
if (ret < 0) {
- fscache_set_op_state(&op->op, "Abort");
fscache_abort_object(object);
} else {
fscache_enqueue_operation(&op->op);
@@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie,
fscache_operation_init(&op->op, fscache_write_op,
fscache_release_write_op);
op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
- fscache_set_op_name(&op->op, "Write1");
ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
if (ret < 0)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e462a7a281bf..0d0e3faddcfa 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (!inode)
return 0;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && (nd->flags & LOOKUP_RCU))
return -ECHILD;
fc = get_fuse_conn(inode);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
ccflags-y := -I$(src)
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
- glops.o inode.o log.o lops.o main.o meta_io.o \
+ glops.o log.o lops.o main.o meta_io.o \
aops.o dentry.o export.o file.o \
- ops_fstype.o ops_inode.o quota.o \
+ ops_fstype.o inode.o quota.o \
recovery.o rgrp.o super.o sys.o trans.o util.o
gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0f5c4f9d5d62..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
bd = bh->b_private;
if (bd && bd->bd_ail)
goto cannot_release;
- gfs2_assert_warn(sdp, !buffer_pinned(bh));
- gfs2_assert_warn(sdp, !buffer_dirty(bh));
+ if (buffer_pinned(bh) || buffer_dirty(bh))
+ goto not_possible;
bh = bh->b_this_page;
} while(bh != head);
gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
} while (bh != head);
return try_to_free_buffers(page);
+
+not_possible: /* Should never happen */
+ WARN_ON(buffer_dirty(bh));
+ WARN_ON(buffer_pinned(bh));
cannot_release:
gfs2_log_unlock(sdp);
return 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2ddcc3f..e65493a8ac00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
metadata = (height != ip->i_height - 1);
if (metadata)
revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
+ else if (ip->i_depth)
+ revokes = sdp->sd_inptrs;
if (ip != GFS2_I(sdp->sd_rindex))
error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index f789c5732b7c..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
struct qstr gfs2_qdot __read_mostly;
struct qstr gfs2_qdotdot __read_mostly;
-typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
- u64 leaf_no, void *data);
typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
const struct qstr *name, void *opaque);
-
int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
struct buffer_head **bhp)
{
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
*/
int gfs2_dir_add(struct inode *inode, const struct qstr *name,
- const struct gfs2_inode *nip, unsigned type)
+ const struct gfs2_inode *nip)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
return PTR_ERR(dent);
dent = gfs2_init_dirent(inode, dent, name, bh);
gfs2_inum_out(nip, dent);
- dent->de_type = cpu_to_be16(type);
+ dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
if (ip->i_diskflags & GFS2_DIF_EXHASH) {
leaf = (struct gfs2_leaf *)bh->b_data;
be16_add_cpu(&leaf->lf_entries, 1);
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_entries++;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
+ if (S_ISDIR(nip->i_inode.i_mode))
+ inc_nlink(&ip->i_inode);
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
* Returns: 0 on success, error code on failure
*/
-int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
+int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
{
+ const struct qstr *name = &dentry->d_name;
struct gfs2_dirent *dent, *prev = NULL;
struct buffer_head *bh;
int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_entries--;
dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
+ if (S_ISDIR(dentry->d_inode->i_mode))
+ drop_nlink(&dip->i_inode);
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
}
/**
- * foreach_leaf - call a function for each leaf in a directory
- * @dip: the directory
- * @lc: the function to call for each each
- * @data: private data to pass to it
- *
- * Returns: errno
- */
-
-static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct buffer_head *bh;
- struct gfs2_leaf *leaf;
- u32 hsize, len;
- u32 ht_offset, lp_offset, ht_offset_cur = -1;
- u32 index = 0;
- __be64 *lp;
- u64 leaf_no;
- int error = 0;
-
- hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
-
- lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
- if (!lp)
- return -ENOMEM;
-
- while (index < hsize) {
- lp_offset = index & (sdp->sd_hash_ptrs - 1);
- ht_offset = index - lp_offset;
-
- if (ht_offset_cur != ht_offset) {
- error = gfs2_dir_read_data(dip, (char *)lp,
- ht_offset * sizeof(__be64),
- sdp->sd_hash_bsize, 1);
- if (error != sdp->sd_hash_bsize) {
- if (error >= 0)
- error = -EIO;
- goto out;
- }
- ht_offset_cur = ht_offset;
- }
-
- leaf_no = be64_to_cpu(lp[lp_offset]);
- if (leaf_no) {
- error = get_leaf(dip, leaf_no, &bh);
- if (error)
- goto out;
- leaf = (struct gfs2_leaf *)bh->b_data;
- len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
- brelse(bh);
-
- error = lc(dip, index, len, leaf_no, data);
- if (error)
- goto out;
-
- index = (index & ~(len - 1)) + len;
- } else
- index++;
- }
-
- if (index != hsize) {
- gfs2_consist_inode(dip);
- error = -EIO;
- }
-
-out:
- kfree(lp);
-
- return error;
-}
-
-/**
* leaf_dealloc - Deallocate a directory leaf
* @dip: the directory
* @index: the hash table offset in the directory
* @len: the number of pointers to this leaf
* @leaf_no: the leaf number
- * @data: not used
+ * @leaf_bh: buffer_head for the starting leaf
+ * @last_dealloc: 1 if this is the final dealloc for the leaf, else 0
*
* Returns: errno
*/
static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
- u64 leaf_no, void *data)
+ u64 leaf_no, struct buffer_head *leaf_bh,
+ int last_dealloc)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
goto out_qs;
/* Count the number of leaves */
+ bh = leaf_bh;
for (blk = leaf_no; blk; blk = nblk) {
- error = get_leaf(dip, blk, &bh);
- if (error)
- goto out_rlist;
+ if (blk != leaf_no) {
+ error = get_leaf(dip, blk, &bh);
+ if (error)
+ goto out_rlist;
+ }
tmp_leaf = (struct gfs2_leaf *)bh->b_data;
nblk = be64_to_cpu(tmp_leaf->lf_next);
- brelse(bh);
+ if (blk != leaf_no)
+ brelse(bh);
gfs2_rlist_add(sdp, &rlist, blk);
l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
if (error)
goto out_rg_gunlock;
+ bh = leaf_bh;
+
for (blk = leaf_no; blk; blk = nblk) {
- error = get_leaf(dip, blk, &bh);
- if (error)
- goto out_end_trans;
+ if (blk != leaf_no) {
+ error = get_leaf(dip, blk, &bh);
+ if (error)
+ goto out_end_trans;
+ }
tmp_leaf = (struct gfs2_leaf *)bh->b_data;
nblk = be64_to_cpu(tmp_leaf->lf_next);
- brelse(bh);
+ if (blk != leaf_no)
+ brelse(bh);
gfs2_free_meta(dip, blk, 1);
gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
goto out_end_trans;
gfs2_trans_add_bh(dip->i_gl, dibh, 1);
+ /* On the last dealloc, make this a regular file in case we crash.
+ (We don't want to free these blocks a second time.) */
+ if (last_dealloc)
+ dip->i_inode.i_mode = S_IFREG;
gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct buffer_head *bh;
- int error;
+ struct gfs2_leaf *leaf;
+ u32 hsize, len;
+ u32 ht_offset, lp_offset, ht_offset_cur = -1;
+ u32 index = 0, next_index;
+ __be64 *lp;
+ u64 leaf_no;
+ int error = 0, last;
- /* Dealloc on-disk leaves to FREEMETA state */
- error = foreach_leaf(dip, leaf_dealloc, NULL);
- if (error)
- return error;
+ hsize = 1 << dip->i_depth;
+ if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
+ gfs2_consist_inode(dip);
+ return -EIO;
+ }
- /* Make this a regular file in case we crash.
- (We don't want to free these blocks a second time.) */
+ lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
+ if (!lp)
+ return -ENOMEM;
- error = gfs2_trans_begin(sdp, RES_DINODE, 0);
- if (error)
- return error;
+ while (index < hsize) {
+ lp_offset = index & (sdp->sd_hash_ptrs - 1);
+ ht_offset = index - lp_offset;
- error = gfs2_meta_inode_buffer(dip, &bh);
- if (!error) {
- gfs2_trans_add_bh(dip->i_gl, bh, 1);
- ((struct gfs2_dinode *)bh->b_data)->di_mode =
- cpu_to_be32(S_IFREG);
- brelse(bh);
+ if (ht_offset_cur != ht_offset) {
+ error = gfs2_dir_read_data(dip, (char *)lp,
+ ht_offset * sizeof(__be64),
+ sdp->sd_hash_bsize, 1);
+ if (error != sdp->sd_hash_bsize) {
+ if (error >= 0)
+ error = -EIO;
+ goto out;
+ }
+ ht_offset_cur = ht_offset;
+ }
+
+ leaf_no = be64_to_cpu(lp[lp_offset]);
+ if (leaf_no) {
+ error = get_leaf(dip, leaf_no, &bh);
+ if (error)
+ goto out;
+ leaf = (struct gfs2_leaf *)bh->b_data;
+ len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
+
+ next_index = (index & ~(len - 1)) + len;
+ last = ((next_index >= hsize) ? 1 : 0);
+ error = leaf_dealloc(dip, index, len, leaf_no, bh,
+ last);
+ brelse(bh);
+ if (error)
+ goto out;
+ index = next_index;
+ } else
+ index++;
}
- gfs2_trans_end(sdp);
+ if (index != hsize) {
+ gfs2_consist_inode(dip);
+ error = -EIO;
+ }
+
+out:
+ kfree(lp);
return error;
}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
const struct gfs2_inode *ip);
extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
- const struct gfs2_inode *ip, unsigned int type);
-extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
+ const struct gfs2_inode *ip);
+extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
filldir_t filldir);
extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
struct gfs2_sbd *sdp = sb->s_fs_info;
struct inode *inode;
- inode = gfs2_ilookup(sb, inum->no_addr);
+ inode = gfs2_ilookup(sb, inum->no_addr, 0);
if (inode) {
if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e48310885c48..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
/**
* gfs2_fsync - sync the dirty data for a file (across the cluster)
* @file: the file that points to the dentry (we ignore this)
- * @dentry: the dentry that points to the inode to sync
+ * @datasync: set if we can ignore timestamp changes
*
- * The VFS will flush "normal" data for us. We only need to worry
- * about metadata here. For journaled data, we just do a log flush
- * as we can't avoid it. Otherwise we can just bale out if datasync
- * is set. For stuffed inodes we must flush the log in order to
- * ensure that all data is on disk.
- *
- * The call to write_inode_now() is there to write back metadata and
- * the inode itself. It does also try and write the data, but thats
- * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
- * for us.
+ * The VFS will flush data for us. We only need to worry
+ * about metadata here.
*
* Returns: errno
*/
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
{
struct inode *inode = file->f_mapping->host;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
- int ret = 0;
-
- if (gfs2_is_jdata(GFS2_I(inode))) {
- gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
- return 0;
- }
+ struct gfs2_inode *ip = GFS2_I(inode);
+ int ret;
- if (sync_state != 0) {
- if (!datasync)
- ret = write_inode_now(inode, 0);
+ if (datasync)
+ sync_state &= ~I_DIRTY_SYNC;
- if (gfs2_is_stuffed(GFS2_I(inode)))
- gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+ if (sync_state) {
+ ret = sync_inode_metadata(inode, 1);
+ if (ret)
+ return ret;
+ gfs2_ail_flush(ip->i_gl);
}
- return ret;
+ return 0;
}
/**
@@ -826,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
loff_t bytes, max_bytes;
struct gfs2_alloc *al;
int error;
+ loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
@@ -833,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
if (mode & ~FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
- offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
- sdp->sd_sb.sb_bsize_shift;
+ offset &= bsize_mask;
len = next - offset;
bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
if (!bytes)
bytes = UINT_MAX;
+ bytes &= bsize_mask;
+ if (bytes == 0)
+ bytes = sdp->sd_sb.sb_bsize;
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
error = gfs2_glock_nq(&ip->i_gh);
@@ -870,6 +863,9 @@ retry:
if (error) {
if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
bytes >>= 1;
+ bytes &= bsize_mask;
+ if (bytes == 0)
+ bytes = sdp->sd_sb.sb_bsize;
goto retry;
}
goto out_qunlock;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7a4fb630a320..2792a790e50b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -143,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
- /* assert_spin_locked(&gl->gl_spin); */
-
if (gl->gl_state == LM_ST_UNLOCKED)
return 0;
- if (test_bit(GLF_LFLUSH, &gl->gl_flags))
- return 0;
- if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
- !list_empty(&gl->gl_holders))
+ if (!list_empty(&gl->gl_holders))
return 0;
if (glops->go_demote_ok)
return glops->go_demote_ok(gl);
@@ -158,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
}
+void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
+{
+ spin_lock(&lru_lock);
+
+ if (!list_empty(&gl->gl_lru))
+ list_del_init(&gl->gl_lru);
+ else
+ atomic_inc(&lru_count);
+
+ list_add_tail(&gl->gl_lru, &lru_list);
+ set_bit(GLF_LRU, &gl->gl_flags);
+ spin_unlock(&lru_lock);
+}
+
+static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+{
+ spin_lock(&lru_lock);
+ if (!list_empty(&gl->gl_lru)) {
+ list_del_init(&gl->gl_lru);
+ atomic_dec(&lru_count);
+ clear_bit(GLF_LRU, &gl->gl_flags);
+ }
+ spin_unlock(&lru_lock);
+}
+
/**
* __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
* @gl: the glock
@@ -168,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
{
- if (demote_ok(gl)) {
- spin_lock(&lru_lock);
-
- if (!list_empty(&gl->gl_lru))
- list_del_init(&gl->gl_lru);
- else
- atomic_inc(&lru_count);
-
- list_add_tail(&gl->gl_lru, &lru_list);
- spin_unlock(&lru_lock);
- }
-}
-
-void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
- spin_lock(&gl->gl_spin);
- __gfs2_glock_schedule_for_reclaim(gl);
- spin_unlock(&gl->gl_spin);
+ if (demote_ok(gl))
+ gfs2_glock_add_to_lru(gl);
}
/**
@@ -217,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
spin_lock_bucket(gl->gl_hash);
hlist_bl_del_rcu(&gl->gl_list);
spin_unlock_bucket(gl->gl_hash);
- spin_lock(&lru_lock);
- if (!list_empty(&gl->gl_lru)) {
- list_del_init(&gl->gl_lru);
- atomic_dec(&lru_count);
- }
- spin_unlock(&lru_lock);
+ gfs2_glock_remove_from_lru(gl);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
trace_gfs2_glock_put(gl);
@@ -542,11 +541,6 @@ __acquires(&gl->gl_spin)
clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
gfs2_glock_hold(gl);
- if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
- gl->gl_state == LM_ST_DEFERRED) &&
- !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
- lck_flags |= LM_FLAG_TRY_1CB;
-
if (sdp->sd_lockstruct.ls_ops->lm_lock) {
/* lock_dlm */
ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -648,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
if (ip)
- inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
+ inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
else
inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
if (inode && !IS_ERR(inode)) {
@@ -1025,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
+ if (test_bit(GLF_LRU, &gl->gl_flags))
+ gfs2_glock_remove_from_lru(gl);
+
spin_lock(&gl->gl_spin);
add_to_queue(gh);
if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1082,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
- __gfs2_glock_schedule_for_reclaim(gl);
+ if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
+ __gfs2_glock_schedule_for_reclaim(gl);
trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
@@ -1348,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
}
-static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink,
+ struct shrink_control *sc)
{
struct gfs2_glock *gl;
int may_demote;
int nr_skipped = 0;
+ int nr = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
LIST_HEAD(skipped);
if (nr == 0)
@@ -1365,6 +1366,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
while(nr && !list_empty(&lru_list)) {
gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
+ clear_bit(GLF_LRU, &gl->gl_flags);
atomic_dec(&lru_count);
/* Test for being demotable */
@@ -1387,6 +1389,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
}
nr_skipped++;
list_add(&gl->gl_lru, &skipped);
+ set_bit(GLF_LRU, &gl->gl_flags);
}
list_splice(&skipped, &lru_list);
atomic_add(nr_skipped, &lru_count);
@@ -1459,12 +1462,7 @@ static void thaw_glock(struct gfs2_glock *gl)
static void clear_glock(struct gfs2_glock *gl)
{
- spin_lock(&lru_lock);
- if (!list_empty(&gl->gl_lru)) {
- list_del_init(&gl->gl_lru);
- atomic_dec(&lru_count);
- }
- spin_unlock(&lru_lock);
+ gfs2_glock_remove_from_lru(gl);
spin_lock(&gl->gl_spin);
if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1599,9 +1597,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
return 0;
}
-static const char *gflags2str(char *buf, const unsigned long *gflags)
+static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
{
+ const unsigned long *gflags = &gl->gl_flags;
char *p = buf;
+
if (test_bit(GLF_LOCK, gflags))
*p++ = 'l';
if (test_bit(GLF_DEMOTE, gflags))
@@ -1624,6 +1624,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
*p++ = 'F';
if (test_bit(GLF_QUEUED, gflags))
*p++ = 'q';
+ if (test_bit(GLF_LRU, gflags))
+ *p++ = 'L';
+ if (gl->gl_object)
+ *p++ = 'o';
*p = 0;
return buf;
}
@@ -1658,14 +1662,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
+ gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
- gflags2str(gflags_buf, &gl->gl_flags),
+ gflags2str(gflags_buf, gl),
state2str(gl->gl_target),
state2str(gl->gl_demote_state), dtime,
atomic_read(&gl->gl_ail_count),
+ atomic_read(&gl->gl_revokes),
atomic_read(&gl->gl_ref));
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
extern void gfs2_glock_free(struct gfs2_glock *gl);
extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 25eeb2bcee47..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
#include "trans.h"
/**
- * ail_empty_gl - remove all buffers for a given lock from the AIL
+ * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
* @gl: the glock
*
* None of the buffers should be dirty, locked, or pinned.
*/
-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static void __gfs2_ail_flush(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
- struct gfs2_trans tr;
-
- memset(&tr, 0, sizeof(tr));
- tr.tr_revokes = atomic_read(&gl->gl_ail_count);
-
- if (!tr.tr_revokes)
- return;
-
- /* A shortened, inline version of gfs2_trans_begin() */
- tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
- tr.tr_ip = (unsigned long)__builtin_return_address(0);
- INIT_LIST_HEAD(&tr.tr_list_buf);
- gfs2_log_reserve(sdp, tr.tr_reserved);
- BUG_ON(current->journal_info);
- current->journal_info = &tr;
spin_lock(&sdp->sd_ail_lock);
while (!list_empty(head)) {
@@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
spin_unlock(&sdp->sd_ail_lock);
+}
+
+/**
+ * gfs2_ail_empty_gl - flush a glock's AIL list under a hand-built transaction
+ * @gl: the glock
+ *
+ * Returns immediately when gl_ail_count reads as zero.  Otherwise an
+ * on-stack struct gfs2_trans is filled in by hand (tr_revokes is the AIL
+ * count read at entry), log space is reserved for it, and it is installed
+ * as current->journal_info.  __gfs2_ail_flush() then runs under that
+ * transaction, after which the transaction is ended and the log flushed.
+ */
+static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_trans tr;
+
+ memset(&tr, 0, sizeof(tr));
+ tr.tr_revokes = atomic_read(&gl->gl_ail_count);
+
+ if (!tr.tr_revokes)
+ return;
+
+ /* A shortened, inline version of gfs2_trans_begin() */
+ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
+ tr.tr_ip = (unsigned long)__builtin_return_address(0);
+ INIT_LIST_HEAD(&tr.tr_list_buf);
+ gfs2_log_reserve(sdp, tr.tr_reserved);
+ BUG_ON(current->journal_info); /* the task must not already have journal_info set */
+ current->journal_info = &tr; /* points at a stack object for the duration of the flush */
+
+ __gfs2_ail_flush(gl);
+
+ gfs2_trans_end(sdp);
+ gfs2_log_flush(sdp, NULL);
+}
+/**
+ * gfs2_ail_flush - flush a glock's AIL list using a regular transaction
+ * @gl: the glock
+ *
+ * Exported counterpart of gfs2_ail_empty_gl(): the transaction is opened
+ * with gfs2_trans_begin() (0 blocks, one revoke per AIL entry counted at
+ * entry) instead of being built by hand.  Nothing is done when the AIL
+ * count is zero.  The function returns void, so a gfs2_trans_begin()
+ * failure is not reported to the caller; the flush is simply skipped.
+ */
+void gfs2_ail_flush(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ unsigned int revokes = atomic_read(&gl->gl_ail_count);
+ int ret;
+
+ if (!revokes)
+ return;
+ ret = gfs2_trans_begin(sdp, 0, revokes);
+ if (ret)
+ return; /* error is dropped here: no transaction, no flush */
+ __gfs2_ail_flush(gl);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
}
/**
+ * gfs2_set_nlink - Set the inode's link count based on on-disk info
+ * @inode: The inode in question
+ * @nlink: The link count
+ *
+ * If the link count has hit zero, it must never be raised, whatever the
+ * on-disk inode might say. When new struct inodes are created the link
+ * count is set to 1, so that we can safely use this test even when reading
+ * in on disk information for the first time.
+ */
+
+static void gfs2_set_nlink(struct inode *inode, u32 nlink)
+{
+	/*
+	 * Reminder carried over from the original code: setting the link
+	 * count here needs to be reviewed in the light of the forthcoming
+	 * ro bind mount work.
+	 */
+
+	/* Nothing to do if unchanged; a count that reached zero stays zero */
+	if (inode->i_nlink == nlink || inode->i_nlink == 0)
+		return;
+
+	if (nlink)
+		inode->i_nlink = nlink;
+	else
+		clear_nlink(inode);
+}
+
+/*
+ * gfs2_dinode_in - copy an on-disk dinode into the in-core inode
+ * @ip: the GFS2 inode to fill in
+ * @buf: buffer holding the big-endian struct gfs2_dinode
+ *
+ * The block address, metadata height and directory depth read from @buf
+ * are validated; any mismatch or out-of-range value marks the inode
+ * inconsistent via gfs2_consist_inode().
+ *
+ * Returns: 0 on success, -EIO if the on-disk data fails validation
+ */
+static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+{
+	const struct gfs2_dinode *di = buf;
+	struct inode *inode = &ip->i_inode;
+	struct timespec disk_atime;
+	u16 di_height, di_depth;
+
+	/* The buffer must carry the block address this inode was looked up by */
+	if (unlikely(be64_to_cpu(di->di_num.no_addr) != ip->i_no_addr))
+		goto corrupt;
+
+	ip->i_no_formal_ino = be64_to_cpu(di->di_num.no_formal_ino);
+	inode->i_mode = be32_to_cpu(di->di_mode);
+
+	/* Only block and character device nodes carry a device number */
+	inode->i_rdev = 0;
+	if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
+		inode->i_rdev = MKDEV(be32_to_cpu(di->di_major),
+				      be32_to_cpu(di->di_minor));
+
+	inode->i_uid = be32_to_cpu(di->di_uid);
+	inode->i_gid = be32_to_cpu(di->di_gid);
+	gfs2_set_nlink(inode, be32_to_cpu(di->di_nlink));
+	i_size_write(inode, be64_to_cpu(di->di_size));
+	gfs2_set_inode_blocks(inode, be64_to_cpu(di->di_blocks));
+
+	/* atime is only ever moved forwards by a refresh */
+	disk_atime.tv_sec = be64_to_cpu(di->di_atime);
+	disk_atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
+	if (timespec_compare(&inode->i_atime, &disk_atime) < 0)
+		inode->i_atime = disk_atime;
+
+	inode->i_mtime.tv_sec = be64_to_cpu(di->di_mtime);
+	inode->i_mtime.tv_nsec = be32_to_cpu(di->di_mtime_nsec);
+	inode->i_ctime.tv_sec = be64_to_cpu(di->di_ctime);
+	inode->i_ctime.tv_nsec = be32_to_cpu(di->di_ctime_nsec);
+
+	ip->i_goal = be64_to_cpu(di->di_goal_meta);
+	ip->i_generation = be64_to_cpu(di->di_generation);
+
+	ip->i_diskflags = be32_to_cpu(di->di_flags);
+	gfs2_set_inode_flags(inode);
+
+	di_height = be16_to_cpu(di->di_height);
+	if (unlikely(di_height > GFS2_MAX_META_HEIGHT))
+		goto corrupt;
+	ip->i_height = (u8)di_height;
+
+	di_depth = be16_to_cpu(di->di_depth);
+	if (unlikely(di_depth > GFS2_DIR_MAX_DEPTH))
+		goto corrupt;
+	ip->i_depth = (u8)di_depth;
+	ip->i_entries = be32_to_cpu(di->di_entries);
+
+	ip->i_eattr = be64_to_cpu(di->di_eattr);
+	if (S_ISREG(inode->i_mode))
+		gfs2_set_aops(inode);
+
+	return 0;
+
+corrupt:
+	gfs2_consist_inode(ip);
+	return -EIO;
+}
+
+/**
+ * gfs2_inode_refresh - Refresh the incore copy of the dinode
+ * @ip: The GFS2 inode
+ *
+ * Reads the inode's metadata buffer, checks that it is a dinode block and
+ * copies it into @ip.  GIF_INVALID is cleared once the copy has been
+ * attempted, whether or not gfs2_dinode_in() reported an error; it is left
+ * untouched when the buffer cannot be read or has the wrong metadata type.
+ *
+ * Returns: errno
+ */
+
+int gfs2_inode_refresh(struct gfs2_inode *ip)
+{
+	struct buffer_head *dibh;
+	int ret;
+
+	ret = gfs2_meta_inode_buffer(ip, &dibh);
+	if (ret)
+		return ret;
+
+	if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
+		ret = -EIO;
+		goto out_brelse;
+	}
+
+	ret = gfs2_dinode_in(ip, dibh->b_data);
+	clear_bit(GIF_INVALID, &ip->i_flags);
+
+out_brelse:
+	brelse(dibh);
+	return ret;
+}
+
+/**
* inode_go_lock - operation done after an inode lock is locked by a process
* @gl: the glock
* @flags:
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
extern const struct gfs2_glock_operations *gfs2_glops_list[];
+extern void gfs2_ail_flush(struct gfs2_glock *gl);
+
#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
-#define DIO_ALL 0x00000100
struct gfs2_log_operations;
struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
GLF_INITIAL = 10,
GLF_FROZEN = 11,
GLF_QUEUED = 12,
+ GLF_LRU = 13,
+ GLF_OBJECT = 14, /* Used only for tracing */
};
struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
struct list_head gl_ail_list;
atomic_t gl_ail_count;
+ atomic_t gl_revokes;
struct delayed_work gl_work;
struct work_struct gl_delete;
struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
unsigned int ai_first;
struct list_head ai_ail1_list;
struct list_head ai_ail2_list;
-
- u64 ai_sync_gen;
};
struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
char sb_lockproto[GFS2_LOCKNAME_LEN];
char sb_locktable[GFS2_LOCKNAME_LEN];
- u8 sb_uuid[16];
};
/*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
spinlock_t sd_ail_lock;
struct list_head sd_ail1_list;
struct list_head sd_ail2_list;
- u64 sd_ail_sync_gen;
/* Replay stuff */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9134dcb89479..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/xattr.h>
#include <linux/posix_acl.h>
-#include <linux/sort.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
+#include <linux/fiemap.h>
#include <linux/security.h>
-#include <linux/time.h>
+#include <asm/uaccess.h>
#include "gfs2.h"
#include "incore.h"
@@ -26,19 +28,14 @@
#include "dir.h"
#include "xattr.h"
#include "glock.h"
-#include "glops.h"
#include "inode.h"
-#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
-
-struct gfs2_inum_range_host {
- u64 ir_start;
- u64 ir_length;
-};
+#include "super.h"
+#include "glops.h"
struct gfs2_skip_data {
u64 no_addr;
@@ -74,14 +71,14 @@ static int iget_set(struct inode *inode, void *opaque)
return 0;
}
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
{
unsigned long hash = (unsigned long)no_addr;
struct gfs2_skip_data data;
data.no_addr = no_addr;
data.skipped = 0;
- data.non_block = 0;
+ data.non_block = non_block;
return ilookup5(sb, hash, iget_test, &data);
}
@@ -248,203 +245,6 @@ fail_iput:
goto fail;
}
-static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
-{
- const struct gfs2_dinode *str = buf;
- struct timespec atime;
- u16 height, depth;
-
- if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
- goto corrupt;
- ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
- ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
- ip->i_inode.i_rdev = 0;
- switch (ip->i_inode.i_mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
- be32_to_cpu(str->di_minor));
- break;
- };
-
- ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
- ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
- /*
- * We will need to review setting the nlink count here in the
- * light of the forthcoming ro bind mount work. This is a reminder
- * to do that.
- */
- ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
- i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
- gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
- atime.tv_sec = be64_to_cpu(str->di_atime);
- atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
- if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
- ip->i_inode.i_atime = atime;
- ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
- ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
- ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
- ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
-
- ip->i_goal = be64_to_cpu(str->di_goal_meta);
- ip->i_generation = be64_to_cpu(str->di_generation);
-
- ip->i_diskflags = be32_to_cpu(str->di_flags);
- gfs2_set_inode_flags(&ip->i_inode);
- height = be16_to_cpu(str->di_height);
- if (unlikely(height > GFS2_MAX_META_HEIGHT))
- goto corrupt;
- ip->i_height = (u8)height;
-
- depth = be16_to_cpu(str->di_depth);
- if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
- goto corrupt;
- ip->i_depth = (u8)depth;
- ip->i_entries = be32_to_cpu(str->di_entries);
-
- ip->i_eattr = be64_to_cpu(str->di_eattr);
- if (S_ISREG(ip->i_inode.i_mode))
- gfs2_set_aops(&ip->i_inode);
-
- return 0;
-corrupt:
- if (gfs2_consist_inode(ip))
- gfs2_dinode_print(ip);
- return -EIO;
-}
-
-/**
- * gfs2_inode_refresh - Refresh the incore copy of the dinode
- * @ip: The GFS2 inode
- *
- * Returns: errno
- */
-
-int gfs2_inode_refresh(struct gfs2_inode *ip)
-{
- struct buffer_head *dibh;
- int error;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
- if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
- brelse(dibh);
- return -EIO;
- }
-
- error = gfs2_dinode_in(ip, dibh->b_data);
- brelse(dibh);
- clear_bit(GIF_INVALID, &ip->i_flags);
-
- return error;
-}
-
-int gfs2_dinode_dealloc(struct gfs2_inode *ip)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_alloc *al;
- struct gfs2_rgrpd *rgd;
- int error;
-
- if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
- if (gfs2_consist_inode(ip))
- gfs2_dinode_print(ip);
- return -EIO;
- }
-
- al = gfs2_alloc_get(ip);
- if (!al)
- return -ENOMEM;
-
- error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
- if (error)
- goto out;
-
- error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
- if (error)
- goto out_qs;
-
- rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
- if (!rgd) {
- gfs2_consist_inode(ip);
- error = -EIO;
- goto out_rindex_relse;
- }
-
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
- &al->al_rgd_gh);
- if (error)
- goto out_rindex_relse;
-
- error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
- if (error)
- goto out_rg_gunlock;
-
- set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
- set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
-
- gfs2_free_di(rgd, ip);
-
- gfs2_trans_end(sdp);
-
-out_rg_gunlock:
- gfs2_glock_dq_uninit(&al->al_rgd_gh);
-out_rindex_relse:
- gfs2_glock_dq_uninit(&al->al_ri_gh);
-out_qs:
- gfs2_quota_unhold(ip);
-out:
- gfs2_alloc_put(ip);
- return error;
-}
-
-/**
- * gfs2_change_nlink - Change nlink count on inode
- * @ip: The GFS2 inode
- * @diff: The change in the nlink count required
- *
- * Returns: errno
- */
-int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
-{
- struct buffer_head *dibh;
- u32 nlink;
- int error;
-
- BUG_ON(diff != 1 && diff != -1);
- nlink = ip->i_inode.i_nlink + diff;
-
- /* If we are reducing the nlink count, but the new value ends up being
- bigger than the old one, we must have underflowed. */
- if (diff < 0 && nlink > ip->i_inode.i_nlink) {
- if (gfs2_consist_inode(ip))
- gfs2_dinode_print(ip);
- return -EIO;
- }
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
- if (diff > 0)
- inc_nlink(&ip->i_inode);
- else
- drop_nlink(&ip->i_inode);
-
- ip->i_inode.i_ctime = CURRENT_TIME;
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
- mark_inode_dirty(&ip->i_inode);
-
- if (ip->i_inode.i_nlink == 0)
- gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
-
- return error;
-}
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
{
@@ -543,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
/* Don't create entries in an unlinked directory */
if (!dip->i_inode.i_nlink)
- return -EPERM;
+ return -ENOENT;
error = gfs2_dir_check(&dip->i_inode, name, NULL);
switch (error) {
@@ -613,21 +413,44 @@ out:
return error;
}
+/*
+ * gfs2_init_dir - write the two initial entries of a new directory
+ * @dibh: buffer holding the new directory's dinode
+ * @parent: the directory the new one is being created in
+ *
+ * The entries are laid out directly after the dinode header in @dibh.
+ * The first is named by gfs2_qdot and carries the new dinode's own inode
+ * number; the second is named by gfs2_qdotdot, carries @parent's inode
+ * number, and is given all the space left in the block after the dinode
+ * header and the first entry.
+ */
+static void gfs2_init_dir(struct buffer_head *dibh,
+			  const struct gfs2_inode *parent)
+{
+	struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+	struct gfs2_dirent *first = (struct gfs2_dirent *)(di + 1);
+	struct gfs2_dirent *second =
+		(struct gfs2_dirent *)((char *)first + GFS2_DIRENT_SIZE(1));
+
+	gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), first);
+	first->de_inum = di->di_num; /* already GFS2 endian */
+	first->de_type = cpu_to_be16(DT_DIR);
+
+	gfs2_qstr2dirent(&gfs2_qdotdot,
+			 dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode),
+			 second);
+	gfs2_inum_out(parent, second);
+	second->de_type = cpu_to_be16(DT_DIR);
+}
+
/**
* init_dinode - Fill in a new dinode structure
- * @dip: the directory this inode is being created in
+ * @dip: The directory this inode is being created in
* @gl: The glock covering the new inode
- * @inum: the inode number
- * @mode: the file permissions
- * @uid:
- * @gid:
+ * @inum: The inode number
+ * @mode: The file permissions
+ * @uid: The uid of the new inode
+ * @gid: The gid of the new inode
+ * @generation: The generation number of the new inode
+ * @dev: The device number (if a device node)
+ * @symname: The symlink destination (if a symlink)
+ * @size: The inode size (ignored for directories)
+ * @bhp: The buffer head (returned to caller)
*
*/
static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
const struct gfs2_inum_host *inum, unsigned int mode,
unsigned int uid, unsigned int gid,
- const u64 *generation, dev_t dev, struct buffer_head **bhp)
+ const u64 *generation, dev_t dev, const char *symname,
+ unsigned size, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
@@ -646,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_uid = cpu_to_be32(uid);
di->di_gid = cpu_to_be32(gid);
di->di_nlink = 0;
- di->di_size = 0;
+ di->di_size = cpu_to_be64(size);
di->di_blocks = cpu_to_be64(1);
di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
di->di_major = cpu_to_be32(MAJOR(dev));
@@ -654,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
di->di_generation = cpu_to_be64(*generation);
di->di_flags = 0;
-
- if (S_ISREG(mode)) {
- if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
- gfs2_tune_get(sdp, gt_new_files_jdata))
- di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
- } else if (S_ISDIR(mode)) {
- di->di_flags |= cpu_to_be32(dip->i_diskflags &
- GFS2_DIF_INHERIT_JDATA);
- }
-
di->__pad1 = 0;
di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
di->di_height = 0;
@@ -677,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));
-
+
+ switch(mode & S_IFMT) {
+ case S_IFREG:
+ if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
+ gfs2_tune_get(sdp, gt_new_files_jdata))
+ di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
+ break;
+ case S_IFDIR:
+ di->di_flags |= cpu_to_be32(dip->i_diskflags &
+ GFS2_DIF_INHERIT_JDATA);
+ di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
+ di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
+ di->di_entries = cpu_to_be32(2);
+ gfs2_init_dir(dibh, dip);
+ break;
+ case S_IFLNK:
+ memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
+ break;
+ }
+
set_buffer_uptodate(dibh);
*bhp = dibh;
@@ -685,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
unsigned int mode, const struct gfs2_inum_host *inum,
- const u64 *generation, dev_t dev, struct buffer_head **bhp)
+ const u64 *generation, dev_t dev, const char *symname,
+ unsigned int size, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
unsigned int uid, gid;
@@ -707,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
if (error)
goto out_quota;
- init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp);
+ init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
gfs2_quota_change(dip, +1, uid, gid);
gfs2_trans_end(sdp);
@@ -761,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
goto fail_quota_locks;
}
- error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
+ error = gfs2_dir_add(&dip->i_inode, name, ip);
if (error)
goto fail_end_trans;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto fail_end_trans;
- ip->i_inode.i_nlink = 1;
+ inc_nlink(&ip->i_inode);
+ if (S_ISDIR(ip->i_inode.i_mode))
+ inc_nlink(&ip->i_inode);
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -815,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
}
/**
- * gfs2_createi - Create a new inode
- * @ghs: An array of two holders
- * @name: The name of the new file
- * @mode: the permissions on the new inode
- *
- * @ghs[0] is an initialized holder for the directory
- * @ghs[1] is the holder for the inode lock
+ * gfs2_create_inode - Create a new inode
+ * @dir: The parent directory
+ * @dentry: The new dentry
+ * @mode: The permissions on the new inode
+ * @dev: For device nodes, this is the device number
+ * @symname: For symlinks, this is the link destination
+ * @size: The initial size of the inode (ignored for directories)
*
- * If the return value is not NULL, the glocks on both the directory and the new
- * file are held. A transaction has been started and an inplace reservation
- * is held, as well.
- *
- * Returns: An inode
+ * Returns: 0 on success, or error code
*/
-struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
- unsigned int mode, dev_t dev)
+static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
+ unsigned int mode, dev_t dev, const char *symname,
+ unsigned int size)
{
+ const struct qstr *name = &dentry->d_name;
+ struct gfs2_holder ghs[2];
struct inode *inode = NULL;
- struct gfs2_inode *dip = ghs->gh_gl->gl_object;
- struct inode *dir = &dip->i_inode;
+ struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
int error;
@@ -843,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
struct buffer_head *bh = NULL;
if (!name->len || name->len > GFS2_FNAMESIZE)
- return ERR_PTR(-ENAMETOOLONG);
+ return -ENAMETOOLONG;
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
- error = gfs2_glock_nq(ghs);
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
if (error)
goto fail;
@@ -864,7 +696,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock;
- error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh);
+ error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
if (error)
goto fail_gunlock2;
@@ -891,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (bh)
brelse(bh);
- return inode;
+
+ gfs2_trans_end(sdp);
+ if (dip->i_alloc->al_rgd)
+ gfs2_inplace_release(dip);
+ gfs2_quota_unlock(dip);
+ gfs2_alloc_put(dip);
+ gfs2_glock_dq_uninit_m(2, ghs);
+ mark_inode_dirty(inode);
+ d_instantiate(dentry, inode);
+ return 0;
fail_gunlock2:
gfs2_glock_dq_uninit(ghs + 1);
if (inode && !IS_ERR(inode))
iput(inode);
fail_gunlock:
- gfs2_glock_dq(ghs);
+ gfs2_glock_dq_uninit(ghs);
fail:
if (bh)
brelse(bh);
- return ERR_PTR(error);
+ return error;
+}
+
+/**
+ * gfs2_create - Create a file
+ * @dir: The directory in which to create the file
+ * @dentry: The dentry of the new file
+ * @mode: The mode of the new file
+ * @nd: Lookup data from the VFS; may be NULL.  When LOOKUP_EXCL is set in
+ *      its flags, an already existing name is reported as -EEXIST.
+ *
+ * Without LOOKUP_EXCL, an -EEXIST from gfs2_create_inode() is handled by
+ * looking the name up and instantiating the dentry with the inode found.
+ * If that lookup finds nothing, the creation is retried.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_create(struct inode *dir, struct dentry *dentry,
+		       int mode, struct nameidata *nd)
+{
+	struct inode *existing;
+	int err;
+
+	for (;;) {
+		err = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
+		if (err != -EEXIST)
+			return err;
+		if (nd && (nd->flags & LOOKUP_EXCL))
+			return err;
+
+		existing = gfs2_lookupi(dir, &dentry->d_name, 0);
+		if (!existing)
+			continue;	/* name vanished again: try creating once more */
+		if (IS_ERR(existing))
+			return PTR_ERR(existing);
+		break;
+	}
+
+	d_instantiate(dentry, existing);
+	return 0;
+}
+
+/**
+ * gfs2_lookup - Look up a filename in a directory and return its inode
+ * @dir: The directory inode
+ * @dentry: The dentry of the new inode
+ * @nd: passed from Linux VFS, ignored by us
+ *
+ * Called by the VFS layer.  The name is resolved with gfs2_lookupi().
+ * When an inode is found, its glock is acquired once in the shared state
+ * (LM_FLAG_ANY) and released again before the dentry is spliced in.
+ *
+ * Returns: NULL after adding a negative dentry when the name does not
+ *          exist, the result of d_splice_alias() when it does, or an
+ *          ERR_PTR on failure
+ */
+
+static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
+				  struct nameidata *nd)
+{
+	struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+	struct gfs2_holder gh;
+	int error;
+
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	if (!inode) {
+		/* No such name: record a negative dentry */
+		d_add(dentry, NULL);
+		return NULL;
+	}
+
+	error = gfs2_glock_nq_init(GFS2_I(inode)->i_gl, LM_ST_SHARED,
+				   LM_FLAG_ANY, &gh);
+	if (error) {
+		iput(inode);
+		return ERR_PTR(error);
+	}
+	gfs2_glock_dq_uninit(&gh);
+
+	return d_splice_alias(inode, dentry);
+}
+
+/**
+ * gfs2_link - Link to a file
+ * @old_dentry: The inode to link
+ * @dir: Add link to this directory
+ * @dentry: The name of the link
+ *
+ * Link the inode in "old_dentry" into the directory "dir" with the
+ * name in "dentry".
+ *
+ * Directories are refused with -EPERM before any lock is taken.  Both
+ * glocks are then acquired exclusively, parent first and child second,
+ * and every later failure unwinds through the out_* labels in reverse
+ * order of acquisition.  The cleanup steps for the block reservation,
+ * quota lock and allocation structure are guarded by alloc_required, so
+ * the same labels serve both the allocating and non-allocating paths.
+ * On success an extra inode reference is taken with ihold() and the
+ * dentry is instantiated.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ struct gfs2_inode *dip = GFS2_I(dir);
+ struct gfs2_sbd *sdp = GFS2_SB(dir);
+ struct inode *inode = old_dentry->d_inode;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder ghs[2];
+ struct buffer_head *dibh;
+ int alloc_required;
+ int error;
+
+ if (S_ISDIR(inode->i_mode))
+ return -EPERM;
+
+ gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+
+ error = gfs2_glock_nq(ghs); /* parent */
+ if (error)
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
+
+ error = -ENOENT;
+ if (inode->i_nlink == 0) /* never re-link an inode whose count has reached zero */
+ goto out_gunlock;
+
+ error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
+ if (error)
+ goto out_gunlock;
+
+ error = gfs2_dir_check(dir, &dentry->d_name, NULL);
+ switch (error) {
+ case -ENOENT: /* name is free: the only result that lets the link proceed */
+ break;
+ case 0:
+ error = -EEXIST; /* deliberate fall through to the goto below */
+ default:
+ goto out_gunlock;
+ }
+
+ error = -EINVAL;
+ if (!dip->i_inode.i_nlink)
+ goto out_gunlock;
+ error = -EFBIG;
+ if (dip->i_entries == (u32)-1) /* directory entry counter would overflow */
+ goto out_gunlock;
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto out_gunlock;
+ error = -EINVAL;
+ if (!ip->i_inode.i_nlink) /* NOTE(review): looks like the same test as the -ENOENT check above, since ip is derived from inode - confirm */
+ goto out_gunlock;
+ error = -EMLINK;
+ if (ip->i_inode.i_nlink == (u32)-1) /* link count would overflow */
+ goto out_gunlock;
+
+ alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
+ if (error < 0)
+ goto out_gunlock;
+ error = 0;
+
+ if (alloc_required) {
+ struct gfs2_alloc *al = gfs2_alloc_get(dip);
+ if (!al) {
+ error = -ENOMEM;
+ goto out_gunlock;
+ }
+
+ error = gfs2_quota_lock_check(dip);
+ if (error)
+ goto out_alloc;
+
+ al->al_requested = sdp->sd_max_dirres;
+
+ error = gfs2_inplace_reserve(dip);
+ if (error)
+ goto out_gunlock_q;
+
+ error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
+ gfs2_rg_blocks(al) +
+ 2 * RES_DINODE + RES_STATFS +
+ RES_QUOTA, 0);
+ if (error)
+ goto out_ipres;
+ } else {
+ error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
+ if (error)
+ goto out_ipres; /* nothing was reserved; the label's cleanup is skipped via alloc_required */
+ }
+
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (error)
+ goto out_end_trans;
+
+ error = gfs2_dir_add(dir, &dentry->d_name, ip);
+ if (error)
+ goto out_brelse;
+
+ gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+ inc_nlink(&ip->i_inode);
+ ip->i_inode.i_ctime = CURRENT_TIME;
+ gfs2_dinode_out(ip, dibh->b_data);
+ mark_inode_dirty(&ip->i_inode);
+
+out_brelse:
+ brelse(dibh);
+out_end_trans:
+ gfs2_trans_end(sdp);
+out_ipres:
+ if (alloc_required)
+ gfs2_inplace_release(dip);
+out_gunlock_q:
+ if (alloc_required)
+ gfs2_quota_unlock(dip);
+out_alloc:
+ if (alloc_required)
+ gfs2_alloc_put(dip);
+out_gunlock:
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_glock_dq(ghs);
+out_parent:
+ gfs2_holder_uninit(ghs); /* both holders are uninitialised on every path that initialised them */
+ gfs2_holder_uninit(ghs + 1);
+ if (!error) {
+ ihold(inode); /* reference handed to the dentry by d_instantiate() below */
+ d_instantiate(dentry, inode);
+ mark_inode_dirty(inode);
+ }
+ return error;
+}
+
+/*
+ * gfs2_unlink_ok - check to see that an inode is still in a directory
+ * @dip: the directory
+ * @name: the name of the file
+ * @ip: the inode
+ *
+ * Assumes that the lock on (at least) @dip is held.
+ *
+ * The checks run in this order: immutable/append-only flags on @ip, the
+ * S_ISVTX ownership rule on @dip, the append-only flag on @dip, write and
+ * exec permission on @dip, and finally that @name in @dip refers to @ip.
+ *
+ * Returns: 0 if the parent/child relationship is correct, errno if it isn't
+ */
+
+static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
+			  const struct gfs2_inode *ip)
+{
+	struct inode *dir = &dip->i_inode;
+	const struct inode *victim = &ip->i_inode;
+	int ret;
+
+	if (IS_IMMUTABLE(victim) || IS_APPEND(victim))
+		return -EPERM;
+
+	if (dir->i_mode & S_ISVTX) {
+		/* Only the owner of the directory or of the entry, or a
+		 * CAP_FOWNER holder, may remove it */
+		if (dir->i_uid != current_fsuid() &&
+		    victim->i_uid != current_fsuid() &&
+		    !capable(CAP_FOWNER))
+			return -EPERM;
+	}
+
+	if (IS_APPEND(dir))
+		return -EPERM;
+
+	ret = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
+	if (ret)
+		return ret;
+
+	return gfs2_dir_check(dir, name, ip);
+}
+
+/**
+ * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
+ * @dip: The parent directory
+ * @dentry: The dentry of the entry in the parent directory; its d_inode
+ *          is the inode to be removed
+ * @bh: The inode buffer for the inode to be removed
+ *
+ * Called with all the locks and in a transaction. This will only be
+ * called for a directory after it has been checked to ensure it is empty.
+ *
+ * Returns: 0 on success, or an error
+ */
+
+static int gfs2_unlink_inode(struct gfs2_inode *dip,
+ const struct dentry *dentry,
+ struct buffer_head *bh)
+{
+ struct inode *inode = dentry->d_inode;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ int error;
+
+ error = gfs2_dir_del(dip, dentry);
+ if (error)
+ return error;
+
+ ip->i_entries = 0;
+ inode->i_ctime = CURRENT_TIME;
+ if (S_ISDIR(inode->i_mode))
+ clear_nlink(inode); /* a removed directory goes straight to zero links */
+ else
+ drop_nlink(inode);
+ gfs2_trans_add_bh(ip->i_gl, bh, 1); /* add the buffer to the transaction before rewriting the dinode in it */
+ gfs2_dinode_out(ip, bh->b_data);
+ mark_inode_dirty(inode);
+ if (inode->i_nlink == 0)
+ gfs2_unlink_di(inode);
+ return 0;
+}
+
+
+/**
+ * gfs2_unlink - Unlink an inode (this does rmdir as well)
+ * @dir: The inode of the directory containing the inode to unlink
+ * @dentry: The file itself
+ *
+ * This routine uses the type of the inode as a flag to figure out
+ * whether this is an unlink or an rmdir.
+ *
+ * Locks are taken in the order rindex, parent directory, child inode,
+ * resource group, and are dropped in the reverse order.  All three
+ * holders are initialised together and uninitialised together, so no
+ * error path can leave an initialised holder behind.
+ *
+ * Returns: errno.  -EIO is returned (and the inode flagged inconsistent)
+ * when no resource group contains the inode's block; -ENOENT when the
+ * inode already has a zero link count; -ENOTEMPTY for a directory that
+ * still has entries or extra links.
+ */
+
+static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_sbd *sdp = GFS2_SB(dir);
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct buffer_head *bh;
+	struct gfs2_holder ghs[3];
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_holder ri_gh;
+	int error;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		return error;
+
+	/*
+	 * Look the resource group up before touching any holder: the lookup
+	 * can come back empty, and rgd->rd_gl must not be dereferenced then.
+	 */
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+	if (!rgd) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out_rindex;
+	}
+
+	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+	error = gfs2_glock_nq(ghs); /* parent */
+	if (error)
+		goto out_parent;
+
+	error = gfs2_glock_nq(ghs + 1); /* child */
+	if (error)
+		goto out_child;
+
+	error = -ENOENT;
+	if (inode->i_nlink == 0)
+		goto out_rgrp;
+
+	if (S_ISDIR(inode->i_mode)) {
+		error = -ENOTEMPTY;
+		if (ip->i_entries > 2 || inode->i_nlink > 2)
+			goto out_rgrp;
+	}
+
+	error = gfs2_glock_nq(ghs + 2); /* rgrp */
+	if (error)
+		goto out_rgrp;
+
+	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_meta_inode_buffer(ip, &bh);
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_unlink_inode(dip, dentry, bh);
+	brelse(bh);
+
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_gunlock:
+	gfs2_glock_dq(ghs + 2);
+out_rgrp:
+	gfs2_glock_dq(ghs + 1);
+out_child:
+	gfs2_glock_dq(ghs);
+out_parent:
+	/* Every holder initialised above is uninitialised on every path */
+	gfs2_holder_uninit(ghs + 2);
+	gfs2_holder_uninit(ghs + 1);
+	gfs2_holder_uninit(ghs);
+out_rindex:
+	gfs2_glock_dq_uninit(&ri_gh);
+	return error;
+}
+
+/**
+ * gfs2_symlink - Create a symlink
+ * @dir: The directory to create the symlink in
+ * @dentry: The dentry to put the symlink in
+ * @symname: The thing which the link points to
+ *
+ * The target has to fit in one block after the dinode header with a
+ * byte to spare; anything longer is refused with -ENAMETOOLONG.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
+			const char *symname)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(dir);
+	unsigned int target_len = strlen(symname);
+
+	if (target_len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
+		return -ENAMETOOLONG;
+
+	return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0,
+				 symname, target_len);
+}
+
+/**
+ * gfs2_mkdir - Make a directory
+ * @dir: The parent directory of the new one
+ * @dentry: The dentry of the new directory
+ * @mode: The mode of the new directory
+ *
+ * Thin wrapper: ORs S_IFDIR into @mode and hands off to
+ * gfs2_create_inode() with no device number and no symlink target.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+ return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
+}
+
+/**
+ * gfs2_mknod - Make a special file
+ * @dir: The directory in which the special file will reside
+ * @dentry: The dentry of the special file
+ * @mode: The mode of the special file
+ * @dev: The device specification of the special file
+ *
+ * Thin wrapper: passes @mode and @dev straight to gfs2_create_inode()
+ * with no symlink target.
+ *
+ * Returns: whatever gfs2_create_inode() returns
+ */
+
+static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
+ dev_t dev)
+{
+ return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
+}
+
+/**
+ * gfs2_ok_to_move - check if it's ok to move a directory to another directory
+ * @this: move this
+ * @to: to here
+ *
+ * Follow @to back to the root and make sure we don't encounter @this.
+ * Assumes we already hold the rename lock.
+ *
+ * The walk keeps one inode reference at a time: the one taken on @to at
+ * the start is dropped as soon as the parent has been looked up, and
+ * whichever inode the loop stops on is released before returning.
+ *
+ * Returns: 0 if the move is allowed, -EINVAL if @this is @to or one of
+ * its ancestors, or the error from the parent lookup
+ */
+
+static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
+{
+ struct inode *dir = &to->i_inode;
+ struct super_block *sb = dir->i_sb;
+ struct inode *tmp;
+ int error = 0;
+
+ igrab(dir); /* return value is not checked here */
+
+ for (;;) {
+ if (dir == &this->i_inode) { /* @this is on the path from @to to the root */
+ error = -EINVAL;
+ break;
+ }
+ if (dir == sb->s_root->d_inode) { /* reached the root without meeting @this */
+ error = 0;
+ break;
+ }
+
+ tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); /* presumably the ".." lookup returning a referenced inode - confirm */
+ if (IS_ERR(tmp)) {
+ error = PTR_ERR(tmp);
+ break;
+ }
+
+ iput(dir); /* done with the child, step up to its parent */
+ dir = tmp;
+ }
+
+ iput(dir); /* drop the reference on whichever inode the walk stopped at */
+
+ return error;
+}
+
+/**
+ * gfs2_rename - Rename a file
+ * @odir: Parent directory of old file name
+ * @odentry: The old dentry of the file
+ * @ndir: Parent directory of new file name
+ * @ndentry: The new dentry of the file
+ *
+ * Locks are taken in this order: the rindex, the rename glock (only when
+ * the two directories differ), then exclusive glocks on the old
+ * directory, the new directory (if different), the inode being moved
+ * and, when the new name already exists, the target inode and its
+ * resource group.  An existing target is unlinked inside the same
+ * transaction that moves the directory entry.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_rename(struct inode *odir, struct dentry *odentry,
+		       struct inode *ndir, struct dentry *ndentry)
+{
+	struct gfs2_inode *odip = GFS2_I(odir);
+	struct gfs2_inode *ndip = GFS2_I(ndir);
+	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
+	struct gfs2_inode *nip = NULL;
+	struct gfs2_sbd *sdp = GFS2_SB(odir);
+	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
+	struct gfs2_rgrpd *nrgd;
+	unsigned int num_gh;
+	int dir_rename = 0;
+	int alloc_required = 0;
+	unsigned int x;
+	unsigned int i;
+	int error;
+
+	if (ndentry->d_inode) {
+		nip = GFS2_I(ndentry->d_inode);
+		/* Old and new name refer to the same inode: nothing to do */
+		if (ip == nip)
+			return 0;
+	}
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		return error;
+
+	if (odip != ndip) {
+		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
+					   0, &r_gh);
+		if (error)
+			goto out;
+
+		if (S_ISDIR(ip->i_inode.i_mode)) {
+			dir_rename = 1;
+			/* don't move a directory into its subdir */
+			error = gfs2_ok_to_move(ip, ndip);
+			if (error)
+				goto out_gunlock_r;
+		}
+	}
+
+	num_gh = 1;
+	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	if (odip != ndip) {
+		gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+		num_gh++;
+	}
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+	num_gh++;
+
+	if (nip) {
+		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+		num_gh++;
+		/* grab the resource lock for unlink flag twiddling
+		 * this is the case of the target file already existing
+		 * so we unlink before doing the rename
+		 */
+		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
+		if (nrgd)
+			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
+	}
+
+	/* On failure x is left at the index of the holder that did not queue */
+	for (x = 0; x < num_gh; x++) {
+		error = gfs2_glock_nq(ghs + x);
+		if (error)
+			goto out_gunlock;
+	}
+
+	error = -ENOENT;
+	if (ip->i_inode.i_nlink == 0)
+		goto out_gunlock;
+
+	/* Check out the old directory */
+
+	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
+	if (error)
+		goto out_gunlock;
+
+	/* Check out the new directory */
+
+	if (nip) {
+		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
+		if (error)
+			goto out_gunlock;
+
+		if (nip->i_inode.i_nlink == 0) {
+			error = -EAGAIN;
+			goto out_gunlock;
+		}
+
+		if (S_ISDIR(nip->i_inode.i_mode)) {
+			if (nip->i_entries < 2) {
+				gfs2_consist_inode(nip);
+				error = -EIO;
+				goto out_gunlock;
+			}
+			if (nip->i_entries > 2) {
+				error = -ENOTEMPTY;
+				goto out_gunlock;
+			}
+		}
+	} else {
+		error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
+		if (error)
+			goto out_gunlock;
+
+		/* The new name must not exist: -ENOENT is the good answer here */
+		error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
+		switch (error) {
+		case -ENOENT:
+			error = 0;
+			break;
+		case 0:
+			error = -EEXIST;
+			/* fall through */
+		default:
+			goto out_gunlock;
+		}
+
+		if (odip != ndip) {
+			if (!ndip->i_inode.i_nlink) {
+				error = -ENOENT;
+				goto out_gunlock;
+			}
+			if (ndip->i_entries == (u32)-1) {
+				error = -EFBIG;
+				goto out_gunlock;
+			}
+			if (S_ISDIR(ip->i_inode.i_mode) &&
+			    ndip->i_inode.i_nlink == (u32)-1) {
+				error = -EMLINK;
+				goto out_gunlock;
+			}
+		}
+	}
+
+	/* Check out the dir to be renamed */
+
+	if (dir_rename) {
+		error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
+		if (error)
+			goto out_gunlock;
+	}
+
+	/* A negative value is an errno, a positive one means blocks are needed */
+	if (nip == NULL)
+		alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
+	error = alloc_required;
+	if (error < 0)
+		goto out_gunlock;
+	error = 0;
+
+	if (alloc_required) {
+		struct gfs2_alloc *al = gfs2_alloc_get(ndip);
+		if (!al) {
+			error = -ENOMEM;
+			goto out_gunlock;
+		}
+
+		error = gfs2_quota_lock_check(ndip);
+		if (error)
+			goto out_alloc;
+
+		al->al_requested = sdp->sd_max_dirres;
+
+		error = gfs2_inplace_reserve_ri(ndip);
+		if (error)
+			goto out_gunlock_q;
+
+		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
+					 gfs2_rg_blocks(al) +
+					 4 * RES_DINODE + 4 * RES_LEAF +
+					 RES_STATFS + RES_QUOTA + 4, 0);
+		if (error)
+			goto out_ipreserv;
+	} else {
+		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
+					 5 * RES_LEAF + 4, 0);
+		if (error)
+			goto out_gunlock;
+	}
+
+	/* Remove the target file, if it exists */
+
+	if (nip) {
+		struct buffer_head *bh;
+		error = gfs2_meta_inode_buffer(nip, &bh);
+		if (error)
+			goto out_end_trans;
+		/*
+		 * NOTE(review): the result of gfs2_unlink_inode() is
+		 * overwritten below without being looked at - confirm that
+		 * carrying on after a failed unlink is intended.
+		 */
+		error = gfs2_unlink_inode(ndip, ndentry, bh);
+		brelse(bh);
+	}
+
+	if (dir_rename) {
+		error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
+		if (error)
+			goto out_end_trans;
+	} else {
+		struct buffer_head *dibh;
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			goto out_end_trans;
+		ip->i_inode.i_ctime = CURRENT_TIME;
+		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+		gfs2_dinode_out(ip, dibh->b_data);
+		brelse(dibh);
+	}
+
+	error = gfs2_dir_del(odip, odentry);
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
+	if (error)
+		goto out_end_trans;
+
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_ipreserv:
+	if (alloc_required)
+		gfs2_inplace_release(ndip);
+out_gunlock_q:
+	if (alloc_required)
+		gfs2_quota_unlock(ndip);
+out_alloc:
+	if (alloc_required)
+		gfs2_alloc_put(ndip);
+out_gunlock:
+	/*
+	 * x counts the holders that were queued.  Holders x..num_gh-1 were
+	 * initialised but never queued (x itself is the one that failed),
+	 * so they only need uninitialising.  On the normal path
+	 * x == num_gh and this loop does nothing.
+	 */
+	for (i = x; i < num_gh; i++)
+		gfs2_holder_uninit(ghs + i);
+	while (x--) {
+		gfs2_glock_dq(ghs + x);
+		gfs2_holder_uninit(ghs + x);
+	}
+out_gunlock_r:
+	if (r_gh.gh_gl)
+		gfs2_glock_dq_uninit(&r_gh);
+out:
+	gfs2_glock_dq_uninit(&ri_gh);
+	return error;
+}
+
+/**
+ * gfs2_follow_link - Follow a symbolic link
+ * @dentry: The dentry of the link
+ * @nd: Data that we pass to vfs_follow_link()
+ *
+ * This can handle symlinks of any size.
+ *
+ * The link target is copied out of the dinode block, right after the
+ * on-disk dinode header, into a freshly allocated NUL-terminated buffer
+ * while the inode glock is held shared.  The buffer is released again by
+ * gfs2_put_link().
+ *
+ * Returns: always NULL; the link body, or an ERR_PTR() on failure, is
+ * handed back through nd_set_link()
+ */
+
+static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+ struct gfs2_holder i_gh;
+ struct buffer_head *dibh;
+ unsigned int size;
+ char *buf;
+ int error;
+
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+ error = gfs2_glock_nq(&i_gh);
+ if (error) {
+ gfs2_holder_uninit(&i_gh);
+ nd_set_link(nd, ERR_PTR(error));
+ return NULL;
+ }
+
+ size = (unsigned int)i_size_read(&ip->i_inode);
+ if (size == 0) { /* an empty symlink is reported as an inconsistency and fails with -EIO */
+ gfs2_consist_inode(ip);
+ buf = ERR_PTR(-EIO);
+ goto out;
+ }
+
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (error) {
+ buf = ERR_PTR(error);
+ goto out;
+ }
+
+ buf = kzalloc(size + 1, GFP_NOFS); /* zeroed, so the extra byte is the terminator */
+ if (!buf)
+ buf = ERR_PTR(-ENOMEM);
+ else
+ memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size); /* NOTE(review): size is not checked against the block payload here - confirm it is validated elsewhere */
+ brelse(dibh);
+out:
+ gfs2_glock_dq_uninit(&i_gh);
+ nd_set_link(nd, buf);
+ return NULL;
+}
+
+/*
+ * gfs2_put_link - release the link body stored in @nd
+ *
+ * Frees whatever nd_get_link() returns unless it is an ERR_PTR() value.
+ * @dentry and @p are not used.
+ */
+static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+	char *link = nd_get_link(nd);
+
+	if (IS_ERR(link))
+		return;
+
+	kfree(link);
+}
+
+/**
+ * gfs2_permission - Check access permissions on an inode
+ * @inode: The inode
+ * @mask: The mask to be tested
+ * @flags: Indicates whether this is an RCU path walk or not
+ *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if its not already been done.
+ *
+ * Returns: errno (-ECHILD if the glock would have to be taken during an
+ * RCU walk, -EACCES for a write to an immutable inode)
+ */
+
+int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
+{
+ struct gfs2_inode *ip;
+ struct gfs2_holder i_gh;
+ int error;
+ int unlock = 0; /* set once this call has taken the glock itself */
+
+
+ ip = GFS2_I(inode);
+ if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+ if (flags & IPERM_FLAG_RCU) /* the glock is not taken on an RCU walk; bail out with -ECHILD */
+ return -ECHILD;
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+ if (error)
+ return error;
+ unlock = 1;
+ }
+
+ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
+ error = -EACCES;
+ else
+ error = generic_permission(inode, mask, flags, gfs2_check_acl);
+ if (unlock)
+ gfs2_glock_dq_uninit(&i_gh);
+
+ return error;
}
static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -928,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
* @ip:
* @attr:
*
- * Called with a reference on the vnode.
- *
* Returns: errno
*/
@@ -949,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
return error;
}
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-{
- struct gfs2_dinode *str = buf;
-
- str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
- str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
- str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
- str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
- str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
- str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
- str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
- str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
- str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
- str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
- str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
- str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
- str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
- str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
-
- str->di_goal_meta = cpu_to_be64(ip->i_goal);
- str->di_goal_data = cpu_to_be64(ip->i_goal);
- str->di_generation = cpu_to_be64(ip->i_generation);
-
- str->di_flags = cpu_to_be32(ip->i_diskflags);
- str->di_height = cpu_to_be16(ip->i_height);
- str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
- !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
- GFS2_FORMAT_DE : 0);
- str->di_depth = cpu_to_be16(ip->i_depth);
- str->di_entries = cpu_to_be32(ip->i_entries);
-
- str->di_eattr = cpu_to_be64(ip->i_eattr);
- str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
- str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
- str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
-}
-
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
- printk(KERN_INFO " no_formal_ino = %llu\n",
- (unsigned long long)ip->i_no_formal_ino);
- printk(KERN_INFO " no_addr = %llu\n",
- (unsigned long long)ip->i_no_addr);
- printk(KERN_INFO " i_size = %llu\n",
- (unsigned long long)i_size_read(&ip->i_inode));
- printk(KERN_INFO " blocks = %llu\n",
- (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
- printk(KERN_INFO " i_goal = %llu\n",
- (unsigned long long)ip->i_goal);
- printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags);
- printk(KERN_INFO " i_height = %u\n", ip->i_height);
- printk(KERN_INFO " i_depth = %u\n", ip->i_depth);
- printk(KERN_INFO " i_entries = %u\n", ip->i_entries);
- printk(KERN_INFO " i_eattr = %llu\n",
- (unsigned long long)ip->i_eattr);
+/*
+ * setattr_chown - change the owner and/or group of an inode
+ * @inode: The inode being changed
+ * @attr: The requested attributes
+ *
+ * An id that is not flagged in @attr->ia_valid, or that equals the
+ * current one, is replaced by NO_QUOTA_CHANGE on both the old and the
+ * new side.  If at least one id really changes, the quota is checked
+ * for the new ids and the inode's block count is moved from the old
+ * ids to the new ones once the attributes have been written.
+ *
+ * Returns: errno
+ */
+static int setattr_chown(struct inode *inode, struct iattr *attr)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	u32 old_uid = inode->i_uid, new_uid = attr->ia_uid;
+	u32 old_gid = inode->i_gid, new_gid = attr->ia_gid;
+	int ids_change;
+	int ret;
+
+	if (old_uid == new_uid || !(attr->ia_valid & ATTR_UID))
+		old_uid = new_uid = NO_QUOTA_CHANGE;
+	if (old_gid == new_gid || !(attr->ia_valid & ATTR_GID))
+		old_gid = new_gid = NO_QUOTA_CHANGE;
+
+	ids_change = (old_uid != NO_QUOTA_CHANGE || old_gid != NO_QUOTA_CHANGE);
+
+	if (gfs2_alloc_get(ip) == NULL)
+		return -ENOMEM;
+
+	ret = gfs2_quota_lock(ip, new_uid, new_gid);
+	if (ret)
+		goto out_alloc;
+
+	if (ids_change) {
+		ret = gfs2_quota_check(ip, new_uid, new_gid);
+		if (ret)
+			goto out_gunlock_q;
+	}
+
+	ret = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
+	if (ret)
+		goto out_gunlock_q;
+
+	ret = gfs2_setattr_simple(ip, attr);
+	if (ret)
+		goto out_end_trans;
+
+	if (ids_change) {
+		u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
+
+		/* Take the usage away from the old ids, charge it to the new ones */
+		gfs2_quota_change(ip, -blocks, old_uid, old_gid);
+		gfs2_quota_change(ip, blocks, new_uid, new_gid);
+	}
+
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_gunlock_q:
+	gfs2_quota_unlock(ip);
+out_alloc:
+	gfs2_alloc_put(ip);
+	return ret;
+}
+
+/**
+ * gfs2_setattr - Change attributes on an inode
+ * @dentry: The dentry which is changing
+ * @attr: The structure describing the change
+ *
+ * The VFS layer wants to change one or more of an inodes attributes. Write
+ * that change out to disk.
+ *
+ * The inode glock is held exclusively for the duration.  Immutable and
+ * append-only inodes are refused with -EPERM.  Exactly one handler runs
+ * per call, picked in this order: a size change, a uid/gid change, a
+ * mode change on an inode with POSIX ACLs, otherwise the simple path.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder i_gh;
+ int error;
+
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+ if (error)
+ return error;
+
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto out;
+
+ error = inode_change_ok(inode, attr);
+ if (error)
+ goto out;
+
+ if (attr->ia_valid & ATTR_SIZE) /* a size change takes precedence over every other flag */
+ error = gfs2_setattr_size(inode, attr->ia_size);
+ else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
+ error = setattr_chown(inode, attr);
+ else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
+ error = gfs2_acl_chmod(ip, attr);
+ else
+ error = gfs2_setattr_simple(ip, attr);
+
+out:
+ gfs2_glock_dq_uninit(&i_gh);
+ if (!error) /* the inode is only marked dirty after the glock has been dropped */
+ mark_inode_dirty(inode);
+ return error;
+}
+
+/**
+ * gfs2_getattr - Read out an inode's attributes
+ * @mnt: The vfsmount the inode is being accessed from
+ * @dentry: The dentry to stat
+ * @stat: The inode's stats
+ *
+ * Callers may or may not already hold the inode glock: the VFS calls in
+ * without it, whereas the NFS readdirplus operation gets here (from
+ * filldir) with the glock held.  A shared lock is therefore taken only
+ * when this task does not hold the glock yet.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	const int need_lock = (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
+
+	if (need_lock) {
+		int error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
+					       LM_FLAG_ANY, &gh);
+
+		if (error)
+			return error;
+	}
+
+	generic_fillattr(inode, stat);
+
+	/* Only undo a lock that was taken by this call */
+	if (need_lock)
+		gfs2_glock_dq_uninit(&gh);
+
+	return 0;
+}
+
+/*
+ * gfs2_setxattr - set an extended attribute under the inode glock
+ *
+ * Runs generic_setxattr() with the inode glock held exclusively.
+ * Returns the locking error if the glock cannot be queued, otherwise
+ * whatever generic_setxattr() returns.
+ */
+static int gfs2_setxattr(struct dentry *dentry, const char *name,
+			 const void *data, size_t size, int flags)
+{
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+
+	ret = gfs2_glock_nq(&gh);
+	if (ret != 0)
+		goto out_uninit;
+
+	ret = generic_setxattr(dentry, name, data, size, flags);
+	gfs2_glock_dq(&gh);
+
+out_uninit:
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+/*
+ * gfs2_getxattr - read an extended attribute under the inode glock
+ *
+ * Runs generic_getxattr() with the inode glock held shared
+ * (LM_FLAG_ANY).  Returns the locking error if the glock cannot be
+ * queued, otherwise whatever generic_getxattr() returns.
+ */
+static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
+			     void *data, size_t size)
+{
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+
+	ret = gfs2_glock_nq(&gh);
+	if (ret != 0)
+		goto out_uninit;
+
+	ret = generic_getxattr(dentry, name, data, size);
+	gfs2_glock_dq(&gh);
+
+out_uninit:
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+/*
+ * gfs2_removexattr - remove an extended attribute under the inode glock
+ *
+ * Runs generic_removexattr() with the inode glock held exclusively.
+ * Returns the locking error if the glock cannot be queued, otherwise
+ * whatever generic_removexattr() returns.
+ */
+static int gfs2_removexattr(struct dentry *dentry, const char *name)
+{
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+
+	ret = gfs2_glock_nq(&gh);
+	if (ret != 0)
+		goto out_uninit;
+
+	ret = generic_removexattr(dentry, name);
+	gfs2_glock_dq(&gh);
+
+out_uninit:
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{ /* Report extents for [start, start + len): one inline extent for a stuffed inode, else the generic block walk. Returns errno. */
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
+
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex); /* i_mutex is taken before the glock and released after it */
+
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ if (ret)
+ goto out;
+
+ if (gfs2_is_stuffed(ip)) {
+ u64 phys = ip->i_no_addr << inode->i_blkbits; /* byte address of the dinode block */
+ u64 size = i_size_read(inode);
+ u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
+ FIEMAP_EXTENT_DATA_INLINE;
+ phys += sizeof(struct gfs2_dinode); /* the data follows the dinode header in the same block */
+ phys += start;
+ if (start + len > size)
+ len = size - start; /* wraps if start > size, but len is only used when start < size */
+ if (start < size)
+ ret = fiemap_fill_next_extent(fieinfo, start, phys,
+ len, flags);
+ if (ret == 1) /* a return of 1 from the fill call is not an error; report success */
+ ret = 0;
+ } else {
+ ret = __generic_block_fiemap(inode, fieinfo, start, len,
+ gfs2_block_map);
+ }
+
+ gfs2_glock_dq_uninit(&gh);
+out:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
}
+const struct inode_operations gfs2_file_iops = { /* attribute, xattr and fiemap handlers only - presumably installed on regular files; confirm at the assignment site */
+ .permission = gfs2_permission,
+ .setattr = gfs2_setattr,
+ .getattr = gfs2_getattr,
+ .setxattr = gfs2_setxattr,
+ .getxattr = gfs2_getxattr,
+ .listxattr = gfs2_listxattr,
+ .removexattr = gfs2_removexattr,
+ .fiemap = gfs2_fiemap,
+};
+
+const struct inode_operations gfs2_dir_iops = { /* namespace operations plus the common attribute/xattr/fiemap handlers */
+ .create = gfs2_create,
+ .lookup = gfs2_lookup,
+ .link = gfs2_link,
+ .unlink = gfs2_unlink,
+ .symlink = gfs2_symlink,
+ .mkdir = gfs2_mkdir,
+ .rmdir = gfs2_unlink, /* same handler as .unlink: gfs2_unlink() tells the two apart by inode type */
+ .mknod = gfs2_mknod,
+ .rename = gfs2_rename,
+ .permission = gfs2_permission,
+ .setattr = gfs2_setattr,
+ .getattr = gfs2_getattr,
+ .setxattr = gfs2_setxattr,
+ .getxattr = gfs2_getxattr,
+ .listxattr = gfs2_listxattr,
+ .removexattr = gfs2_removexattr,
+ .fiemap = gfs2_fiemap,
+};
+
+const struct inode_operations gfs2_symlink_iops = { /* link reading plus the common attribute/xattr/fiemap handlers */
+ .readlink = generic_readlink,
+ .follow_link = gfs2_follow_link, /* allocates the link body ... */
+ .put_link = gfs2_put_link, /* ... which this hook frees again */
+ .permission = gfs2_permission,
+ .setattr = gfs2_setattr,
+ .getattr = gfs2_getattr,
+ .setxattr = gfs2_setxattr,
+ .getxattr = gfs2_getxattr,
+ .listxattr = gfs2_listxattr,
+ .removexattr = gfs2_removexattr,
+ .fiemap = gfs2_fiemap,
+};
+
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 099ca305e518..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -102,22 +102,16 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 *no_formal_ino,
unsigned int blktype);
-extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
-extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
-extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
-extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
- const struct qstr *name,
- unsigned int mode, dev_t dev);
extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-extern void gfs2_dinode_print(const struct gfs2_inode *ip);
extern const struct inode_operations gfs2_file_iops;
extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..903115f2bb34 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
+#include <linux/writeback.h>
#include "gfs2.h"
#include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
/**
* gfs2_ail1_start_one - Start I/O on a part of the AIL
* @sdp: the filesystem
- * @tr: the part of the AIL
+ * @wbc: The writeback control structure
+ * @ai: The ail structure
*
*/
-static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
+ struct writeback_control *wbc,
+ struct gfs2_ail *ai)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
+ struct gfs2_glock *gl = NULL;
+ struct address_space *mapping;
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
- int retry;
- do {
- retry = 0;
+ list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
+ bh = bd->bd_bh;
- list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
- bd_ail_st_list) {
- bh = bd->bd_bh;
+ gfs2_assert(sdp, bd->bd_ail == ai);
- gfs2_assert(sdp, bd->bd_ail == ai);
+ if (!buffer_busy(bh)) {
+ if (!buffer_uptodate(bh))
+ gfs2_io_error_bh(sdp, bh);
+ list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+ continue;
+ }
- if (!buffer_busy(bh)) {
- if (!buffer_uptodate(bh))
- gfs2_io_error_bh(sdp, bh);
- list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
- continue;
- }
+ if (!buffer_dirty(bh))
+ continue;
+ if (gl == bd->bd_gl)
+ continue;
+ gl = bd->bd_gl;
+ list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
+ mapping = bh->b_page->mapping;
+ if (!mapping)
+ continue;
+ spin_unlock(&sdp->sd_ail_lock);
+ generic_writepages(mapping, wbc);
+ spin_lock(&sdp->sd_ail_lock);
+ if (wbc->nr_to_write <= 0)
+ break;
+ return 1;
+ }
- if (!buffer_dirty(bh))
- continue;
+ return 0;
+}
- list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
- get_bh(bh);
- spin_unlock(&sdp->sd_ail_lock);
- lock_buffer(bh);
- if (test_clear_buffer_dirty(bh)) {
- bh->b_end_io = end_buffer_write_sync;
- submit_bh(WRITE_SYNC, bh);
- } else {
- unlock_buffer(bh);
- brelse(bh);
- }
- spin_lock(&sdp->sd_ail_lock);
-
- retry = 1;
+/**
+ * gfs2_ail1_flush - start writeback of some ail1 entries
+ * @sdp: The super block
+ * @wbc: The writeback control structure
+ *
+ * Writes back some ail1 entries, according to the limits in the
+ * writeback control structure
+ *
+ * The ail1 list is walked in reverse under sd_ail_lock.  Whenever
+ * gfs2_ail1_start_one() returns non-zero the walk starts over from the
+ * beginning; it stops once @wbc->nr_to_write is used up or a full pass
+ * completes.
+ */
+
+void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
+{
+ struct list_head *head = &sdp->sd_ail1_list;
+ struct gfs2_ail *ai;
+
+ trace_gfs2_ail_flush(sdp, wbc, 1);
+ spin_lock(&sdp->sd_ail_lock);
+restart:
+ list_for_each_entry_reverse(ai, head, ai_list) {
+ if (wbc->nr_to_write <= 0)
break;
- }
- } while (retry);
+ if (gfs2_ail1_start_one(sdp, wbc, ai)) /* non-zero: it dropped and retook sd_ail_lock, so restart the walk */
+ goto restart;
+ }
+ spin_unlock(&sdp->sd_ail_lock);
+ trace_gfs2_ail_flush(sdp, wbc, 0);
+}
+
+/**
+ * gfs2_ail1_start - start writeback of all ail1 entries
+ * @sdp: The superblock
+ *
+ * Calls gfs2_ail1_flush() with a writeback control that places no
+ * practical limit on the amount written (nr_to_write is LONG_MAX, the
+ * range is 0..LLONG_MAX) and uses WB_SYNC_NONE.
+ */
+
+static void gfs2_ail1_start(struct gfs2_sbd *sdp)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = LONG_MAX,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+	};
+
+	/* Plain call: a void function must not return an expression */
+	gfs2_ail1_flush(sdp, &wbc);
}
/**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
*
*/
-static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
+static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
@@ -149,76 +192,63 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
bd_ail_st_list) {
bh = bd->bd_bh;
-
gfs2_assert(sdp, bd->bd_ail == ai);
-
- if (buffer_busy(bh)) {
- if (flags & DIO_ALL)
- continue;
- else
- break;
- }
-
+ if (buffer_busy(bh))
+ continue;
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
-
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
}
- return list_empty(&ai->ai_ail1_list);
}
-static void gfs2_ail1_start(struct gfs2_sbd *sdp)
-{
- struct list_head *head;
- u64 sync_gen;
- struct gfs2_ail *ai;
- int done = 0;
-
- spin_lock(&sdp->sd_ail_lock);
- head = &sdp->sd_ail1_list;
- if (list_empty(head)) {
- spin_unlock(&sdp->sd_ail_lock);
- return;
- }
- sync_gen = sdp->sd_ail_sync_gen++;
-
- while(!done) {
- done = 1;
- list_for_each_entry_reverse(ai, head, ai_list) {
- if (ai->ai_sync_gen >= sync_gen)
- continue;
- ai->ai_sync_gen = sync_gen;
- gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
- done = 0;
- break;
- }
- }
-
- spin_unlock(&sdp->sd_ail_lock);
-}
+/**
+ * gfs2_ail1_empty - Try to empty the ail1 lists
+ * @sdp: The superblock
+ *
+ * Tries to empty the ail1 lists, starting with the oldest first
+ */
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
+static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
{
struct gfs2_ail *ai, *s;
int ret;
spin_lock(&sdp->sd_ail_lock);
-
list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
- if (gfs2_ail1_empty_one(sdp, ai, flags))
+ gfs2_ail1_empty_one(sdp, ai);
+ if (list_empty(&ai->ai_ail1_list))
list_move(&ai->ai_list, &sdp->sd_ail2_list);
- else if (!(flags & DIO_ALL))
+ else
break;
}
-
ret = list_empty(&sdp->sd_ail1_list);
-
spin_unlock(&sdp->sd_ail_lock);
return ret;
}
+static void gfs2_ail1_wait(struct gfs2_sbd *sdp) /* wait for the first locked ail1 buffer found, if there is one */
+{
+ struct gfs2_ail *ai;
+ struct gfs2_bufdata *bd;
+ struct buffer_head *bh;
+
+ spin_lock(&sdp->sd_ail_lock);
+ list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
+ list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
+ bh = bd->bd_bh;
+ if (!buffer_locked(bh))
+ continue;
+ get_bh(bh); /* take a reference before the list lock is dropped */
+ spin_unlock(&sdp->sd_ail_lock);
+ wait_on_buffer(bh);
+ brelse(bh);
+ return; /* only one buffer is waited for per call; the lock is already released */
+ }
+ }
+ spin_unlock(&sdp->sd_ail_lock); /* no locked buffer was found */
+}
/**
* gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -574,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
- gfs2_ail1_empty(sdp, 0);
+ gfs2_ail1_empty(sdp);
tail = current_tail(sdp);
lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
gfs2_log_flush(sdp, NULL);
for (;;) {
gfs2_ail1_start(sdp);
- if (gfs2_ail1_empty(sdp, DIO_ALL))
+ gfs2_ail1_wait(sdp);
+ if (gfs2_ail1_empty(sdp))
break;
- msleep(10);
}
}
@@ -905,20 +935,20 @@ int gfs2_logd(void *data)
preflush = atomic_read(&sdp->sd_log_pinned);
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
- gfs2_ail1_empty(sdp, DIO_ALL);
+ gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL);
- gfs2_ail1_empty(sdp, DIO_ALL);
}
if (gfs2_ail_flush_reqd(sdp)) {
gfs2_ail1_start(sdp);
- io_schedule();
- gfs2_ail1_empty(sdp, 0);
+ gfs2_ail1_wait(sdp);
+ gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL);
- gfs2_ail1_empty(sdp, DIO_ALL);
}
- wake_up(&sdp->sd_log_waitq);
+ if (!gfs2_ail_flush_reqd(sdp))
+ wake_up(&sdp->sd_log_waitq);
+
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
if (freezing(current))
refrigerator();
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/writeback.h>
#include "incore.h"
/**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
+extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd;
- gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
+ BUG_ON(!current->journal_info);
clear_buffer_dirty(bh);
if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to unpin
* @ai:
+ * @flags: The inode dirty flags
*
*/
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
{
struct gfs2_bufdata *bd = bh->b_private;
- gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
-
- if (!buffer_pinned(bh))
- gfs2_assert_withdraw(sdp, 0);
+ BUG_ON(!buffer_uptodate(bh));
+ BUG_ON(!buffer_pinned(bh));
lock_buffer(bh);
mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
- if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
- gfs2_glock_schedule_for_reclaim(bd->bd_gl);
+ clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
trace_gfs2_pin(bd, 0);
unlock_buffer(bh);
atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
+ struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+ struct gfs2_glock *gl = bd->bd_gl;
struct gfs2_trans *tr;
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_revoke++;
sdp->sd_log_num_revoke++;
+ atomic_inc(&gl->gl_revokes);
+ set_bit(GLF_LFLUSH, &gl->gl_flags);
list_add(&le->le_list, &sdp->sd_log_le_revoke);
}
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
offset = sizeof(struct gfs2_log_descriptor);
- while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
- list_del_init(&bd->bd_le.le_list);
+ list_for_each_entry(bd, head, bd_le.le_list) {
sdp->sd_log_num_revoke--;
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
}
*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
- kmem_cache_free(gfs2_bufdata_cachep, bd);
-
offset += sizeof(u64);
}
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
submit_bh(WRITE_SYNC, bh);
}
+static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+ struct list_head *head = &sdp->sd_log_le_revoke;
+ struct gfs2_bufdata *bd;
+ struct gfs2_glock *gl;
+
+ while (!list_empty(head)) {
+ bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
+ list_del_init(&bd->bd_le.le_list);
+ gl = bd->bd_gl;
+ atomic_dec(&gl->gl_revokes);
+ clear_bit(GLF_LFLUSH, &gl->gl_flags);
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
+ }
+}
+
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, int pass)
{
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
const struct gfs2_log_operations gfs2_revoke_lops = {
.lo_add = revoke_lo_add,
.lo_before_commit = revoke_lo_before_commit,
+ .lo_after_commit = revoke_lo_after_commit,
.lo_before_scan = revoke_lo_before_scan,
.lo_scan_elements = revoke_lo_scan_elements,
.lo_after_scan = revoke_lo_after_scan,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..cfa327d33194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
atomic_set(&gl->gl_ail_count, 0);
+ atomic_set(&gl->gl_revokes, 0);
}
static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
#include "rgrp.h"
#include "trans.h"
#include "util.h"
+#include "trace_gfs2.h"
static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
{
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
struct gfs2_bufdata *bd = bh->b_private;
if (test_clear_buffer_pinned(bh)) {
+ trace_gfs2_pin(bd, 0);
atomic_dec(&sdp->sd_log_pinned);
list_del_init(&bd->bd_le.le_list);
if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
-#define buffer_in_io(bh) \
-((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
#endif /* __DIO_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3c69eb91c74..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
* changed.
*/
-static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
+static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
{
+ struct gfs2_sb_host *sb = &sdp->sd_sb;
+
if (sb->sb_magic != GFS2_MAGIC ||
sb->sb_type != GFS2_METATYPE_SB) {
if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
unlock_page(page);
}
-static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
{
+ struct gfs2_sb_host *sb = &sdp->sd_sb;
+ struct super_block *s = sdp->sd_vfs;
const struct gfs2_sb *str = buf;
sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
- memcpy(sb->sb_uuid, str->sb_uuid, 16);
+ memcpy(s->s_uuid, str->sb_uuid, 16);
}
/**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
* Returns: 0 on success or error
*/
-static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
+static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
{
struct super_block *sb = sdp->sd_vfs;
struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
return -EIO;
}
p = kmap(page);
- gfs2_sb_in(&sdp->sd_sb, p);
+ gfs2_sb_in(sdp, p);
kunmap(page);
__free_page(page);
- return 0;
+ return gfs2_check_sb(sdp, silent);
}
/**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
unsigned int x;
int error;
- error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+ error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
if (error) {
if (!silent)
fs_err(sdp, "can't read superblock\n");
return error;
}
- error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
- if (error)
- return error;
-
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
GFS2_BASIC_BLOCK_SHIFT;
sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
/* Try to autodetect */
if (!proto[0] || !table[0]) {
- error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+ error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
if (error)
return error;
- error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
- if (error)
- goto out;
-
if (!proto[0])
proto = sdp->sd_sb.sb_lockproto;
if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
while ((table = strchr(table, '/')))
*table = '_';
-out:
return error;
}
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (sdp->sd_args.ar_statfs_quantum) {
sdp->sd_tune.gt_statfs_slow = 0;
sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
- }
- else {
+ } else {
sdp->sd_tune.gt_statfs_slow = 1;
sdp->sd_tune.gt_statfs_quantum = 30;
}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/namei.h>
-#include <linux/mm.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/crc32.h>
-#include <linux/fiemap.h>
-#include <asm/uaccess.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "acl.h"
-#include "bmap.h"
-#include "dir.h"
-#include "xattr.h"
-#include "glock.h"
-#include "inode.h"
-#include "meta_io.h"
-#include "quota.h"
-#include "rgrp.h"
-#include "trans.h"
-#include "util.h"
-#include "super.h"
-
-/**
- * gfs2_create - Create a file
- * @dir: The directory in which to create the file
- * @dentry: The dentry of the new file
- * @mode: The mode of the new file
- *
- * Returns: errno
- */
-
-static int gfs2_create(struct inode *dir, struct dentry *dentry,
- int mode, struct nameidata *nd)
-{
- struct gfs2_inode *dip = GFS2_I(dir);
- struct gfs2_sbd *sdp = GFS2_SB(dir);
- struct gfs2_holder ghs[2];
- struct inode *inode;
-
- gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
- for (;;) {
- inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
- if (!IS_ERR(inode)) {
- gfs2_trans_end(sdp);
- if (dip->i_alloc->al_rgd)
- gfs2_inplace_release(dip);
- gfs2_quota_unlock(dip);
- gfs2_alloc_put(dip);
- gfs2_glock_dq_uninit_m(2, ghs);
- mark_inode_dirty(inode);
- break;
- } else if (PTR_ERR(inode) != -EEXIST ||
- (nd && nd->flags & LOOKUP_EXCL)) {
- gfs2_holder_uninit(ghs);
- return PTR_ERR(inode);
- }
-
- inode = gfs2_lookupi(dir, &dentry->d_name, 0);
- if (inode) {
- if (!IS_ERR(inode)) {
- gfs2_holder_uninit(ghs);
- break;
- } else {
- gfs2_holder_uninit(ghs);
- return PTR_ERR(inode);
- }
- }
- }
-
- d_instantiate(dentry, inode);
-
- return 0;
-}
-
-/**
- * gfs2_lookup - Look up a filename in a directory and return its inode
- * @dir: The directory inode
- * @dentry: The dentry of the new inode
- * @nd: passed from Linux VFS, ignored by us
- *
- * Called by the VFS layer. Lock dir and call gfs2_lookupi()
- *
- * Returns: errno
- */
-
-static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd)
-{
- struct inode *inode = NULL;
-
- inode = gfs2_lookupi(dir, &dentry->d_name, 0);
- if (inode && IS_ERR(inode))
- return ERR_CAST(inode);
-
- if (inode) {
- struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
- struct gfs2_holder gh;
- int error;
- error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
- if (error) {
- iput(inode);
- return ERR_PTR(error);
- }
- gfs2_glock_dq_uninit(&gh);
- return d_splice_alias(inode, dentry);
- }
- d_add(dentry, inode);
-
- return NULL;
-}
-
-/**
- * gfs2_link - Link to a file
- * @old_dentry: The inode to link
- * @dir: Add link to this directory
- * @dentry: The name of the link
- *
- * Link the inode in "old_dentry" into the directory "dir" with the
- * name in "dentry".
- *
- * Returns: errno
- */
-
-static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *dentry)
-{
- struct gfs2_inode *dip = GFS2_I(dir);
- struct gfs2_sbd *sdp = GFS2_SB(dir);
- struct inode *inode = old_dentry->d_inode;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder ghs[2];
- int alloc_required;
- int error;
-
- if (S_ISDIR(inode->i_mode))
- return -EPERM;
-
- gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-
- error = gfs2_glock_nq(ghs); /* parent */
- if (error)
- goto out_parent;
-
- error = gfs2_glock_nq(ghs + 1); /* child */
- if (error)
- goto out_child;
-
- error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
- if (error)
- goto out_gunlock;
-
- error = gfs2_dir_check(dir, &dentry->d_name, NULL);
- switch (error) {
- case -ENOENT:
- break;
- case 0:
- error = -EEXIST;
- default:
- goto out_gunlock;
- }
-
- error = -EINVAL;
- if (!dip->i_inode.i_nlink)
- goto out_gunlock;
- error = -EFBIG;
- if (dip->i_entries == (u32)-1)
- goto out_gunlock;
- error = -EPERM;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto out_gunlock;
- error = -EINVAL;
- if (!ip->i_inode.i_nlink)
- goto out_gunlock;
- error = -EMLINK;
- if (ip->i_inode.i_nlink == (u32)-1)
- goto out_gunlock;
-
- alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
- if (error < 0)
- goto out_gunlock;
- error = 0;
-
- if (alloc_required) {
- struct gfs2_alloc *al = gfs2_alloc_get(dip);
- if (!al) {
- error = -ENOMEM;
- goto out_gunlock;
- }
-
- error = gfs2_quota_lock_check(dip);
- if (error)
- goto out_alloc;
-
- al->al_requested = sdp->sd_max_dirres;
-
- error = gfs2_inplace_reserve(dip);
- if (error)
- goto out_gunlock_q;
-
- error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- gfs2_rg_blocks(al) +
- 2 * RES_DINODE + RES_STATFS +
- RES_QUOTA, 0);
- if (error)
- goto out_ipres;
- } else {
- error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
- if (error)
- goto out_ipres;
- }
-
- error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
- if (error)
- goto out_end_trans;
-
- error = gfs2_change_nlink(ip, +1);
-
-out_end_trans:
- gfs2_trans_end(sdp);
-out_ipres:
- if (alloc_required)
- gfs2_inplace_release(dip);
-out_gunlock_q:
- if (alloc_required)
- gfs2_quota_unlock(dip);
-out_alloc:
- if (alloc_required)
- gfs2_alloc_put(dip);
-out_gunlock:
- gfs2_glock_dq(ghs + 1);
-out_child:
- gfs2_glock_dq(ghs);
-out_parent:
- gfs2_holder_uninit(ghs);
- gfs2_holder_uninit(ghs + 1);
- if (!error) {
- ihold(inode);
- d_instantiate(dentry, inode);
- mark_inode_dirty(inode);
- }
- return error;
-}
-
-/*
- * gfs2_unlink_ok - check to see that a inode is still in a directory
- * @dip: the directory
- * @name: the name of the file
- * @ip: the inode
- *
- * Assumes that the lock on (at least) @dip is held.
- *
- * Returns: 0 if the parent/child relationship is correct, errno if it isn't
- */
-
-static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
- const struct gfs2_inode *ip)
-{
- int error;
-
- if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
- return -EPERM;
-
- if ((dip->i_inode.i_mode & S_ISVTX) &&
- dip->i_inode.i_uid != current_fsuid() &&
- ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
- return -EPERM;
-
- if (IS_APPEND(&dip->i_inode))
- return -EPERM;
-
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
- if (error)
- return error;
-
- error = gfs2_dir_check(&dip->i_inode, name, ip);
- if (error)
- return error;
-
- return 0;
-}
-
-/**
- * gfs2_unlink - Unlink a file
- * @dir: The inode of the directory containing the file to unlink
- * @dentry: The file itself
- *
- * Unlink a file. Call gfs2_unlinki()
- *
- * Returns: errno
- */
-
-static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct gfs2_inode *dip = GFS2_I(dir);
- struct gfs2_sbd *sdp = GFS2_SB(dir);
- struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[3];
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder ri_gh;
- int error;
-
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- return error;
-
- gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-
- rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
- gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
-
-
- error = gfs2_glock_nq(ghs); /* parent */
- if (error)
- goto out_parent;
-
- error = gfs2_glock_nq(ghs + 1); /* child */
- if (error)
- goto out_child;
-
- error = gfs2_glock_nq(ghs + 2); /* rgrp */
- if (error)
- goto out_rgrp;
-
- error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
- if (error)
- goto out_gunlock;
-
- error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
- if (error)
- goto out_gunlock;
-
- error = gfs2_dir_del(dip, &dentry->d_name);
- if (error)
- goto out_end_trans;
-
- error = gfs2_change_nlink(ip, -1);
-
-out_end_trans:
- gfs2_trans_end(sdp);
-out_gunlock:
- gfs2_glock_dq(ghs + 2);
-out_rgrp:
- gfs2_holder_uninit(ghs + 2);
- gfs2_glock_dq(ghs + 1);
-out_child:
- gfs2_holder_uninit(ghs + 1);
- gfs2_glock_dq(ghs);
-out_parent:
- gfs2_holder_uninit(ghs);
- gfs2_glock_dq_uninit(&ri_gh);
- return error;
-}
-
-/**
- * gfs2_symlink - Create a symlink
- * @dir: The directory to create the symlink in
- * @dentry: The dentry to put the symlink in
- * @symname: The thing which the link points to
- *
- * Returns: errno
- */
-
-static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
-{
- struct gfs2_inode *dip = GFS2_I(dir), *ip;
- struct gfs2_sbd *sdp = GFS2_SB(dir);
- struct gfs2_holder ghs[2];
- struct inode *inode;
- struct buffer_head *dibh;
- int size;
- int error;
-
- /* Must be stuffed with a null terminator for gfs2_follow_link() */
- size = strlen(symname);
- if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
- return -ENAMETOOLONG;
-
- gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
- inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
- if (IS_ERR(inode)) {
- gfs2_holder_uninit(ghs);
- return PTR_ERR(inode);
- }
-
- ip = ghs[1].gh_gl->gl_object;
-
- i_size_write(inode, size);
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
-
- if (!gfs2_assert_withdraw(sdp, !error)) {
- gfs2_dinode_out(ip, dibh->b_data);
- memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
- size);
- brelse(dibh);
- }
-
- gfs2_trans_end(sdp);
- if (dip->i_alloc->al_rgd)
- gfs2_inplace_release(dip);
- gfs2_quota_unlock(dip);
- gfs2_alloc_put(dip);
-
- gfs2_glock_dq_uninit_m(2, ghs);
-
- d_instantiate(dentry, inode);
- mark_inode_dirty(inode);
-
- return 0;
-}
-
-/**
- * gfs2_mkdir - Make a directory
- * @dir: The parent directory of the new one
- * @dentry: The dentry of the new directory
- * @mode: The mode of the new directory
- *
- * Returns: errno
- */
-
-static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- struct gfs2_inode *dip = GFS2_I(dir), *ip;
- struct gfs2_sbd *sdp = GFS2_SB(dir);
- struct gfs2_holder ghs[2];
- struct inode *inode;
- struct buffer_head *dibh;
- int error;
-
- gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
- inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
- if (IS_ERR(inode)) {
- gfs2_holder_uninit(ghs);
- return PTR_ERR(inode);
- }
-
- ip = ghs[1].gh_gl->gl_object;
-
- ip->i_inode.i_nlink = 2;
- i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
- ip->i_diskflags |= GFS2_DIF_JDATA;
- ip->i_entries = 2;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
-
- if (!gfs2_assert_withdraw(sdp, !error)) {
- struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
- struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
- dent->de_inum = di->di_num; /* already GFS2 endian */
- dent->de_type = cpu_to_be16(DT_DIR);
- di->di_entries = cpu_to_be32(1);
-
- dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
- gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
-
- gfs2_inum_out(dip, dent);
- dent->de_type = cpu_to_be16(DT_DIR);
-
- gfs2_dinode_out(ip, di);
-
- brelse(dibh);
- }
-
- error = gfs2_change_nlink(dip, +1);
- gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
-
- gfs2_trans_end(sdp);
- if (dip->i_alloc->al_rgd)
- gfs2_inplace_release(dip);
- gfs2_quota_unlock(dip);
- gfs2_alloc_put(dip);
-
- gfs2_glock_dq_uninit_m(2, ghs);
-
- d_instantiate(dentry, inode);
- mark_inode_dirty(inode);
-
- return 0;
-}
-
-/**
- * gfs2_rmdiri - Remove a directory
- * @dip: The parent directory of the directory to be removed
- * @name: The name of the directory to be removed
- * @ip: The GFS2 inode of the directory to be removed
- *
- * Assumes Glocks on dip and ip are held
- *
- * Returns: errno
- */
-
-static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
- struct gfs2_inode *ip)
-{
- int error;
-
- if (ip->i_entries != 2) {
- if (gfs2_consist_inode(ip))
- gfs2_dinode_print(ip);
- return -EIO;
- }
-
- error = gfs2_dir_del(dip, name);
- if (error)
- return error;
-
- error = gfs2_change_nlink(dip, -1);
- if (error)
- return error;
-
- error = gfs2_dir_del(ip, &gfs2_qdot);
- if (error)
- return error;
-
- error = gfs2_dir_del(ip, &gfs2_qdotdot);
- if (error)
- return error;
-
- /* It looks odd, but it really should be done twice */
- error = gfs2_change_nlink(ip, -1);
- if (error)
- return error;
-
- error = gfs2_change_nlink(ip, -1);
- if (error)
- return error;
-
- return error;
-}
-
-/**
- * gfs2_rmdir - Remove a directory
- * @dir: The parent directory of the directory to be removed
- * @dentry: The dentry of the directory to remove
- *
- * Remove a directory. Call gfs2_rmdiri()
- *
- * Returns: errno
- */
-
-static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
-{
- struct gfs2_inode *dip = GFS2_I(dir);
- struct gfs2_sbd *sdp = GFS2_SB(dir);
- struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[3];
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder ri_gh;
- int error;
-
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- return error;
- gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-
- rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
- gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
-
- error = gfs2_glock_nq(ghs); /* parent */
- if (error)
- goto out_parent;
-
- error = gfs2_glock_nq(ghs + 1); /* child */
- if (error)
- goto out_child;
-
- error = gfs2_glock_nq(ghs + 2); /* rgrp */
- if (error)
- goto out_rgrp;
-
- error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
- if (error)
- goto out_gunlock;
-
- if (ip->i_entries < 2) {
- if (gfs2_consist_inode(ip))
- gfs2_dinode_print(ip);
- error = -EIO;
- goto out_gunlock;
- }
- if (ip->i_entries > 2) {
- error = -ENOTEMPTY;
- goto out_gunlock;
- }
-
- error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
- if (error)
- goto out_gunlock;
-
- error = gfs2_rmdiri(dip, &dentry->d_name, ip);
-
- gfs2_trans_end(sdp);
-
-out_gunlock:
- gfs2_glock_dq(ghs + 2);
-out_rgrp:
- gfs2_holder_uninit(ghs + 2);
- gfs2_glock_dq(ghs + 1);
-out_child:
- gfs2_holder_uninit(ghs + 1);
- gfs2_glock_dq(ghs);
-out_parent:
- gfs2_holder_uninit(ghs);
- gfs2_glock_dq_uninit(&ri_gh);
- return error;
-}
-
-/**
- * gfs2_mknod - Make a special file
- * @dir: The directory in which the special file will reside
- * @dentry: The dentry of the special file
- * @mode: The mode of the special file
- * @rdev: The device specification of the special file
- *
- */
-
-static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
- dev_t dev)
-{
- struct gfs2_inode *dip = GFS2_I(dir);
- struct gfs2_sbd *sdp = GFS2_SB(dir);
- struct gfs2_holder ghs[2];
- struct inode *inode;
-
- gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
- inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
- if (IS_ERR(inode)) {
- gfs2_holder_uninit(ghs);
- return PTR_ERR(inode);
- }
-
- gfs2_trans_end(sdp);
- if (dip->i_alloc->al_rgd)
- gfs2_inplace_release(dip);
- gfs2_quota_unlock(dip);
- gfs2_alloc_put(dip);
-
- gfs2_glock_dq_uninit_m(2, ghs);
-
- d_instantiate(dentry, inode);
- mark_inode_dirty(inode);
-
- return 0;
-}
-
-/*
- * gfs2_ok_to_move - check if it's ok to move a directory to another directory
- * @this: move this
- * @to: to here
- *
- * Follow @to back to the root and make sure we don't encounter @this
- * Assumes we already hold the rename lock.
- *
- * Returns: errno
- */
-
-static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
-{
- struct inode *dir = &to->i_inode;
- struct super_block *sb = dir->i_sb;
- struct inode *tmp;
- int error = 0;
-
- igrab(dir);
-
- for (;;) {
- if (dir == &this->i_inode) {
- error = -EINVAL;
- break;
- }
- if (dir == sb->s_root->d_inode) {
- error = 0;
- break;
- }
-
- tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
- if (IS_ERR(tmp)) {
- error = PTR_ERR(tmp);
- break;
- }
-
- iput(dir);
- dir = tmp;
- }
-
- iput(dir);
-
- return error;
-}
-
-/**
- * gfs2_rename - Rename a file
- * @odir: Parent directory of old file name
- * @odentry: The old dentry of the file
- * @ndir: Parent directory of new file name
- * @ndentry: The new dentry of the file
- *
- * Returns: errno
- */
-
-static int gfs2_rename(struct inode *odir, struct dentry *odentry,
- struct inode *ndir, struct dentry *ndentry)
-{
- struct gfs2_inode *odip = GFS2_I(odir);
- struct gfs2_inode *ndip = GFS2_I(ndir);
- struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
- struct gfs2_inode *nip = NULL;
- struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
- struct gfs2_rgrpd *nrgd;
- unsigned int num_gh;
- int dir_rename = 0;
- int alloc_required = 0;
- unsigned int x;
- int error;
-
- if (ndentry->d_inode) {
- nip = GFS2_I(ndentry->d_inode);
- if (ip == nip)
- return 0;
- }
-
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- return error;
-
- if (odip != ndip) {
- error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
- 0, &r_gh);
- if (error)
- goto out;
-
- if (S_ISDIR(ip->i_inode.i_mode)) {
- dir_rename = 1;
- /* don't move a dirctory into it's subdir */
- error = gfs2_ok_to_move(ip, ndip);
- if (error)
- goto out_gunlock_r;
- }
- }
-
- num_gh = 1;
- gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- if (odip != ndip) {
- gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
- num_gh++;
- }
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
- num_gh++;
-
- if (nip) {
- gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
- num_gh++;
- /* grab the resource lock for unlink flag twiddling
- * this is the case of the target file already existing
- * so we unlink before doing the rename
- */
- nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
- if (nrgd)
- gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
- }
-
- for (x = 0; x < num_gh; x++) {
- error = gfs2_glock_nq(ghs + x);
- if (error)
- goto out_gunlock;
- }
-
- /* Check out the old directory */
-
- error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
- if (error)
- goto out_gunlock;
-
- /* Check out the new directory */
-
- if (nip) {
- error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
- if (error)
- goto out_gunlock;
-
- if (S_ISDIR(nip->i_inode.i_mode)) {
- if (nip->i_entries < 2) {
- if (gfs2_consist_inode(nip))
- gfs2_dinode_print(nip);
- error = -EIO;
- goto out_gunlock;
- }
- if (nip->i_entries > 2) {
- error = -ENOTEMPTY;
- goto out_gunlock;
- }
- }
- } else {
- error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
- if (error)
- goto out_gunlock;
-
- error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
- switch (error) {
- case -ENOENT:
- error = 0;
- break;
- case 0:
- error = -EEXIST;
- default:
- goto out_gunlock;
- };
-
- if (odip != ndip) {
- if (!ndip->i_inode.i_nlink) {
- error = -EINVAL;
- goto out_gunlock;
- }
- if (ndip->i_entries == (u32)-1) {
- error = -EFBIG;
- goto out_gunlock;
- }
- if (S_ISDIR(ip->i_inode.i_mode) &&
- ndip->i_inode.i_nlink == (u32)-1) {
- error = -EMLINK;
- goto out_gunlock;
- }
- }
- }
-
- /* Check out the dir to be renamed */
-
- if (dir_rename) {
- error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
- if (error)
- goto out_gunlock;
- }
-
- if (nip == NULL)
- alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
- error = alloc_required;
- if (error < 0)
- goto out_gunlock;
- error = 0;
-
- if (alloc_required) {
- struct gfs2_alloc *al = gfs2_alloc_get(ndip);
- if (!al) {
- error = -ENOMEM;
- goto out_gunlock;
- }
-
- error = gfs2_quota_lock_check(ndip);
- if (error)
- goto out_alloc;
-
- al->al_requested = sdp->sd_max_dirres;
-
- error = gfs2_inplace_reserve_ri(ndip);
- if (error)
- goto out_gunlock_q;
-
- error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- gfs2_rg_blocks(al) +
- 4 * RES_DINODE + 4 * RES_LEAF +
- RES_STATFS + RES_QUOTA + 4, 0);
- if (error)
- goto out_ipreserv;
- } else {
- error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
- 5 * RES_LEAF + 4, 0);
- if (error)
- goto out_gunlock;
- }
-
- /* Remove the target file, if it exists */
-
- if (nip) {
- if (S_ISDIR(nip->i_inode.i_mode))
- error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
- else {
- error = gfs2_dir_del(ndip, &ndentry->d_name);
- if (error)
- goto out_end_trans;
- error = gfs2_change_nlink(nip, -1);
- }
- if (error)
- goto out_end_trans;
- }
-
- if (dir_rename) {
- error = gfs2_change_nlink(ndip, +1);
- if (error)
- goto out_end_trans;
- error = gfs2_change_nlink(odip, -1);
- if (error)
- goto out_end_trans;
-
- error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
- if (error)
- goto out_end_trans;
- } else {
- struct buffer_head *dibh;
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto out_end_trans;
- ip->i_inode.i_ctime = CURRENT_TIME;
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
- }
-
- error = gfs2_dir_del(odip, &odentry->d_name);
- if (error)
- goto out_end_trans;
-
- error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
- if (error)
- goto out_end_trans;
-
-out_end_trans:
- gfs2_trans_end(sdp);
-out_ipreserv:
- if (alloc_required)
- gfs2_inplace_release(ndip);
-out_gunlock_q:
- if (alloc_required)
- gfs2_quota_unlock(ndip);
-out_alloc:
- if (alloc_required)
- gfs2_alloc_put(ndip);
-out_gunlock:
- while (x--) {
- gfs2_glock_dq(ghs + x);
- gfs2_holder_uninit(ghs + x);
- }
-out_gunlock_r:
- if (r_gh.gh_gl)
- gfs2_glock_dq_uninit(&r_gh);
-out:
- gfs2_glock_dq_uninit(&ri_gh);
- return error;
-}
-
-/**
- * gfs2_follow_link - Follow a symbolic link
- * @dentry: The dentry of the link
- * @nd: Data that we pass to vfs_follow_link()
- *
- * This can handle symlinks of any size.
- *
- * Returns: 0 on success or error code
- */
-
-static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder i_gh;
- struct buffer_head *dibh;
- unsigned int x, size;
- char *buf;
- int error;
-
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
- error = gfs2_glock_nq(&i_gh);
- if (error) {
- gfs2_holder_uninit(&i_gh);
- nd_set_link(nd, ERR_PTR(error));
- return NULL;
- }
-
- size = (unsigned int)i_size_read(&ip->i_inode);
- if (size == 0) {
- gfs2_consist_inode(ip);
- buf = ERR_PTR(-EIO);
- goto out;
- }
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error) {
- buf = ERR_PTR(error);
- goto out;
- }
-
- x = size + 1;
- buf = kmalloc(x, GFP_NOFS);
- if (!buf)
- buf = ERR_PTR(-ENOMEM);
- else
- memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
- brelse(dibh);
-out:
- gfs2_glock_dq_uninit(&i_gh);
- nd_set_link(nd, buf);
- return NULL;
-}
-
-static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- kfree(s);
-}
-
-/**
- * gfs2_permission -
- * @inode: The inode
- * @mask: The mask to be tested
- * @flags: Indicates whether this is an RCU path walk or not
- *
- * This may be called from the VFS directly, or from within GFS2 with the
- * inode locked, so we look to see if the glock is already locked and only
- * lock the glock if its not already been done.
- *
- * Returns: errno
- */
-
-int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
-{
- struct gfs2_inode *ip;
- struct gfs2_holder i_gh;
- int error;
- int unlock = 0;
-
-
- ip = GFS2_I(inode);
- if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
- if (error)
- return error;
- unlock = 1;
- }
-
- if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
- error = -EACCES;
- else
- error = generic_permission(inode, mask, flags, gfs2_check_acl);
- if (unlock)
- gfs2_glock_dq_uninit(&i_gh);
-
- return error;
-}
-
-static int setattr_chown(struct inode *inode, struct iattr *attr)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- u32 ouid, ogid, nuid, ngid;
- int error;
-
- ouid = inode->i_uid;
- ogid = inode->i_gid;
- nuid = attr->ia_uid;
- ngid = attr->ia_gid;
-
- if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
- ouid = nuid = NO_QUOTA_CHANGE;
- if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
- ogid = ngid = NO_QUOTA_CHANGE;
-
- if (!gfs2_alloc_get(ip))
- return -ENOMEM;
-
- error = gfs2_quota_lock(ip, nuid, ngid);
- if (error)
- goto out_alloc;
-
- if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
- error = gfs2_quota_check(ip, nuid, ngid);
- if (error)
- goto out_gunlock_q;
- }
-
- error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
- if (error)
- goto out_gunlock_q;
-
- error = gfs2_setattr_simple(ip, attr);
- if (error)
- goto out_end_trans;
-
- if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
- u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
- gfs2_quota_change(ip, -blocks, ouid, ogid);
- gfs2_quota_change(ip, blocks, nuid, ngid);
- }
-
-out_end_trans:
- gfs2_trans_end(sdp);
-out_gunlock_q:
- gfs2_quota_unlock(ip);
-out_alloc:
- gfs2_alloc_put(ip);
- return error;
-}
-
-/**
- * gfs2_setattr - Change attributes on an inode
- * @dentry: The dentry which is changing
- * @attr: The structure describing the change
- *
- * The VFS layer wants to change one or more of an inodes attributes. Write
- * that change out to disk.
- *
- * Returns: errno
- */
-
-static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
-{
- struct inode *inode = dentry->d_inode;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder i_gh;
- int error;
-
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
- if (error)
- return error;
-
- error = -EPERM;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto out;
-
- error = inode_change_ok(inode, attr);
- if (error)
- goto out;
-
- if (attr->ia_valid & ATTR_SIZE)
- error = gfs2_setattr_size(inode, attr->ia_size);
- else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
- error = setattr_chown(inode, attr);
- else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
- error = gfs2_acl_chmod(ip, attr);
- else
- error = gfs2_setattr_simple(ip, attr);
-
-out:
- gfs2_glock_dq_uninit(&i_gh);
- if (!error)
- mark_inode_dirty(inode);
- return error;
-}
-
-/**
- * gfs2_getattr - Read out an inode's attributes
- * @mnt: The vfsmount the inode is being accessed from
- * @dentry: The dentry to stat
- * @stat: The inode's stats
- *
- * This may be called from the VFS directly, or from within GFS2 with the
- * inode locked, so we look to see if the glock is already locked and only
- * lock the glock if its not already been done. Note that its the NFS
- * readdirplus operation which causes this to be called (from filldir)
- * with the glock already held.
- *
- * Returns: errno
- */
-
-static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode = dentry->d_inode;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int error;
- int unlock = 0;
-
- if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
- if (error)
- return error;
- unlock = 1;
- }
-
- generic_fillattr(inode, stat);
- if (unlock)
- gfs2_glock_dq_uninit(&gh);
-
- return 0;
-}
-
-static int gfs2_setxattr(struct dentry *dentry, const char *name,
- const void *data, size_t size, int flags)
-{
- struct inode *inode = dentry->d_inode;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int ret;
-
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- ret = gfs2_glock_nq(&gh);
- if (ret == 0) {
- ret = generic_setxattr(dentry, name, data, size, flags);
- gfs2_glock_dq(&gh);
- }
- gfs2_holder_uninit(&gh);
- return ret;
-}
-
-static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
- void *data, size_t size)
-{
- struct inode *inode = dentry->d_inode;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int ret;
-
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
- ret = gfs2_glock_nq(&gh);
- if (ret == 0) {
- ret = generic_getxattr(dentry, name, data, size);
- gfs2_glock_dq(&gh);
- }
- gfs2_holder_uninit(&gh);
- return ret;
-}
-
-static int gfs2_removexattr(struct dentry *dentry, const char *name)
-{
- struct inode *inode = dentry->d_inode;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int ret;
-
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- ret = gfs2_glock_nq(&gh);
- if (ret == 0) {
- ret = generic_removexattr(dentry, name);
- gfs2_glock_dq(&gh);
- }
- gfs2_holder_uninit(&gh);
- return ret;
-}
-
-static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- u64 start, u64 len)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int ret;
-
- ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
- if (ret)
- return ret;
-
- mutex_lock(&inode->i_mutex);
-
- ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- if (ret)
- goto out;
-
- if (gfs2_is_stuffed(ip)) {
- u64 phys = ip->i_no_addr << inode->i_blkbits;
- u64 size = i_size_read(inode);
- u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
- FIEMAP_EXTENT_DATA_INLINE;
- phys += sizeof(struct gfs2_dinode);
- phys += start;
- if (start + len > size)
- len = size - start;
- if (start < size)
- ret = fiemap_fill_next_extent(fieinfo, start, phys,
- len, flags);
- if (ret == 1)
- ret = 0;
- } else {
- ret = __generic_block_fiemap(inode, fieinfo, start, len,
- gfs2_block_map);
- }
-
- gfs2_glock_dq_uninit(&gh);
-out:
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
-const struct inode_operations gfs2_file_iops = {
- .permission = gfs2_permission,
- .setattr = gfs2_setattr,
- .getattr = gfs2_getattr,
- .setxattr = gfs2_setxattr,
- .getxattr = gfs2_getxattr,
- .listxattr = gfs2_listxattr,
- .removexattr = gfs2_removexattr,
- .fiemap = gfs2_fiemap,
-};
-
-const struct inode_operations gfs2_dir_iops = {
- .create = gfs2_create,
- .lookup = gfs2_lookup,
- .link = gfs2_link,
- .unlink = gfs2_unlink,
- .symlink = gfs2_symlink,
- .mkdir = gfs2_mkdir,
- .rmdir = gfs2_rmdir,
- .mknod = gfs2_mknod,
- .rename = gfs2_rename,
- .permission = gfs2_permission,
- .setattr = gfs2_setattr,
- .getattr = gfs2_getattr,
- .setxattr = gfs2_setxattr,
- .getxattr = gfs2_getxattr,
- .listxattr = gfs2_listxattr,
- .removexattr = gfs2_removexattr,
- .fiemap = gfs2_fiemap,
-};
-
-const struct inode_operations gfs2_symlink_iops = {
- .readlink = generic_readlink,
- .follow_link = gfs2_follow_link,
- .put_link = gfs2_put_link,
- .permission = gfs2_permission,
- .setattr = gfs2_setattr,
- .getattr = gfs2_getattr,
- .setxattr = gfs2_setxattr,
- .getxattr = gfs2_getxattr,
- .listxattr = gfs2_listxattr,
- .removexattr = gfs2_removexattr,
- .fiemap = gfs2_fiemap,
-};
-
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e23d9864c418..42e8d23bc047 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -38,6 +38,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
@@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(qd_lru_lock);
-int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
{
struct gfs2_quota_data *qd;
struct gfs2_sbd *sdp;
+ int nr_to_scan = sc->nr_to_scan;
- if (nr == 0)
+ if (nr_to_scan == 0)
goto out;
- if (!(gfp_mask & __GFP_FS))
+ if (!(sc->gfp_mask & __GFP_FS))
return -1;
spin_lock(&qd_lru_lock);
- while (nr && !list_empty(&qd_lru_list)) {
+ while (nr_to_scan && !list_empty(&qd_lru_list)) {
qd = list_entry(qd_lru_list.next,
struct gfs2_quota_data, qd_reclaim);
sdp = qd->qd_gl->gl_sbd;
@@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
spin_unlock(&qd_lru_lock);
kmem_cache_free(gfs2_quotad_cachep, qd);
spin_lock(&qd_lru_lock);
- nr--;
+ nr_to_scan--;
}
spin_unlock(&qd_lru_lock);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e7d236ca48bd..90bf1c302a98 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -12,6 +12,7 @@
struct gfs2_inode;
struct gfs2_sbd;
+struct shrink_control;
#define NO_QUOTA_CHANGE ((u32)-1)
@@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
-extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
+ struct shrink_control *sc);
extern const struct quotactl_ops gfs2_quotactl_ops;
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6fcae8469f6d..9b780df3fd54 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
unsigned char *buf2, unsigned int offset,
- unsigned int buflen, u32 block,
+ struct gfs2_bitmap *bi, u32 block,
unsigned char new_state)
{
unsigned char *byte1, *byte2, *end, cur_state;
+ unsigned int buflen = bi->bi_len;
const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
if (unlikely(!valid_change[new_state * 4 + cur_state])) {
+ printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
+ "new_state=%d\n",
+ (unsigned long long)block, cur_state, new_state);
+ printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
+ (unsigned long long)rgd->rd_addr,
+ (unsigned long)bi->bi_start);
+ printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
+ (unsigned long)bi->bi_offset,
+ (unsigned long)bi->bi_len);
+ dump_stack();
gfs2_consist_rgrpd(rgd);
return;
}
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
if (gl) {
gl->gl_object = NULL;
+ gfs2_glock_add_to_lru(gl);
gfs2_glock_put(gl);
}
@@ -1365,7 +1377,7 @@ skip:
gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
- bi->bi_len, blk, new_state);
+ bi, blk, new_state);
goal = blk;
while (*n < elen) {
goal++;
@@ -1375,7 +1387,7 @@ skip:
GFS2_BLKST_FREE)
break;
gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
- bi->bi_len, goal, new_state);
+ bi, goal, new_state);
(*n)++;
}
out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
}
gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
- bi->bi_len, buf_blk, new_state);
+ bi, buf_blk, new_state);
}
return rgd;
@@ -1617,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
+
+ /* Directories keep their data in the metadata address space */
+ if (ip->i_depth)
+ gfs2_meta_wipe(ip, bstart, blen);
}
/**
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9f28e66dad1..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
#include <linux/time.h>
#include <linux/wait.h>
#include <linux/writeback.h>
+#include <linux/backing-dev.h>
#include "gfs2.h"
#include "incore.h"
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
mutex_unlock(&sdp->sd_freeze_lock);
}
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+ struct gfs2_dinode *str = buf;
+
+ str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+ str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+ str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+ str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+ str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+ str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+ str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+ str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+ str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+ str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
+ str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
+ str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+ str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+ str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+
+ str->di_goal_meta = cpu_to_be64(ip->i_goal);
+ str->di_goal_data = cpu_to_be64(ip->i_goal);
+ str->di_generation = cpu_to_be64(ip->i_generation);
+
+ str->di_flags = cpu_to_be32(ip->i_diskflags);
+ str->di_height = cpu_to_be16(ip->i_height);
+ str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+ !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
+ GFS2_FORMAT_DE : 0);
+ str->di_depth = cpu_to_be16(ip->i_depth);
+ str->di_entries = cpu_to_be32(ip->i_entries);
+
+ str->di_eattr = cpu_to_be64(ip->i_eattr);
+ str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+ str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+ str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
/**
* gfs2_write_inode - Make sure the inode is stable on the disk
* @inode: The inode
- * @sync: synchronous write flag
+ * @wbc: The writeback control structure
*
* Returns: errno
*/
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
+ struct backing_dev_info *bdi = metamapping->backing_dev_info;
struct gfs2_holder gh;
struct buffer_head *bh;
struct timespec atime;
struct gfs2_dinode *di;
- int ret = 0;
+ int ret = -EAGAIN;
- /* Check this is a "normal" inode, etc */
+ /* Skip timestamp update, if this is from a memalloc */
if (current->flags & PF_MEMALLOC)
- return 0;
+ goto do_flush;
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
if (ret)
goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
do_flush:
if (wbc->sync_mode == WB_SYNC_ALL)
gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+ filemap_fdatawrite(metamapping);
+ if (bdi->dirty_exceeded)
+ gfs2_ail1_flush(sdp, wbc);
+ if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
+ ret = filemap_fdatawait(metamapping);
+ if (ret)
+ mark_inode_dirty_sync(inode);
return ret;
}
@@ -874,8 +920,9 @@ restart:
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
- if (wait && sb->s_fs_info)
- gfs2_log_flush(sb->s_fs_info, NULL);
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ if (wait && sdp)
+ gfs2_log_flush(sdp, NULL);
return 0;
}
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
return 0;
}
+static void gfs2_final_release_pages(struct gfs2_inode *ip)
+{
+ struct inode *inode = &ip->i_inode;
+ struct gfs2_glock *gl = ip->i_gl;
+
+ truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (atomic_read(&gl->gl_revokes) == 0) {
+ clear_bit(GLF_LFLUSH, &gl->gl_flags);
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
+ }
+}
+
+static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct gfs2_alloc *al;
+ struct gfs2_rgrpd *rgd;
+ int error;
+
+ if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
+
+ al = gfs2_alloc_get(ip);
+ if (!al)
+ return -ENOMEM;
+
+ error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+ if (error)
+ goto out;
+
+ error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+ if (error)
+ goto out_qs;
+
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+ if (!rgd) {
+ gfs2_consist_inode(ip);
+ error = -EIO;
+ goto out_rindex_relse;
+ }
+
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+ &al->al_rgd_gh);
+ if (error)
+ goto out_rindex_relse;
+
+ error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
+ sdp->sd_jdesc->jd_blocks);
+ if (error)
+ goto out_rg_gunlock;
+
+ gfs2_free_di(rgd, ip);
+
+ gfs2_final_release_pages(ip);
+
+ gfs2_trans_end(sdp);
+
+out_rg_gunlock:
+ gfs2_glock_dq_uninit(&al->al_rgd_gh);
+out_rindex_relse:
+ gfs2_glock_dq_uninit(&al->al_ri_gh);
+out_qs:
+ gfs2_quota_unhold(ip);
+out:
+ gfs2_alloc_put(ip);
+ return error;
+}
+
/*
* We have to (at the moment) hold the inodes main lock to cover
* the gap between unlocking the shared lock on the iopen lock and
@@ -1371,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
}
error = gfs2_dinode_dealloc(ip);
- if (error)
- goto out_unlock;
+ goto out_unlock;
out_truncate:
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
goto out_unlock;
- /* Needs to be done before glock release & also in a transaction */
- truncate_inode_pages(&inode->i_data, 0);
+ gfs2_final_release_pages(ip);
gfs2_trans_end(sdp);
out_unlock:
@@ -1394,6 +1511,7 @@ out:
end_writeback(inode);
ip->i_gl->gl_object = NULL;
+ gfs2_glock_add_to_lru(ip->i_gl);
gfs2_glock_put(ip->i_gl);
ip->i_gl = NULL;
if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
{
- const u8 *uuid = sdp->sd_sb.sb_uuid;
+ struct super_block *s = sdp->sd_vfs;
+ const u8 *uuid = s->s_uuid;
buf[0] = '\0';
if (!gfs2_uuid_valid(uuid))
return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- const u8 *uuid = sdp->sd_sb.sb_uuid;
+ struct super_block *s = sdp->sd_vfs;
+ const u8 *uuid = s->s_uuid;
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
#include <linux/buffer_head.h>
#include <linux/dlmconstants.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/writeback.h>
#include "incore.h"
#include "glock.h"
@@ -40,7 +41,9 @@
{(1UL << GLF_REPLY_PENDING), "r" }, \
{(1UL << GLF_INITIAL), "I" }, \
{(1UL << GLF_FROZEN), "F" }, \
- {(1UL << GLF_QUEUED), "q" })
+ {(1UL << GLF_QUEUED), "q" }, \
+ {(1UL << GLF_LRU), "L" }, \
+ {(1UL << GLF_OBJECT), "o" })
#ifndef NUMPTY
#define NUMPTY
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
__entry->new_state = glock_trace_state(new_state);
__entry->tgt_state = glock_trace_state(gl->gl_target);
__entry->dmt_state = glock_trace_state(gl->gl_demote_state);
- __entry->flags = gl->gl_flags;
+ __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
),
TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
__entry->gltype = gl->gl_name.ln_type;
__entry->glnum = gl->gl_name.ln_number;
__entry->cur_state = glock_trace_state(gl->gl_state);
- __entry->flags = gl->gl_flags;
+ __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
),
TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
__entry->glnum = gl->gl_name.ln_number;
__entry->cur_state = glock_trace_state(gl->gl_state);
__entry->dmt_state = glock_trace_state(gl->gl_demote_state);
- __entry->flags = gl->gl_flags;
+ __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
),
TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
MINOR(__entry->dev), __entry->blocks)
);
+/* Writing back the AIL */
+TRACE_EVENT(gfs2_ail_flush,
+
+ TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
+
+ TP_ARGS(sdp, wbc, start),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, start )
+ __field( int, sync_mode )
+ __field( long, nr_to_write )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sdp->sd_vfs->s_dev;
+ __entry->start = start;
+ __entry->sync_mode = wbc->sync_mode;
+ __entry->nr_to_write = wbc->nr_to_write;
+ ),
+
+ TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->start ? "start" : "end",
+ __entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
+ __entry->nr_to_write)
+);
+
/* Section 3 - bmap
*
* Objectives:
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 0c39dc3ef7d7..56bd15c5bf6c 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -1,7 +1,6 @@
config HPFS_FS
tristate "OS/2 HPFS file system support"
depends on BLOCK
- depends on BROKEN || !PREEMPT
help
OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index 5503e2c28910..7a5eb2c718c8 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -8,8 +8,6 @@
#include "hpfs_fn.h"
-static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
-
/*
* Check if a sector is allocated in bitmap
* This is really slow. Turned on only if chk==2
@@ -18,9 +16,9 @@ static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
{
struct quad_buffer_head qbh;
- unsigned *bmp;
+ u32 *bmp;
if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail;
- if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f)) & 1) {
+ if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) {
hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec);
goto fail1;
}
@@ -28,7 +26,7 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) {
unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4;
if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail;
- if ((bmp[ssec >> 5] >> (ssec & 0x1f)) & 1) {
+ if ((le32_to_cpu(bmp[ssec >> 5]) >> (ssec & 0x1f)) & 1) {
hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec);
goto fail1;
}
@@ -75,7 +73,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
hpfs_error(s, "Bad allocation size: %d", n);
return 0;
}
- lock_super(s);
if (bs != ~0x3fff) {
if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls;
} else {
@@ -85,10 +82,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
ret = bs + nr;
goto rt;
}
- /*if (!tstbits(bmp, nr + n, n + forward)) {
- ret = bs + nr + n;
- goto rt;
- }*/
q = nr + n; b = 0;
while ((a = tstbits(bmp, q, n + forward)) != 0) {
q += a;
@@ -105,14 +98,14 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
goto rt;
}
nr >>= 5;
- /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) {*/
+ /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) */
i = nr;
do {
- if (!bmp[i]) goto cont;
- if (n + forward >= 0x3f && bmp[i] != -1) goto cont;
+ if (!le32_to_cpu(bmp[i])) goto cont;
+ if (n + forward >= 0x3f && le32_to_cpu(bmp[i]) != 0xffffffff) goto cont;
q = i<<5;
if (i > 0) {
- unsigned k = bmp[i-1];
+ unsigned k = le32_to_cpu(bmp[i-1]);
while (k & 0x80000000) {
q--; k <<= 1;
}
@@ -132,18 +125,17 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
} while (i != nr);
rt:
if (ret) {
- if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (bmp[(ret & 0x3fff) >> 5] | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
+ if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (le32_to_cpu(bmp[(ret & 0x3fff) >> 5]) | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret);
ret = 0;
goto b;
}
- bmp[(ret & 0x3fff) >> 5] &= ~(((1 << n) - 1) << (ret & 0x1f));
+ bmp[(ret & 0x3fff) >> 5] &= cpu_to_le32(~(((1 << n) - 1) << (ret & 0x1f)));
hpfs_mark_4buffers_dirty(&qbh);
}
b:
hpfs_brelse4(&qbh);
uls:
- unlock_super(s);
return ret;
}
@@ -155,7 +147,7 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
* sectors
*/
-secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward, int lock)
+secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward)
{
secno sec;
int i;
@@ -167,7 +159,6 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
forward = -forward;
f_p = 1;
}
- if (lock) hpfs_lock_creation(s);
n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14;
if (near && near < sbi->sb_fs_size) {
if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret;
@@ -214,18 +205,17 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
ret:
if (sec && f_p) {
for (i = 0; i < forward; i++) {
- if (!hpfs_alloc_if_possible_nolock(s, sec + i + 1)) {
+ if (!hpfs_alloc_if_possible(s, sec + i + 1)) {
hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i);
sec = 0;
break;
}
}
}
- if (lock) hpfs_unlock_creation(s);
return sec;
}
-static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
+static secno alloc_in_dirband(struct super_block *s, secno near)
{
unsigned nr = near;
secno sec;
@@ -236,49 +226,35 @@ static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4;
nr -= sbi->sb_dirband_start;
nr >>= 2;
- if (lock) hpfs_lock_creation(s);
sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0);
- if (lock) hpfs_unlock_creation(s);
if (!sec) return 0;
return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start;
}
/* Alloc sector if it's free */
-static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec)
+int hpfs_alloc_if_possible(struct super_block *s, secno sec)
{
struct quad_buffer_head qbh;
- unsigned *bmp;
- lock_super(s);
+ u32 *bmp;
if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end;
- if (bmp[(sec & 0x3fff) >> 5] & (1 << (sec & 0x1f))) {
- bmp[(sec & 0x3fff) >> 5] &= ~(1 << (sec & 0x1f));
+ if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) {
+ bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f)));
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
- unlock_super(s);
return 1;
}
hpfs_brelse4(&qbh);
end:
- unlock_super(s);
return 0;
}
-int hpfs_alloc_if_possible(struct super_block *s, secno sec)
-{
- int r;
- hpfs_lock_creation(s);
- r = hpfs_alloc_if_possible_nolock(s, sec);
- hpfs_unlock_creation(s);
- return r;
-}
-
/* Free sectors in bitmaps */
void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
{
struct quad_buffer_head qbh;
- unsigned *bmp;
+ u32 *bmp;
struct hpfs_sb_info *sbi = hpfs_sb(s);
/*printk("2 - ");*/
if (!n) return;
@@ -286,26 +262,22 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
hpfs_error(s, "Trying to free reserved sector %08x", sec);
return;
}
- lock_super(s);
sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n;
if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff;
new_map:
if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) {
- unlock_super(s);
return;
}
new_tst:
- if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f) & 1)) {
+ if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f) & 1)) {
hpfs_error(s, "sector %08x not allocated", sec);
hpfs_brelse4(&qbh);
- unlock_super(s);
return;
}
- bmp[(sec & 0x3fff) >> 5] |= 1 << (sec & 0x1f);
+ bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1 << (sec & 0x1f));
if (!--n) {
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
- unlock_super(s);
return;
}
if (!(++sec & 0x3fff)) {
@@ -327,13 +299,13 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14;
int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff;
int i, j;
- unsigned *bmp;
+ u32 *bmp;
struct quad_buffer_head qbh;
if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
for (j = 0; j < 512; j++) {
unsigned k;
- if (!bmp[j]) continue;
- for (k = bmp[j]; k; k >>= 1) if (k & 1) if (!--n) {
+ if (!le32_to_cpu(bmp[j])) continue;
+ for (k = le32_to_cpu(bmp[j]); k; k >>= 1) if (k & 1) if (!--n) {
hpfs_brelse4(&qbh);
return 0;
}
@@ -352,10 +324,10 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
chk_bmp:
if (bmp) {
for (j = 0; j < 512; j++) {
- unsigned k;
- if (!bmp[j]) continue;
+ u32 k;
+ if (!le32_to_cpu(bmp[j])) continue;
for (k = 0xf; k; k <<= 4)
- if ((bmp[j] & k) == k) {
+ if ((le32_to_cpu(bmp[j]) & k) == k) {
if (!--n) {
hpfs_brelse4(&qbh);
return 0;
@@ -379,44 +351,40 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno)
hpfs_free_sectors(s, dno, 4);
} else {
struct quad_buffer_head qbh;
- unsigned *bmp;
+ u32 *bmp;
unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4;
- lock_super(s);
if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
- unlock_super(s);
return;
}
- bmp[ssec >> 5] |= 1 << (ssec & 0x1f);
+ bmp[ssec >> 5] |= cpu_to_le32(1 << (ssec & 0x1f));
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
- unlock_super(s);
}
}
struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near,
- dnode_secno *dno, struct quad_buffer_head *qbh,
- int lock)
+ dnode_secno *dno, struct quad_buffer_head *qbh)
{
struct dnode *d;
if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) {
- if (!(*dno = alloc_in_dirband(s, near, lock)))
- if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) return NULL;
+ if (!(*dno = alloc_in_dirband(s, near)))
+ if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL;
} else {
- if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock)))
- if (!(*dno = alloc_in_dirband(s, near, lock))) return NULL;
+ if (!(*dno = hpfs_alloc_sector(s, near, 4, 0)))
+ if (!(*dno = alloc_in_dirband(s, near))) return NULL;
}
if (!(d = hpfs_get_4sectors(s, *dno, qbh))) {
hpfs_free_dnode(s, *dno);
return NULL;
}
memset(d, 0, 2048);
- d->magic = DNODE_MAGIC;
- d->first_free = 52;
+ d->magic = cpu_to_le32(DNODE_MAGIC);
+ d->first_free = cpu_to_le32(52);
d->dirent[0] = 32;
d->dirent[2] = 8;
d->dirent[30] = 1;
d->dirent[31] = 255;
- d->self = *dno;
+ d->self = cpu_to_le32(*dno);
return d;
}
@@ -424,16 +392,16 @@ struct fnode *hpfs_alloc_fnode(struct super_block *s, secno near, fnode_secno *f
struct buffer_head **bh)
{
struct fnode *f;
- if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD, 1))) return NULL;
+ if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD))) return NULL;
if (!(f = hpfs_get_sector(s, *fno, bh))) {
hpfs_free_sectors(s, *fno, 1);
return NULL;
}
memset(f, 0, 512);
- f->magic = FNODE_MAGIC;
- f->ea_offs = 0xc4;
+ f->magic = cpu_to_le32(FNODE_MAGIC);
+ f->ea_offs = cpu_to_le16(0xc4);
f->btree.n_free_nodes = 8;
- f->btree.first_free = 8;
+ f->btree.first_free = cpu_to_le16(8);
return f;
}
@@ -441,16 +409,16 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a
struct buffer_head **bh)
{
struct anode *a;
- if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD, 1))) return NULL;
+ if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD))) return NULL;
if (!(a = hpfs_get_sector(s, *ano, bh))) {
hpfs_free_sectors(s, *ano, 1);
return NULL;
}
memset(a, 0, 512);
- a->magic = ANODE_MAGIC;
- a->self = *ano;
+ a->magic = cpu_to_le32(ANODE_MAGIC);
+ a->self = cpu_to_le32(*ano);
a->btree.n_free_nodes = 40;
a->btree.n_used_nodes = 0;
- a->btree.first_free = 8;
+ a->btree.first_free = cpu_to_le16(8);
return a;
}
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 6a2f04bf3df0..08b503e8ed29 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -22,8 +22,8 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1;
if (btree->internal) {
for (i = 0; i < btree->n_used_nodes; i++)
- if (btree->u.internal[i].file_secno > sec) {
- a = btree->u.internal[i].down;
+ if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) {
+ a = le32_to_cpu(btree->u.internal[i].down);
brelse(bh);
if (!(anode = hpfs_map_anode(s, a, &bh))) return -1;
btree = &anode->btree;
@@ -34,18 +34,18 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
return -1;
}
for (i = 0; i < btree->n_used_nodes; i++)
- if (btree->u.external[i].file_secno <= sec &&
- btree->u.external[i].file_secno + btree->u.external[i].length > sec) {
- a = btree->u.external[i].disk_secno + sec - btree->u.external[i].file_secno;
+ if (le32_to_cpu(btree->u.external[i].file_secno) <= sec &&
+ le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > sec) {
+ a = le32_to_cpu(btree->u.external[i].disk_secno) + sec - le32_to_cpu(btree->u.external[i].file_secno);
if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) {
brelse(bh);
return -1;
}
if (inode) {
struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
- hpfs_inode->i_file_sec = btree->u.external[i].file_secno;
- hpfs_inode->i_disk_sec = btree->u.external[i].disk_secno;
- hpfs_inode->i_n_secs = btree->u.external[i].length;
+ hpfs_inode->i_file_sec = le32_to_cpu(btree->u.external[i].file_secno);
+ hpfs_inode->i_disk_sec = le32_to_cpu(btree->u.external[i].disk_secno);
+ hpfs_inode->i_n_secs = le32_to_cpu(btree->u.external[i].length);
}
brelse(bh);
return a;
@@ -83,8 +83,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
return -1;
}
if (btree->internal) {
- a = btree->u.internal[n].down;
- btree->u.internal[n].file_secno = -1;
+ a = le32_to_cpu(btree->u.internal[n].down);
+ btree->u.internal[n].file_secno = cpu_to_le32(-1);
mark_buffer_dirty(bh);
brelse(bh);
if (hpfs_sb(s)->sb_chk)
@@ -94,15 +94,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
goto go_down;
}
if (n >= 0) {
- if (btree->u.external[n].file_secno + btree->u.external[n].length != fsecno) {
+ if (le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length) != fsecno) {
hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x",
- btree->u.external[n].file_secno + btree->u.external[n].length, fsecno,
+ le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length), fsecno,
fnod?'f':'a', node);
brelse(bh);
return -1;
}
- if (hpfs_alloc_if_possible(s, se = btree->u.external[n].disk_secno + btree->u.external[n].length)) {
- btree->u.external[n].length++;
+ if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) {
+ btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1);
mark_buffer_dirty(bh);
brelse(bh);
return se;
@@ -115,20 +115,20 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
}
se = !fnod ? node : (node + 16384) & ~16383;
}
- if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M, 1))) {
+ if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M))) {
brelse(bh);
return -1;
}
- fs = n < 0 ? 0 : btree->u.external[n].file_secno + btree->u.external[n].length;
+ fs = n < 0 ? 0 : le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length);
if (!btree->n_free_nodes) {
- up = a != node ? anode->up : -1;
+ up = a != node ? le32_to_cpu(anode->up) : -1;
if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) {
brelse(bh);
hpfs_free_sectors(s, se, 1);
return -1;
}
if (a == node && fnod) {
- anode->up = node;
+ anode->up = cpu_to_le32(node);
anode->btree.fnode_parent = 1;
anode->btree.n_used_nodes = btree->n_used_nodes;
anode->btree.first_free = btree->first_free;
@@ -137,9 +137,9 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
btree->internal = 1;
btree->n_free_nodes = 11;
btree->n_used_nodes = 1;
- btree->first_free = (char *)&(btree->u.internal[1]) - (char *)btree;
- btree->u.internal[0].file_secno = -1;
- btree->u.internal[0].down = na;
+ btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree);
+ btree->u.internal[0].file_secno = cpu_to_le32(-1);
+ btree->u.internal[0].down = cpu_to_le32(na);
mark_buffer_dirty(bh);
} else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) {
brelse(bh);
@@ -153,15 +153,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
btree = &anode->btree;
}
btree->n_free_nodes--; n = btree->n_used_nodes++;
- btree->first_free += 12;
- btree->u.external[n].disk_secno = se;
- btree->u.external[n].file_secno = fs;
- btree->u.external[n].length = 1;
+ btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12);
+ btree->u.external[n].disk_secno = cpu_to_le32(se);
+ btree->u.external[n].file_secno = cpu_to_le32(fs);
+ btree->u.external[n].length = cpu_to_le32(1);
mark_buffer_dirty(bh);
brelse(bh);
if ((a == node && fnod) || na == -1) return se;
c2 = 0;
- while (up != -1) {
+ while (up != (anode_secno)-1) {
struct anode *new_anode;
if (hpfs_sb(s)->sb_chk)
if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1;
@@ -174,47 +174,47 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
}
if (btree->n_free_nodes) {
btree->n_free_nodes--; n = btree->n_used_nodes++;
- btree->first_free += 8;
- btree->u.internal[n].file_secno = -1;
- btree->u.internal[n].down = na;
- btree->u.internal[n-1].file_secno = fs;
+ btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8);
+ btree->u.internal[n].file_secno = cpu_to_le32(-1);
+ btree->u.internal[n].down = cpu_to_le32(na);
+ btree->u.internal[n-1].file_secno = cpu_to_le32(fs);
mark_buffer_dirty(bh);
brelse(bh);
brelse(bh2);
hpfs_free_sectors(s, ra, 1);
if ((anode = hpfs_map_anode(s, na, &bh))) {
- anode->up = up;
+ anode->up = cpu_to_le32(up);
anode->btree.fnode_parent = up == node && fnod;
mark_buffer_dirty(bh);
brelse(bh);
}
return se;
}
- up = up != node ? anode->up : -1;
- btree->u.internal[btree->n_used_nodes - 1].file_secno = /*fs*/-1;
+ up = up != node ? le32_to_cpu(anode->up) : -1;
+ btree->u.internal[btree->n_used_nodes - 1].file_secno = cpu_to_le32(/*fs*/-1);
mark_buffer_dirty(bh);
brelse(bh);
a = na;
if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) {
anode = new_anode;
- /*anode->up = up != -1 ? up : ra;*/
+ /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/
anode->btree.internal = 1;
anode->btree.n_used_nodes = 1;
anode->btree.n_free_nodes = 59;
- anode->btree.first_free = 16;
- anode->btree.u.internal[0].down = a;
- anode->btree.u.internal[0].file_secno = -1;
+ anode->btree.first_free = cpu_to_le16(16);
+ anode->btree.u.internal[0].down = cpu_to_le32(a);
+ anode->btree.u.internal[0].file_secno = cpu_to_le32(-1);
mark_buffer_dirty(bh);
brelse(bh);
if ((anode = hpfs_map_anode(s, a, &bh))) {
- anode->up = na;
+ anode->up = cpu_to_le32(na);
mark_buffer_dirty(bh);
brelse(bh);
}
} else na = a;
}
if ((anode = hpfs_map_anode(s, na, &bh))) {
- anode->up = node;
+ anode->up = cpu_to_le32(node);
if (fnod) anode->btree.fnode_parent = 1;
mark_buffer_dirty(bh);
brelse(bh);
@@ -232,14 +232,14 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
}
btree = &fnode->btree;
}
- ranode->up = node;
- memcpy(&ranode->btree, btree, btree->first_free);
+ ranode->up = cpu_to_le32(node);
+ memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free));
if (fnod) ranode->btree.fnode_parent = 1;
ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes;
if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) {
struct anode *unode;
- if ((unode = hpfs_map_anode(s, ranode->u.internal[n].down, &bh1))) {
- unode->up = ra;
+ if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) {
+ unode->up = cpu_to_le32(ra);
unode->btree.fnode_parent = 0;
mark_buffer_dirty(bh1);
brelse(bh1);
@@ -248,11 +248,11 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
btree->internal = 1;
btree->n_free_nodes = fnod ? 10 : 58;
btree->n_used_nodes = 2;
- btree->first_free = (char *)&btree->u.internal[2] - (char *)btree;
- btree->u.internal[0].file_secno = fs;
- btree->u.internal[0].down = ra;
- btree->u.internal[1].file_secno = -1;
- btree->u.internal[1].down = na;
+ btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree);
+ btree->u.internal[0].file_secno = cpu_to_le32(fs);
+ btree->u.internal[0].down = cpu_to_le32(ra);
+ btree->u.internal[1].file_secno = cpu_to_le32(-1);
+ btree->u.internal[1].down = cpu_to_le32(na);
mark_buffer_dirty(bh);
brelse(bh);
mark_buffer_dirty(bh2);
@@ -279,7 +279,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
go_down:
d2 = 0;
while (btree1->internal) {
- ano = btree1->u.internal[pos].down;
+ ano = le32_to_cpu(btree1->u.internal[pos].down);
if (level) brelse(bh);
if (hpfs_sb(s)->sb_chk)
if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1"))
@@ -290,7 +290,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
pos = 0;
}
for (i = 0; i < btree1->n_used_nodes; i++)
- hpfs_free_sectors(s, btree1->u.external[i].disk_secno, btree1->u.external[i].length);
+ hpfs_free_sectors(s, le32_to_cpu(btree1->u.external[i].disk_secno), le32_to_cpu(btree1->u.external[i].length));
go_up:
if (!level) return;
brelse(bh);
@@ -298,13 +298,13 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return;
hpfs_free_sectors(s, ano, 1);
oano = ano;
- ano = anode->up;
+ ano = le32_to_cpu(anode->up);
if (--level) {
if (!(anode = hpfs_map_anode(s, ano, &bh))) return;
btree1 = &anode->btree;
} else btree1 = btree;
for (i = 0; i < btree1->n_used_nodes; i++) {
- if (btree1->u.internal[i].down == oano) {
+ if (le32_to_cpu(btree1->u.internal[i].down) == oano) {
if ((pos = i + 1) < btree1->n_used_nodes)
goto go_down;
else
@@ -411,7 +411,7 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
if (fno) {
btree->n_free_nodes = 8;
btree->n_used_nodes = 0;
- btree->first_free = 8;
+ btree->first_free = cpu_to_le16(8);
btree->internal = 0;
mark_buffer_dirty(bh);
} else hpfs_free_sectors(s, f, 1);
@@ -421,22 +421,22 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
while (btree->internal) {
nodes = btree->n_used_nodes + btree->n_free_nodes;
for (i = 0; i < btree->n_used_nodes; i++)
- if (btree->u.internal[i].file_secno >= secs) goto f;
+ if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f;
brelse(bh);
hpfs_error(s, "internal btree %08x doesn't end with -1", node);
return;
f:
for (j = i + 1; j < btree->n_used_nodes; j++)
- hpfs_ea_remove(s, btree->u.internal[j].down, 1, 0);
+ hpfs_ea_remove(s, le32_to_cpu(btree->u.internal[j].down), 1, 0);
btree->n_used_nodes = i + 1;
btree->n_free_nodes = nodes - btree->n_used_nodes;
- btree->first_free = 8 + 8 * btree->n_used_nodes;
+ btree->first_free = cpu_to_le16(8 + 8 * btree->n_used_nodes);
mark_buffer_dirty(bh);
- if (btree->u.internal[i].file_secno == secs) {
+ if (btree->u.internal[i].file_secno == cpu_to_le32(secs)) {
brelse(bh);
return;
}
- node = btree->u.internal[i].down;
+ node = le32_to_cpu(btree->u.internal[i].down);
brelse(bh);
if (hpfs_sb(s)->sb_chk)
if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree"))
@@ -446,25 +446,25 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
}
nodes = btree->n_used_nodes + btree->n_free_nodes;
for (i = 0; i < btree->n_used_nodes; i++)
- if (btree->u.external[i].file_secno + btree->u.external[i].length >= secs) goto ff;
+ if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) >= secs) goto ff;
brelse(bh);
return;
ff:
- if (secs <= btree->u.external[i].file_secno) {
+ if (secs <= le32_to_cpu(btree->u.external[i].file_secno)) {
hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs);
if (i) i--;
}
- else if (btree->u.external[i].file_secno + btree->u.external[i].length > secs) {
- hpfs_free_sectors(s, btree->u.external[i].disk_secno + secs -
- btree->u.external[i].file_secno, btree->u.external[i].length
- - secs + btree->u.external[i].file_secno); /* I hope gcc optimizes this :-) */
- btree->u.external[i].length = secs - btree->u.external[i].file_secno;
+ else if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > secs) {
+ hpfs_free_sectors(s, le32_to_cpu(btree->u.external[i].disk_secno) + secs -
+ le32_to_cpu(btree->u.external[i].file_secno), le32_to_cpu(btree->u.external[i].length)
+ - secs + le32_to_cpu(btree->u.external[i].file_secno)); /* I hope gcc optimizes this :-) */
+ btree->u.external[i].length = cpu_to_le32(secs - le32_to_cpu(btree->u.external[i].file_secno));
}
for (j = i + 1; j < btree->n_used_nodes; j++)
- hpfs_free_sectors(s, btree->u.external[j].disk_secno, btree->u.external[j].length);
+ hpfs_free_sectors(s, le32_to_cpu(btree->u.external[j].disk_secno), le32_to_cpu(btree->u.external[j].length));
btree->n_used_nodes = i + 1;
btree->n_free_nodes = nodes - btree->n_used_nodes;
- btree->first_free = 8 + 12 * btree->n_used_nodes;
+ btree->first_free = cpu_to_le16(8 + 12 * btree->n_used_nodes);
mark_buffer_dirty(bh);
brelse(bh);
}
@@ -480,12 +480,12 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno)
struct extended_attribute *ea_end;
if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return;
if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree);
- else hpfs_remove_dtree(s, fnode->u.external[0].disk_secno);
+ else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno));
ea_end = fnode_end_ea(fnode);
for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
if (ea->indirect)
hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
- hpfs_ea_ext_remove(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l);
+ hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l));
brelse(bh);
hpfs_free_sectors(s, fno, 1);
}
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index 793cb9d943d2..9ecde27d1e29 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -9,22 +9,6 @@
#include <linux/slab.h>
#include "hpfs_fn.h"
-void hpfs_lock_creation(struct super_block *s)
-{
-#ifdef DEBUG_LOCKS
- printk("lock creation\n");
-#endif
- mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
-}
-
-void hpfs_unlock_creation(struct super_block *s)
-{
-#ifdef DEBUG_LOCKS
- printk("unlock creation\n");
-#endif
- mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
-}
-
/* Map a sector into a buffer and return pointers to it and to the buffer. */
void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp,
@@ -32,6 +16,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head
{
struct buffer_head *bh;
+ hpfs_lock_assert(s);
+
cond_resched();
*bhp = bh = sb_bread(s, secno);
@@ -50,6 +36,8 @@ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head
struct buffer_head *bh;
/*return hpfs_map_sector(s, secno, bhp, 0);*/
+ hpfs_lock_assert(s);
+
cond_resched();
if ((*bhp = bh = sb_getblk(s, secno)) != NULL) {
@@ -70,6 +58,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe
struct buffer_head *bh;
char *data;
+ hpfs_lock_assert(s);
+
cond_resched();
if (secno & 3) {
@@ -125,6 +115,8 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno,
{
cond_resched();
+ hpfs_lock_assert(s);
+
if (secno & 3) {
printk("HPFS: hpfs_get_4sectors: unaligned read\n");
return NULL;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b3d7c0ddb609..f46ae025bfb5 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -88,9 +88,9 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
(unsigned long)inode->i_ino);
}
- if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) {
+ if (hpfs_inode->i_dno != le32_to_cpu(fno->u.external[0].disk_secno)) {
e = 1;
- hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, fno->u.external[0].disk_secno);
+ hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, le32_to_cpu(fno->u.external[0].disk_secno));
}
brelse(bh);
if (e) {
@@ -156,7 +156,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
goto again;
}
tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3);
- if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) {
+ if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) {
filp->f_pos = old_pos;
if (tempname != de->name) kfree(tempname);
hpfs_brelse4(&qbh);
@@ -221,7 +221,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
* Get inode number, what we're after.
*/
- ino = de->fnode;
+ ino = le32_to_cpu(de->fnode);
/*
* Go find or make an inode.
@@ -236,7 +236,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
hpfs_init_inode(result);
if (de->directory)
hpfs_read_inode(result);
- else if (de->ea_size && hpfs_sb(dir->i_sb)->sb_eas)
+ else if (le32_to_cpu(de->ea_size) && hpfs_sb(dir->i_sb)->sb_eas)
hpfs_read_inode(result);
else {
result->i_mode |= S_IFREG;
@@ -250,8 +250,6 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
hpfs_result = hpfs_i(result);
if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino;
- hpfs_decide_conv(result, name, len);
-
if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) {
hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
goto bail1;
@@ -263,19 +261,19 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
*/
if (!result->i_ctime.tv_sec) {
- if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, de->creation_date)))
+ if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
result->i_ctime.tv_sec = 1;
result->i_ctime.tv_nsec = 0;
- result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, de->write_date);
+ result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
result->i_mtime.tv_nsec = 0;
- result->i_atime.tv_sec = local_to_gmt(dir->i_sb, de->read_date);
+ result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
result->i_atime.tv_nsec = 0;
- hpfs_result->i_ea_size = de->ea_size;
+ hpfs_result->i_ea_size = le32_to_cpu(de->ea_size);
if (!hpfs_result->i_ea_mode && de->read_only)
result->i_mode &= ~0222;
if (!de->directory) {
if (result->i_size == -1) {
- result->i_size = de->file_size;
+ result->i_size = le32_to_cpu(de->file_size);
result->i_data.a_ops = &hpfs_aops;
hpfs_i(result)->mmu_private = result->i_size;
/*
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 9b2ffadfc8c4..1e0e2ac30fd3 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -14,11 +14,11 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde)
struct hpfs_dirent *de_end = dnode_end_de(d);
int i = 1;
for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
- if (de == fde) return ((loff_t) d->self << 4) | (loff_t)i;
+ if (de == fde) return ((loff_t) le32_to_cpu(d->self) << 4) | (loff_t)i;
i++;
}
printk("HPFS: get_pos: not_found\n");
- return ((loff_t)d->self << 4) | (loff_t)1;
+ return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1;
}
void hpfs_add_pos(struct inode *inode, loff_t *pos)
@@ -130,29 +130,30 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno
{
struct hpfs_dirent *de;
if (!(de = dnode_last_de(d))) {
- hpfs_error(s, "set_last_pointer: empty dnode %08x", d->self);
+ hpfs_error(s, "set_last_pointer: empty dnode %08x", le32_to_cpu(d->self));
return;
}
if (hpfs_sb(s)->sb_chk) {
if (de->down) {
hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x",
- d->self, de_down_pointer(de));
+ le32_to_cpu(d->self), de_down_pointer(de));
return;
}
- if (de->length != 32) {
- hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", d->self);
+ if (le16_to_cpu(de->length) != 32) {
+ hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", le32_to_cpu(d->self));
return;
}
}
if (ptr) {
- if ((d->first_free += 4) > 2048) {
- hpfs_error(s,"set_last_pointer: too long dnode %08x", d->self);
- d->first_free -= 4;
+ d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4);
+ if (le32_to_cpu(d->first_free) > 2048) {
+ hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self));
+ d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4);
return;
}
- de->length = 36;
+ de->length = cpu_to_le16(36);
de->down = 1;
- *(dnode_secno *)((char *)de + 32) = ptr;
+ *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr);
}
}
@@ -168,7 +169,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last);
if (!c) {
- hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, d->self);
+ hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, le32_to_cpu(d->self));
return NULL;
}
if (c < 0) break;
@@ -176,15 +177,14 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
memmove((char *)de + d_size, de, (char *)de_end - (char *)de);
memset(de, 0, d_size);
if (down_ptr) {
- *(int *)((char *)de + d_size - 4) = down_ptr;
+ *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr);
de->down = 1;
}
- de->length = d_size;
- if (down_ptr) de->down = 1;
+ de->length = cpu_to_le16(d_size);
de->not_8x3 = hpfs_is_name_long(name, namelen);
de->namelen = namelen;
memcpy(de->name, name, namelen);
- d->first_free += d_size;
+ d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size);
return de;
}
@@ -194,25 +194,25 @@ static void hpfs_delete_de(struct super_block *s, struct dnode *d,
struct hpfs_dirent *de)
{
if (de->last) {
- hpfs_error(s, "attempt to delete last dirent in dnode %08x", d->self);
+ hpfs_error(s, "attempt to delete last dirent in dnode %08x", le32_to_cpu(d->self));
return;
}
- d->first_free -= de->length;
- memmove(de, de_next_de(de), d->first_free + (char *)d - (char *)de);
+ d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - le16_to_cpu(de->length));
+ memmove(de, de_next_de(de), le32_to_cpu(d->first_free) + (char *)d - (char *)de);
}
static void fix_up_ptrs(struct super_block *s, struct dnode *d)
{
struct hpfs_dirent *de;
struct hpfs_dirent *de_end = dnode_end_de(d);
- dnode_secno dno = d->self;
+ dnode_secno dno = le32_to_cpu(d->self);
for (de = dnode_first_de(d); de < de_end; de = de_next_de(de))
if (de->down) {
struct quad_buffer_head qbh;
struct dnode *dd;
if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) {
- if (dd->up != dno || dd->root_dnode) {
- dd->up = dno;
+ if (le32_to_cpu(dd->up) != dno || dd->root_dnode) {
+ dd->up = cpu_to_le32(dno);
dd->root_dnode = 0;
hpfs_mark_4buffers_dirty(&qbh);
}
@@ -262,7 +262,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
kfree(nname);
return 1;
}
- if (d->first_free + de_size(namelen, down_ptr) <= 2048) {
+ if (le32_to_cpu(d->first_free) + de_size(namelen, down_ptr) <= 2048) {
loff_t t;
copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de);
t = get_pos(d, de);
@@ -286,11 +286,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
kfree(nname);
return 1;
}
- memcpy(nd, d, d->first_free);
+ memcpy(nd, d, le32_to_cpu(d->first_free));
copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de);
for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1);
h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10;
- if (!(ad = hpfs_alloc_dnode(i->i_sb, d->up, &adno, &qbh1, 0))) {
+ if (!(ad = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &adno, &qbh1))) {
hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
hpfs_brelse4(&qbh);
kfree(nd);
@@ -313,20 +313,21 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
down_ptr = adno;
set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
de = de_next_de(de);
- memmove((char *)nd + 20, de, nd->first_free + (char *)nd - (char *)de);
- nd->first_free -= (char *)de - (char *)nd - 20;
- memcpy(d, nd, nd->first_free);
+ memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de);
+ nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20));
+ memcpy(d, nd, le32_to_cpu(nd->first_free));
for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos);
fix_up_ptrs(i->i_sb, ad);
if (!d->root_dnode) {
- dno = ad->up = d->up;
+ ad->up = d->up;
+ dno = le32_to_cpu(ad->up);
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
hpfs_mark_4buffers_dirty(&qbh1);
hpfs_brelse4(&qbh1);
goto go_up;
}
- if (!(rd = hpfs_alloc_dnode(i->i_sb, d->up, &rdno, &qbh2, 0))) {
+ if (!(rd = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &rdno, &qbh2))) {
hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
hpfs_brelse4(&qbh);
hpfs_brelse4(&qbh1);
@@ -338,7 +339,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
i->i_blocks += 4;
rd->root_dnode = 1;
rd->up = d->up;
- if (!(fnode = hpfs_map_fnode(i->i_sb, d->up, &bh))) {
+ if (!(fnode = hpfs_map_fnode(i->i_sb, le32_to_cpu(d->up), &bh))) {
hpfs_free_dnode(i->i_sb, rdno);
hpfs_brelse4(&qbh);
hpfs_brelse4(&qbh1);
@@ -347,10 +348,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
kfree(nname);
return 1;
}
- fnode->u.external[0].disk_secno = rdno;
+ fnode->u.external[0].disk_secno = cpu_to_le32(rdno);
mark_buffer_dirty(bh);
brelse(bh);
- d->up = ad->up = hpfs_i(i)->i_dno = rdno;
+ hpfs_i(i)->i_dno = rdno;
+ d->up = ad->up = cpu_to_le32(rdno);
d->root_dnode = ad->root_dnode = 0;
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
@@ -373,7 +375,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
int hpfs_add_dirent(struct inode *i,
const unsigned char *name, unsigned namelen,
- struct hpfs_dirent *new_de, int cdepth)
+ struct hpfs_dirent *new_de)
{
struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
struct dnode *d;
@@ -403,7 +405,6 @@ int hpfs_add_dirent(struct inode *i,
}
}
hpfs_brelse4(&qbh);
- if (!cdepth) hpfs_lock_creation(i->i_sb);
if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) {
c = 1;
goto ret;
@@ -411,7 +412,6 @@ int hpfs_add_dirent(struct inode *i,
i->i_version++;
c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
ret:
- if (!cdepth) hpfs_unlock_creation(i->i_sb);
return c;
}
@@ -437,9 +437,9 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
return 0;
if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0;
if (hpfs_sb(i->i_sb)->sb_chk) {
- if (dnode->up != chk_up) {
+ if (le32_to_cpu(dnode->up) != chk_up) {
hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x",
- dno, chk_up, dnode->up);
+ dno, chk_up, le32_to_cpu(dnode->up));
hpfs_brelse4(&qbh);
return 0;
}
@@ -455,7 +455,7 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
hpfs_brelse4(&qbh);
}
while (!(de = dnode_pre_last_de(dnode))) {
- dnode_secno up = dnode->up;
+ dnode_secno up = le32_to_cpu(dnode->up);
hpfs_brelse4(&qbh);
hpfs_free_dnode(i->i_sb, dno);
i->i_size -= 2048;
@@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
hpfs_brelse4(&qbh);
return 0;
}
- dnode->first_free -= 4;
- de->length -= 4;
+ dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
+ de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
de->down = 0;
hpfs_mark_4buffers_dirty(&qbh);
dno = up;
@@ -483,12 +483,12 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
t = get_pos(dnode, de);
for_all_poss(i, hpfs_pos_subst, t, 4);
for_all_poss(i, hpfs_pos_subst, t + 1, 5);
- if (!(nde = kmalloc(de->length, GFP_NOFS))) {
+ if (!(nde = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted");
hpfs_brelse4(&qbh);
return 0;
}
- memcpy(nde, de, de->length);
+ memcpy(nde, de, le16_to_cpu(de->length));
ddno = de->down ? de_down_pointer(de) : 0;
hpfs_delete_de(i->i_sb, dnode, de);
set_last_pointer(i->i_sb, dnode, ddno);
@@ -517,11 +517,11 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
try_it_again:
if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return;
if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return;
- if (dnode->first_free > 56) goto end;
- if (dnode->first_free == 52 || dnode->first_free == 56) {
+ if (le32_to_cpu(dnode->first_free) > 56) goto end;
+ if (le32_to_cpu(dnode->first_free) == 52 || le32_to_cpu(dnode->first_free) == 56) {
struct hpfs_dirent *de_end;
int root = dnode->root_dnode;
- up = dnode->up;
+ up = le32_to_cpu(dnode->up);
de = dnode_first_de(dnode);
down = de->down ? de_down_pointer(de) : 0;
if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) {
@@ -545,13 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
return;
}
if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
- d1->up = up;
+ d1->up = cpu_to_le32(up);
d1->root_dnode = 1;
hpfs_mark_4buffers_dirty(&qbh1);
hpfs_brelse4(&qbh1);
}
if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) {
- fnode->u.external[0].disk_secno = down;
+ fnode->u.external[0].disk_secno = cpu_to_le32(down);
mark_buffer_dirty(bh);
brelse(bh);
}
@@ -570,22 +570,22 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p);
if (!down) {
de->down = 0;
- de->length -= 4;
- dnode->first_free -= 4;
+ de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
+ dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
memmove(de_next_de(de), (char *)de_next_de(de) + 4,
- (char *)dnode + dnode->first_free - (char *)de_next_de(de));
+ (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de));
} else {
struct dnode *d1;
struct quad_buffer_head qbh1;
- *(dnode_secno *) ((void *) de + de->length - 4) = down;
+ *(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4) = down;
if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
- d1->up = up;
+ d1->up = cpu_to_le32(up);
hpfs_mark_4buffers_dirty(&qbh1);
hpfs_brelse4(&qbh1);
}
}
} else {
- hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, dnode->first_free);
+ hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, le32_to_cpu(dnode->first_free));
goto end;
}
@@ -596,18 +596,18 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
struct quad_buffer_head qbh1;
if (!de_next->down) goto endm;
ndown = de_down_pointer(de_next);
- if (!(de_cp = kmalloc(de->length, GFP_NOFS))) {
+ if (!(de_cp = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
printk("HPFS: out of memory for dtree balancing\n");
goto endm;
}
- memcpy(de_cp, de, de->length);
+ memcpy(de_cp, de, le16_to_cpu(de->length));
hpfs_delete_de(i->i_sb, dnode, de);
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4);
for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1);
if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) {
- d1->up = ndown;
+ d1->up = cpu_to_le32(ndown);
hpfs_mark_4buffers_dirty(&qbh1);
hpfs_brelse4(&qbh1);
}
@@ -635,7 +635,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
struct hpfs_dirent *del = dnode_last_de(d1);
dlp = del->down ? de_down_pointer(del) : 0;
if (!dlp && down) {
- if (d1->first_free > 2044) {
+ if (le32_to_cpu(d1->first_free) > 2044) {
if (hpfs_sb(i->i_sb)->sb_chk >= 2) {
printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
printk("HPFS: warning: terminating balancing operation\n");
@@ -647,38 +647,38 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
printk("HPFS: warning: goin'on\n");
}
- del->length += 4;
+ del->length = cpu_to_le16(le16_to_cpu(del->length) + 4);
del->down = 1;
- d1->first_free += 4;
+ d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4);
}
if (dlp && !down) {
- del->length -= 4;
+ del->length = cpu_to_le16(le16_to_cpu(del->length) - 4);
del->down = 0;
- d1->first_free -= 4;
+ d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4);
} else if (down)
- *(dnode_secno *) ((void *) del + del->length - 4) = down;
+ *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down);
} else goto endm;
- if (!(de_cp = kmalloc(de_prev->length, GFP_NOFS))) {
+ if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) {
printk("HPFS: out of memory for dtree balancing\n");
hpfs_brelse4(&qbh1);
goto endm;
}
hpfs_mark_4buffers_dirty(&qbh1);
hpfs_brelse4(&qbh1);
- memcpy(de_cp, de_prev, de_prev->length);
+ memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length));
hpfs_delete_de(i->i_sb, dnode, de_prev);
if (!de_prev->down) {
- de_prev->length += 4;
+ de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4);
de_prev->down = 1;
- dnode->first_free += 4;
+ dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4);
}
- *(dnode_secno *) ((void *) de_prev + de_prev->length - 4) = ndown;
+ *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown);
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4);
for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1));
if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) {
- d1->up = ndown;
+ d1->up = cpu_to_le32(ndown);
hpfs_mark_4buffers_dirty(&qbh1);
hpfs_brelse4(&qbh1);
}
@@ -701,7 +701,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
{
struct dnode *dnode = qbh->data;
dnode_secno down = 0;
- int lock = 0;
loff_t t;
if (de->first || de->last) {
hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno);
@@ -710,11 +709,8 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
}
if (de->down) down = de_down_pointer(de);
if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) {
- lock = 1;
- hpfs_lock_creation(i->i_sb);
if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) {
hpfs_brelse4(qbh);
- hpfs_unlock_creation(i->i_sb);
return 2;
}
}
@@ -727,11 +723,9 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
dnode_secno a = move_to_top(i, down, dno);
for_all_poss(i, hpfs_pos_subst, 5, t);
if (a) delete_empty_dnode(i, a);
- if (lock) hpfs_unlock_creation(i->i_sb);
return !a;
}
delete_empty_dnode(i, dno);
- if (lock) hpfs_unlock_creation(i->i_sb);
return 0;
}
@@ -751,8 +745,8 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
ptr = 0;
go_up:
if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return;
- if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && dnode->up != odno)
- hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, dnode->up);
+ if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && le32_to_cpu(dnode->up) != odno)
+ hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, le32_to_cpu(dnode->up));
de = dnode_first_de(dnode);
if (ptr) while(1) {
if (de->down) if (de_down_pointer(de) == ptr) goto process_de;
@@ -776,7 +770,7 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
if (!de->first && !de->last && n_items) (*n_items)++;
if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de;
ptr = dno;
- dno = dnode->up;
+ dno = le32_to_cpu(dnode->up);
if (dnode->root_dnode) {
hpfs_brelse4(&qbh);
return;
@@ -824,8 +818,8 @@ dnode_secno hpfs_de_as_down_as_possible(struct super_block *s, dnode_secno dno)
return d;
if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno;
if (hpfs_sb(s)->sb_chk)
- if (up && ((struct dnode *)qbh.data)->up != up)
- hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, ((struct dnode *)qbh.data)->up);
+ if (up && le32_to_cpu(((struct dnode *)qbh.data)->up) != up)
+ hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, le32_to_cpu(((struct dnode *)qbh.data)->up));
if (!de->down) {
hpfs_brelse4(&qbh);
return d;
@@ -874,7 +868,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
/* Going up */
if (dnode->root_dnode) goto bail;
- if (!(up_dnode = hpfs_map_dnode(inode->i_sb, dnode->up, &qbh0)))
+ if (!(up_dnode = hpfs_map_dnode(inode->i_sb, le32_to_cpu(dnode->up), &qbh0)))
goto bail;
end_up_de = dnode_end_de(up_dnode);
@@ -882,16 +876,16 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
for (up_de = dnode_first_de(up_dnode); up_de < end_up_de;
up_de = de_next_de(up_de)) {
if (!(++c & 077)) hpfs_error(inode->i_sb,
- "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", dnode->up);
+ "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", le32_to_cpu(dnode->up));
if (up_de->down && de_down_pointer(up_de) == dno) {
- *posp = ((loff_t) dnode->up << 4) + c;
+ *posp = ((loff_t) le32_to_cpu(dnode->up) << 4) + c;
hpfs_brelse4(&qbh0);
return de;
}
}
hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x",
- dno, dnode->up);
+ dno, le32_to_cpu(dnode->up));
hpfs_brelse4(&qbh0);
bail:
@@ -1017,17 +1011,17 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
/*name2[15] = 0xff;*/
name1len = 15; name2len = 256;
}
- if (!(upf = hpfs_map_fnode(s, f->up, &bh))) {
+ if (!(upf = hpfs_map_fnode(s, le32_to_cpu(f->up), &bh))) {
kfree(name2);
return NULL;
}
if (!upf->dirflag) {
brelse(bh);
- hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, f->up);
+ hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up));
kfree(name2);
return NULL;
}
- dno = upf->u.external[0].disk_secno;
+ dno = le32_to_cpu(upf->u.external[0].disk_secno);
brelse(bh);
go_down:
downd = 0;
@@ -1049,7 +1043,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
return NULL;
}
next_de:
- if (de->fnode == fno) {
+ if (le32_to_cpu(de->fnode) == fno) {
kfree(name2);
return de;
}
@@ -1065,7 +1059,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
goto go_down;
}
f:
- if (de->fnode == fno) {
+ if (le32_to_cpu(de->fnode) == fno) {
kfree(name2);
return de;
}
@@ -1074,7 +1068,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
if ((de = de_next_de(de)) < de_end) goto next_de;
if (d->root_dnode) goto not_found;
downd = dno;
- dno = d->up;
+ dno = le32_to_cpu(d->up);
hpfs_brelse4(qbh);
if (hpfs_sb(s)->sb_chk)
if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) {
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 45e53d972b42..d8b84d113c89 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -24,7 +24,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
}
if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return;
if (ea->indirect) {
- if (ea->valuelen != 8) {
+ if (ea_valuelen(ea) != 8) {
hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x",
ano ? "anode" : "sectors", a, pos);
return;
@@ -33,7 +33,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
return;
hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
}
- pos += ea->namelen + ea->valuelen + 5;
+ pos += ea->namelen + ea_valuelen(ea) + 5;
}
if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9);
else {
@@ -76,24 +76,24 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
unsigned pos;
int ano, len;
secno a;
+ char ex[4 + 255 + 1 + 8];
struct extended_attribute *ea;
struct extended_attribute *ea_end = fnode_end_ea(fnode);
for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
if (!strcmp(ea->name, key)) {
if (ea->indirect)
goto indirect;
- if (ea->valuelen >= size)
+ if (ea_valuelen(ea) >= size)
return -EINVAL;
- memcpy(buf, ea_data(ea), ea->valuelen);
- buf[ea->valuelen] = 0;
+ memcpy(buf, ea_data(ea), ea_valuelen(ea));
+ buf[ea_valuelen(ea)] = 0;
return 0;
}
- a = fnode->ea_secno;
- len = fnode->ea_size_l;
+ a = le32_to_cpu(fnode->ea_secno);
+ len = le32_to_cpu(fnode->ea_size_l);
ano = fnode->ea_anode;
pos = 0;
while (pos < len) {
- char ex[4 + 255 + 1 + 8];
ea = (struct extended_attribute *)ex;
if (pos + 4 > len) {
hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x",
@@ -106,14 +106,14 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
if (!strcmp(ea->name, key)) {
if (ea->indirect)
goto indirect;
- if (ea->valuelen >= size)
+ if (ea_valuelen(ea) >= size)
return -EINVAL;
- if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, buf))
+ if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), buf))
return -EIO;
- buf[ea->valuelen] = 0;
+ buf[ea_valuelen(ea)] = 0;
return 0;
}
- pos += ea->namelen + ea->valuelen + 5;
+ pos += ea->namelen + ea_valuelen(ea) + 5;
}
return -ENOENT;
indirect:
@@ -138,16 +138,16 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
if (!strcmp(ea->name, key)) {
if (ea->indirect)
return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
- if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) {
+ if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
printk("HPFS: out of memory for EA\n");
return NULL;
}
- memcpy(ret, ea_data(ea), ea->valuelen);
- ret[ea->valuelen] = 0;
+ memcpy(ret, ea_data(ea), ea_valuelen(ea));
+ ret[ea_valuelen(ea)] = 0;
return ret;
}
- a = fnode->ea_secno;
- len = fnode->ea_size_l;
+ a = le32_to_cpu(fnode->ea_secno);
+ len = le32_to_cpu(fnode->ea_size_l);
ano = fnode->ea_anode;
pos = 0;
while (pos < len) {
@@ -164,18 +164,18 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
if (!strcmp(ea->name, key)) {
if (ea->indirect)
return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
- if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) {
+ if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
printk("HPFS: out of memory for EA\n");
return NULL;
}
- if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, ret)) {
+ if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), ret)) {
kfree(ret);
return NULL;
}
- ret[ea->valuelen] = 0;
+ ret[ea_valuelen(ea)] = 0;
return ret;
}
- pos += ea->namelen + ea->valuelen + 5;
+ pos += ea->namelen + ea_valuelen(ea) + 5;
}
return NULL;
}
@@ -202,13 +202,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
if (ea->indirect) {
if (ea_len(ea) == size)
set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
- } else if (ea->valuelen == size) {
+ } else if (ea_valuelen(ea) == size) {
memcpy(ea_data(ea), data, size);
}
return;
}
- a = fnode->ea_secno;
- len = fnode->ea_size_l;
+ a = le32_to_cpu(fnode->ea_secno);
+ len = le32_to_cpu(fnode->ea_size_l);
ano = fnode->ea_anode;
pos = 0;
while (pos < len) {
@@ -228,68 +228,70 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
}
else {
- if (ea->valuelen == size)
+ if (ea_valuelen(ea) == size)
hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data);
}
return;
}
- pos += ea->namelen + ea->valuelen + 5;
+ pos += ea->namelen + ea_valuelen(ea) + 5;
}
- if (!fnode->ea_offs) {
- /*if (fnode->ea_size_s) {
+ if (!le16_to_cpu(fnode->ea_offs)) {
+ /*if (le16_to_cpu(fnode->ea_size_s)) {
hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0",
- inode->i_ino, fnode->ea_size_s);
+ inode->i_ino, le16_to_cpu(fnode->ea_size_s));
return;
}*/
- fnode->ea_offs = 0xc4;
+ fnode->ea_offs = cpu_to_le16(0xc4);
}
- if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) {
+ if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) {
hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
(unsigned long)inode->i_ino,
- fnode->ea_offs, fnode->ea_size_s);
+ le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
return;
}
- if ((fnode->ea_size_s || !fnode->ea_size_l) &&
- fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s + strlen(key) + size + 5 <= 0x200) {
+ if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) &&
+ le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5 <= 0x200) {
ea = fnode_end_ea(fnode);
*(char *)ea = 0;
ea->namelen = strlen(key);
- ea->valuelen = size;
+ ea->valuelen_lo = size;
+ ea->valuelen_hi = size >> 8;
strcpy(ea->name, key);
memcpy(ea_data(ea), data, size);
- fnode->ea_size_s += strlen(key) + size + 5;
+ fnode->ea_size_s = cpu_to_le16(le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5);
goto ret;
}
/* Most the code here is 99.9993422% unused. I hope there are no bugs.
But what .. HPFS.IFS has also bugs in ea management. */
- if (fnode->ea_size_s && !fnode->ea_size_l) {
+ if (le16_to_cpu(fnode->ea_size_s) && !le32_to_cpu(fnode->ea_size_l)) {
secno n;
struct buffer_head *bh;
char *data;
- if (!(n = hpfs_alloc_sector(s, fno, 1, 0, 1))) return;
+ if (!(n = hpfs_alloc_sector(s, fno, 1, 0))) return;
if (!(data = hpfs_get_sector(s, n, &bh))) {
hpfs_free_sectors(s, n, 1);
return;
}
- memcpy(data, fnode_ea(fnode), fnode->ea_size_s);
- fnode->ea_size_l = fnode->ea_size_s;
- fnode->ea_size_s = 0;
- fnode->ea_secno = n;
- fnode->ea_anode = 0;
+ memcpy(data, fnode_ea(fnode), le16_to_cpu(fnode->ea_size_s));
+ fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s));
+ fnode->ea_size_s = cpu_to_le16(0);
+ fnode->ea_secno = cpu_to_le32(n);
+ fnode->ea_anode = cpu_to_le32(0);
mark_buffer_dirty(bh);
brelse(bh);
}
- pos = fnode->ea_size_l + 5 + strlen(key) + size;
- len = (fnode->ea_size_l + 511) >> 9;
+ pos = le32_to_cpu(fnode->ea_size_l) + 5 + strlen(key) + size;
+ len = (le32_to_cpu(fnode->ea_size_l) + 511) >> 9;
if (pos >= 30000) goto bail;
while (((pos + 511) >> 9) > len) {
if (!len) {
- if (!(fnode->ea_secno = hpfs_alloc_sector(s, fno, 1, 0, 1)))
- goto bail;
+ secno q = hpfs_alloc_sector(s, fno, 1, 0);
+ if (!q) goto bail;
+ fnode->ea_secno = cpu_to_le32(q);
fnode->ea_anode = 0;
len++;
} else if (!fnode->ea_anode) {
- if (hpfs_alloc_if_possible(s, fnode->ea_secno + len)) {
+ if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) {
len++;
} else {
/* Aargh... don't know how to create ea anodes :-( */
@@ -298,26 +300,26 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
anode_secno a_s;
if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh)))
goto bail;
- anode->up = fno;
+ anode->up = cpu_to_le32(fno);
anode->btree.fnode_parent = 1;
anode->btree.n_free_nodes--;
anode->btree.n_used_nodes++;
- anode->btree.first_free += 12;
- anode->u.external[0].disk_secno = fnode->ea_secno;
- anode->u.external[0].file_secno = 0;
- anode->u.external[0].length = len;
+ anode->btree.first_free = cpu_to_le16(le16_to_cpu(anode->btree.first_free) + 12);
+ anode->u.external[0].disk_secno = cpu_to_le32(le32_to_cpu(fnode->ea_secno));
+ anode->u.external[0].file_secno = cpu_to_le32(0);
+ anode->u.external[0].length = cpu_to_le32(len);
mark_buffer_dirty(bh);
brelse(bh);
fnode->ea_anode = 1;
- fnode->ea_secno = a_s;*/
+ fnode->ea_secno = cpu_to_le32(a_s);*/
secno new_sec;
int i;
- if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9), 1)))
+ if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9))))
goto bail;
for (i = 0; i < len; i++) {
struct buffer_head *bh1, *bh2;
void *b1, *b2;
- if (!(b1 = hpfs_map_sector(s, fnode->ea_secno + i, &bh1, len - i - 1))) {
+ if (!(b1 = hpfs_map_sector(s, le32_to_cpu(fnode->ea_secno) + i, &bh1, len - i - 1))) {
hpfs_free_sectors(s, new_sec, (pos + 511) >> 9);
goto bail;
}
@@ -331,13 +333,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
mark_buffer_dirty(bh2);
brelse(bh2);
}
- hpfs_free_sectors(s, fnode->ea_secno, len);
- fnode->ea_secno = new_sec;
+ hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno), len);
+ fnode->ea_secno = cpu_to_le32(new_sec);
len = (pos + 511) >> 9;
}
}
if (fnode->ea_anode) {
- if (hpfs_add_sector_to_btree(s, fnode->ea_secno,
+ if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno),
0, len) != -1) {
len++;
} else {
@@ -349,17 +351,17 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
h[1] = strlen(key);
h[2] = size & 0xff;
h[3] = size >> 8;
- if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l, 4, h)) goto bail;
- if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 4, h[1] + 1, key)) goto bail;
- if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 5 + h[1], size, data)) goto bail;
- fnode->ea_size_l = pos;
+ if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail;
+ if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail;
+ if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail;
+ fnode->ea_size_l = cpu_to_le32(pos);
ret:
hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size;
return;
bail:
- if (fnode->ea_secno)
- if (fnode->ea_anode) hpfs_truncate_btree(s, fnode->ea_secno, 1, (fnode->ea_size_l + 511) >> 9);
- else hpfs_free_sectors(s, fnode->ea_secno + ((fnode->ea_size_l + 511) >> 9), len - ((fnode->ea_size_l + 511) >> 9));
- else fnode->ea_secno = fnode->ea_size_l = 0;
+ if (le32_to_cpu(fnode->ea_secno))
+ if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9);
+ else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9));
+ else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0);
}
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9b9eb6933e43..89c500ee5213 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -20,8 +20,8 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
int hpfs_file_fsync(struct file *file, int datasync)
{
- /*return file_fsync(file, datasync);*/
- return 0; /* Don't fsync :-) */
+ struct inode *inode = file->f_mapping->host;
+ return sync_blockdev(inode->i_sb->s_bdev);
}
/*
@@ -48,38 +48,46 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
static void hpfs_truncate(struct inode *i)
{
if (IS_IMMUTABLE(i)) return /*-EPERM*/;
- hpfs_lock(i->i_sb);
+ hpfs_lock_assert(i->i_sb);
+
hpfs_i(i)->i_n_secs = 0;
i->i_blocks = 1 + ((i->i_size + 511) >> 9);
hpfs_i(i)->mmu_private = i->i_size;
hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9));
hpfs_write_inode(i);
hpfs_i(i)->i_n_secs = 0;
- hpfs_unlock(i->i_sb);
}
static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
{
+ int r;
secno s;
+ hpfs_lock(inode->i_sb);
s = hpfs_bmap(inode, iblock);
if (s) {
map_bh(bh_result, inode->i_sb, s);
- return 0;
+ goto ret_0;
}
- if (!create) return 0;
+ if (!create) goto ret_0;
if (iblock<<9 != hpfs_i(inode)->mmu_private) {
BUG();
- return -EIO;
+ r = -EIO;
+ goto ret_r;
}
if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) {
hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1);
- return -ENOSPC;
+ r = -ENOSPC;
+ goto ret_r;
}
inode->i_blocks++;
hpfs_i(inode)->mmu_private += 512;
set_buffer_new(bh_result);
map_bh(bh_result, inode->i_sb, s);
- return 0;
+ ret_0:
+ r = 0;
+ ret_r:
+ hpfs_unlock(inode->i_sb);
+ return r;
}
static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -130,8 +138,11 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
ssize_t retval;
retval = do_sync_write(file, buf, count, ppos);
- if (retval > 0)
+ if (retval > 0) {
+ hpfs_lock(file->f_path.dentry->d_sb);
hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1;
+ hpfs_unlock(file->f_path.dentry->d_sb);
+ }
return retval;
}
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
index 0e84c73cd9c4..8b0650aae328 100644
--- a/fs/hpfs/hpfs.h
+++ b/fs/hpfs/hpfs.h
@@ -19,9 +19,13 @@
For definitive information on HPFS, ask somebody else -- this is guesswork.
There are certain to be many mistakes. */
+#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
+#error unknown endian
+#endif
+
/* Notation */
-typedef unsigned secno; /* sector number, partition relative */
+typedef u32 secno; /* sector number, partition relative */
typedef secno dnode_secno; /* sector number of a dnode */
typedef secno fnode_secno; /* sector number of an fnode */
@@ -38,28 +42,28 @@ typedef u32 time32_t; /* 32-bit time_t type */
struct hpfs_boot_block
{
- unsigned char jmp[3];
- unsigned char oem_id[8];
- unsigned char bytes_per_sector[2]; /* 512 */
- unsigned char sectors_per_cluster;
- unsigned char n_reserved_sectors[2];
- unsigned char n_fats;
- unsigned char n_rootdir_entries[2];
- unsigned char n_sectors_s[2];
- unsigned char media_byte;
- unsigned short sectors_per_fat;
- unsigned short sectors_per_track;
- unsigned short heads_per_cyl;
- unsigned int n_hidden_sectors;
- unsigned int n_sectors_l; /* size of partition */
- unsigned char drive_number;
- unsigned char mbz;
- unsigned char sig_28h; /* 28h */
- unsigned char vol_serno[4];
- unsigned char vol_label[11];
- unsigned char sig_hpfs[8]; /* "HPFS " */
- unsigned char pad[448];
- unsigned short magic; /* aa55 */
+ u8 jmp[3];
+ u8 oem_id[8];
+ u8 bytes_per_sector[2]; /* 512 */
+ u8 sectors_per_cluster;
+ u8 n_reserved_sectors[2];
+ u8 n_fats;
+ u8 n_rootdir_entries[2];
+ u8 n_sectors_s[2];
+ u8 media_byte;
+ u16 sectors_per_fat;
+ u16 sectors_per_track;
+ u16 heads_per_cyl;
+ u32 n_hidden_sectors;
+ u32 n_sectors_l; /* size of partition */
+ u8 drive_number;
+ u8 mbz;
+ u8 sig_28h; /* 28h */
+ u8 vol_serno[4];
+ u8 vol_label[11];
+ u8 sig_hpfs[8]; /* "HPFS " */
+ u8 pad[448];
+ u16 magic; /* aa55 */
};
@@ -71,31 +75,29 @@ struct hpfs_boot_block
struct hpfs_super_block
{
- unsigned magic; /* f995 e849 */
- unsigned magic1; /* fa53 e9c5, more magic? */
- /*unsigned huh202;*/ /* ?? 202 = N. of B. in 1.00390625 S.*/
- char version; /* version of a filesystem usually 2 */
- char funcversion; /* functional version - oldest version
+ u32 magic; /* f995 e849 */
+ u32 magic1; /* fa53 e9c5, more magic? */
+ u8 version; /* version of a filesystem usually 2 */
+ u8 funcversion; /* functional version - oldest version
of filesystem that can understand
this disk */
- unsigned short int zero; /* 0 */
+ u16 zero; /* 0 */
fnode_secno root; /* fnode of root directory */
secno n_sectors; /* size of filesystem */
- unsigned n_badblocks; /* number of bad blocks */
+ u32 n_badblocks; /* number of bad blocks */
secno bitmaps; /* pointers to free space bit maps */
- unsigned zero1; /* 0 */
+ u32 zero1; /* 0 */
secno badblocks; /* bad block list */
- unsigned zero3; /* 0 */
+ u32 zero3; /* 0 */
time32_t last_chkdsk; /* date last checked, 0 if never */
- /*unsigned zero4;*/ /* 0 */
- time32_t last_optimize; /* date last optimized, 0 if never */
+ time32_t last_optimize; /* date last optimized, 0 if never */
secno n_dir_band; /* number of sectors in dir band */
secno dir_band_start; /* first sector in dir band */
secno dir_band_end; /* last sector in dir band */
secno dir_band_bitmap; /* free space map, 1 dnode per bit */
- char volume_name[32]; /* not used */
+ u8 volume_name[32]; /* not used */
secno user_id_table; /* 8 preallocated sectors - user id */
- unsigned zero6[103]; /* 0 */
+ u32 zero6[103]; /* 0 */
};
@@ -107,44 +109,65 @@ struct hpfs_super_block
struct hpfs_spare_block
{
- unsigned magic; /* f991 1849 */
- unsigned magic1; /* fa52 29c5, more magic? */
-
- unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */
- /*unsigned flag1234: 4;*/ /* unknown flags */
- unsigned sparedir_used: 1; /* spare dirblks used */
- unsigned hotfixes_used: 1; /* hotfixes used */
- unsigned bad_sector: 1; /* bad sector, corrupted disk (???) */
- unsigned bad_bitmap: 1; /* bad bitmap */
- unsigned fast: 1; /* partition was fast formatted */
- unsigned old_wrote: 1; /* old version wrote to partion */
- unsigned old_wrote_1: 1; /* old version wrote to partion (?) */
- unsigned install_dasd_limits: 1; /* HPFS386 flags */
- unsigned resynch_dasd_limits: 1;
- unsigned dasd_limits_operational: 1;
- unsigned multimedia_active: 1;
- unsigned dce_acls_active: 1;
- unsigned dasd_limits_dirty: 1;
- unsigned flag67: 2;
- unsigned char mm_contlgulty;
- unsigned char unused;
+ u32 magic; /* f991 1849 */
+ u32 magic1; /* fa52 29c5, more magic? */
+
+#ifdef __LITTLE_ENDIAN
+ u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */
+ u8 sparedir_used: 1; /* spare dirblks used */
+ u8 hotfixes_used: 1; /* hotfixes used */
+ u8 bad_sector: 1; /* bad sector, corrupted disk (???) */
+ u8 bad_bitmap: 1; /* bad bitmap */
+ u8 fast: 1; /* partition was fast formatted */
+ u8 old_wrote: 1; /* old version wrote to partion */
+ u8 old_wrote_1: 1; /* old version wrote to partion (?) */
+#else
+ u8 old_wrote_1: 1; /* old version wrote to partion (?) */
+ u8 old_wrote: 1; /* old version wrote to partion */
+ u8 fast: 1; /* partition was fast formatted */
+ u8 bad_bitmap: 1; /* bad bitmap */
+ u8 bad_sector: 1; /* bad sector, corrupted disk (???) */
+ u8 hotfixes_used: 1; /* hotfixes used */
+ u8 sparedir_used: 1; /* spare dirblks used */
+ u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */
+#endif
+
+#ifdef __LITTLE_ENDIAN
+ u8 install_dasd_limits: 1; /* HPFS386 flags */
+ u8 resynch_dasd_limits: 1;
+ u8 dasd_limits_operational: 1;
+ u8 multimedia_active: 1;
+ u8 dce_acls_active: 1;
+ u8 dasd_limits_dirty: 1;
+ u8 flag67: 2;
+#else
+ u8 flag67: 2;
+ u8 dasd_limits_dirty: 1;
+ u8 dce_acls_active: 1;
+ u8 multimedia_active: 1;
+ u8 dasd_limits_operational: 1;
+ u8 resynch_dasd_limits: 1;
+ u8 install_dasd_limits: 1; /* HPFS386 flags */
+#endif
+
+ u8 mm_contlgulty;
+ u8 unused;
secno hotfix_map; /* info about remapped bad sectors */
- unsigned n_spares_used; /* number of hotfixes */
- unsigned n_spares; /* number of spares in hotfix map */
- unsigned n_dnode_spares_free; /* spare dnodes unused */
- unsigned n_dnode_spares; /* length of spare_dnodes[] list,
+ u32 n_spares_used; /* number of hotfixes */
+ u32 n_spares; /* number of spares in hotfix map */
+ u32 n_dnode_spares_free; /* spare dnodes unused */
+ u32 n_dnode_spares; /* length of spare_dnodes[] list,
follows in this block*/
secno code_page_dir; /* code page directory block */
- unsigned n_code_pages; /* number of code pages */
- /*unsigned large_numbers[2];*/ /* ?? */
- unsigned super_crc; /* on HPFS386 and LAN Server this is
+ u32 n_code_pages; /* number of code pages */
+ u32 super_crc; /* on HPFS386 and LAN Server this is
checksum of superblock, on normal
OS/2 unused */
- unsigned spare_crc; /* on HPFS386 checksum of spareblock */
- unsigned zero1[15]; /* unused */
+ u32 spare_crc; /* on HPFS386 checksum of spareblock */
+ u32 zero1[15]; /* unused */
dnode_secno spare_dnodes[100]; /* emergency free dnode list */
- unsigned zero2[1]; /* room for more? */
+ u32 zero2[1]; /* room for more? */
};
/* The bad block list is 4 sectors long. The first word must be zero,
@@ -179,18 +202,18 @@ struct hpfs_spare_block
struct code_page_directory
{
- unsigned magic; /* 4945 21f7 */
- unsigned n_code_pages; /* number of pointers following */
- unsigned zero1[2];
+ u32 magic; /* 4945 21f7 */
+ u32 n_code_pages; /* number of pointers following */
+ u32 zero1[2];
struct {
- unsigned short ix; /* index */
- unsigned short code_page_number; /* code page number */
- unsigned bounds; /* matches corresponding word
+ u16 ix; /* index */
+ u16 code_page_number; /* code page number */
+ u32 bounds; /* matches corresponding word
in data block */
secno code_page_data; /* sector number of a code_page_data
containing c.p. array */
- unsigned short index; /* index in c.p. array in that sector*/
- unsigned short unknown; /* some unknown value; usually 0;
+ u16 index; /* index in c.p. array in that sector*/
+ u16 unknown; /* some unknown value; usually 0;
2 in Japanese version */
} array[31]; /* unknown length */
};
@@ -201,21 +224,21 @@ struct code_page_directory
struct code_page_data
{
- unsigned magic; /* 8945 21f7 */
- unsigned n_used; /* # elements used in c_p_data[] */
- unsigned bounds[3]; /* looks a bit like
+ u32 magic; /* 8945 21f7 */
+ u32 n_used; /* # elements used in c_p_data[] */
+ u32 bounds[3]; /* looks a bit like
(beg1,end1), (beg2,end2)
one byte each */
- unsigned short offs[3]; /* offsets from start of sector
+ u16 offs[3]; /* offsets from start of sector
to start of c_p_data[ix] */
struct {
- unsigned short ix; /* index */
- unsigned short code_page_number; /* code page number */
- unsigned short unknown; /* the same as in cp directory */
- unsigned char map[128]; /* upcase table for chars 80..ff */
- unsigned short zero2;
+ u16 ix; /* index */
+ u16 code_page_number; /* code page number */
+ u16 unknown; /* the same as in cp directory */
+ u8 map[128]; /* upcase table for chars 80..ff */
+ u16 zero2;
} code_page[3];
- unsigned char incognita[78];
+ u8 incognita[78];
};
@@ -255,50 +278,84 @@ struct code_page_data
#define DNODE_MAGIC 0x77e40aae
struct dnode {
- unsigned magic; /* 77e4 0aae */
- unsigned first_free; /* offset from start of dnode to
+ u32 magic; /* 77e4 0aae */
+ u32 first_free; /* offset from start of dnode to
first free dir entry */
- unsigned root_dnode:1; /* Is it root dnode? */
- unsigned increment_me:31; /* some kind of activity counter?
- Neither HPFS.IFS nor CHKDSK cares
+#ifdef __LITTLE_ENDIAN
+ u8 root_dnode: 1; /* Is it root dnode? */
+ u8 increment_me: 7; /* some kind of activity counter? */
+ /* Neither HPFS.IFS nor CHKDSK cares
+ if you change this word */
+#else
+ u8 increment_me: 7; /* some kind of activity counter? */
+ /* Neither HPFS.IFS nor CHKDSK cares
if you change this word */
+ u8 root_dnode: 1; /* Is it root dnode? */
+#endif
+ u8 increment_me2[3];
secno up; /* (root dnode) directory's fnode
(nonroot) parent dnode */
dnode_secno self; /* pointer to this dnode */
- unsigned char dirent[2028]; /* one or more dirents */
+ u8 dirent[2028]; /* one or more dirents */
};
struct hpfs_dirent {
- unsigned short length; /* offset to next dirent */
- unsigned first: 1; /* set on phony ^A^A (".") entry */
- unsigned has_acl: 1;
- unsigned down: 1; /* down pointer present (after name) */
- unsigned last: 1; /* set on phony \377 entry */
- unsigned has_ea: 1; /* entry has EA */
- unsigned has_xtd_perm: 1; /* has extended perm list (???) */
- unsigned has_explicit_acl: 1;
- unsigned has_needea: 1; /* ?? some EA has NEEDEA set
+ u16 length; /* offset to next dirent */
+
+#ifdef __LITTLE_ENDIAN
+ u8 first: 1; /* set on phony ^A^A (".") entry */
+ u8 has_acl: 1;
+ u8 down: 1; /* down pointer present (after name) */
+ u8 last: 1; /* set on phony \377 entry */
+ u8 has_ea: 1; /* entry has EA */
+ u8 has_xtd_perm: 1; /* has extended perm list (???) */
+ u8 has_explicit_acl: 1;
+ u8 has_needea: 1; /* ?? some EA has NEEDEA set
+ I have no idea why this is
+ interesting in a dir entry */
+#else
+ u8 has_needea: 1; /* ?? some EA has NEEDEA set
I have no idea why this is
interesting in a dir entry */
- unsigned read_only: 1; /* dos attrib */
- unsigned hidden: 1; /* dos attrib */
- unsigned system: 1; /* dos attrib */
- unsigned flag11: 1; /* would be volume label dos attrib */
- unsigned directory: 1; /* dos attrib */
- unsigned archive: 1; /* dos attrib */
- unsigned not_8x3: 1; /* name is not 8.3 */
- unsigned flag15: 1;
+ u8 has_explicit_acl: 1;
+ u8 has_xtd_perm: 1; /* has extended perm list (???) */
+ u8 has_ea: 1; /* entry has EA */
+ u8 last: 1; /* set on phony \377 entry */
+ u8 down: 1; /* down pointer present (after name) */
+ u8 has_acl: 1;
+ u8 first: 1; /* set on phony ^A^A (".") entry */
+#endif
+
+#ifdef __LITTLE_ENDIAN
+ u8 read_only: 1; /* dos attrib */
+ u8 hidden: 1; /* dos attrib */
+ u8 system: 1; /* dos attrib */
+ u8 flag11: 1; /* would be volume label dos attrib */
+ u8 directory: 1; /* dos attrib */
+ u8 archive: 1; /* dos attrib */
+ u8 not_8x3: 1; /* name is not 8.3 */
+ u8 flag15: 1;
+#else
+ u8 flag15: 1;
+ u8 not_8x3: 1; /* name is not 8.3 */
+ u8 archive: 1; /* dos attrib */
+ u8 directory: 1; /* dos attrib */
+ u8 flag11: 1; /* would be volume label dos attrib */
+ u8 system: 1; /* dos attrib */
+ u8 hidden: 1; /* dos attrib */
+ u8 read_only: 1; /* dos attrib */
+#endif
+
fnode_secno fnode; /* fnode giving allocation info */
time32_t write_date; /* mtime */
- unsigned file_size; /* file length, bytes */
+ u32 file_size; /* file length, bytes */
time32_t read_date; /* atime */
time32_t creation_date; /* ctime */
- unsigned ea_size; /* total EA length, bytes */
- unsigned char no_of_acls : 3; /* number of ACL's */
- unsigned char reserver : 5;
- unsigned char ix; /* code page index (of filename), see
+ u32 ea_size; /* total EA length, bytes */
+ u8 no_of_acls; /* number of ACL's (low 3 bits) */
+ u8 ix; /* code page index (of filename), see
struct code_page_data */
- unsigned char namelen, name[1]; /* file name */
+ u8 namelen, name[1]; /* file name */
/* dnode_secno down; btree down pointer, if present,
follows name on next word boundary, or maybe it
precedes next dirent, which is on a word boundary. */
@@ -318,38 +375,50 @@ struct hpfs_dirent {
struct bplus_leaf_node
{
- unsigned file_secno; /* first file sector in extent */
- unsigned length; /* length, sectors */
+ u32 file_secno; /* first file sector in extent */
+ u32 length; /* length, sectors */
secno disk_secno; /* first corresponding disk sector */
};
struct bplus_internal_node
{
- unsigned file_secno; /* subtree maps sectors < this */
+ u32 file_secno; /* subtree maps sectors < this */
anode_secno down; /* pointer to subtree */
};
struct bplus_header
{
- unsigned hbff: 1; /* high bit of first free entry offset */
- unsigned flag1: 1;
- unsigned flag2: 1;
- unsigned flag3: 1;
- unsigned flag4: 1;
- unsigned fnode_parent: 1; /* ? we're pointed to by an fnode,
+#ifdef __LITTLE_ENDIAN
+ u8 hbff: 1; /* high bit of first free entry offset */
+ u8 flag1234: 4;
+ u8 fnode_parent: 1; /* ? we're pointed to by an fnode,
the data btree or some ea or the
main ea bootage pointer ea_secno */
/* also can get set in fnodes, which
may be a chkdsk glitch or may mean
this bit is irrelevant in fnodes,
or this interpretation is all wet */
- unsigned binary_search: 1; /* suggest binary search (unused) */
- unsigned internal: 1; /* 1 -> (internal) tree of anodes
+ u8 binary_search: 1; /* suggest binary search (unused) */
+ u8 internal: 1; /* 1 -> (internal) tree of anodes
+ 0 -> (leaf) list of extents */
+#else
+ u8 internal: 1; /* 1 -> (internal) tree of anodes
0 -> (leaf) list of extents */
- unsigned char fill[3];
- unsigned char n_free_nodes; /* free nodes in following array */
- unsigned char n_used_nodes; /* used nodes in following array */
- unsigned short first_free; /* offset from start of header to
+ u8 binary_search: 1; /* suggest binary search (unused) */
+ u8 fnode_parent: 1; /* ? we're pointed to by an fnode,
+ the data btree or some ea or the
+ main ea bootage pointer ea_secno */
+ /* also can get set in fnodes, which
+ may be a chkdsk glitch or may mean
+ this bit is irrelevant in fnodes,
+ or this interpretation is all wet */
+ u8 flag1234: 4;
+ u8 hbff: 1; /* high bit of first free entry offset */
+#endif
+ u8 fill[3];
+ u8 n_free_nodes; /* free nodes in following array */
+ u8 n_used_nodes; /* used nodes in following array */
+ u16 first_free; /* offset from start of header to
first free node in array */
union {
struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
@@ -369,37 +438,38 @@ struct bplus_header
struct fnode
{
- unsigned magic; /* f7e4 0aae */
- unsigned zero1[2]; /* read history */
- unsigned char len, name[15]; /* true length, truncated name */
+ u32 magic; /* f7e4 0aae */
+ u32 zero1[2]; /* read history */
+ u8 len, name[15]; /* true length, truncated name */
fnode_secno up; /* pointer to file's directory fnode */
- /*unsigned zero2[3];*/
secno acl_size_l;
secno acl_secno;
- unsigned short acl_size_s;
- char acl_anode;
- char zero2; /* history bit count */
- unsigned ea_size_l; /* length of disk-resident ea's */
+ u16 acl_size_s;
+ u8 acl_anode;
+ u8 zero2; /* history bit count */
+ u32 ea_size_l; /* length of disk-resident ea's */
secno ea_secno; /* first sector of disk-resident ea's*/
- unsigned short ea_size_s; /* length of fnode-resident ea's */
-
- unsigned flag0: 1;
- unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */
- unsigned flag2: 1;
- unsigned flag3: 1;
- unsigned flag4: 1;
- unsigned flag5: 1;
- unsigned flag6: 1;
- unsigned flag7: 1;
- unsigned dirflag: 1; /* 1 -> directory. first & only extent
+ u16 ea_size_s; /* length of fnode-resident ea's */
+
+#ifdef __LITTLE_ENDIAN
+ u8 flag0: 1;
+ u8 ea_anode: 1; /* 1 -> ea_secno is an anode */
+ u8 flag234567: 6;
+#else
+ u8 flag234567: 6;
+ u8 ea_anode: 1; /* 1 -> ea_secno is an anode */
+ u8 flag0: 1;
+#endif
+
+#ifdef __LITTLE_ENDIAN
+ u8 dirflag: 1; /* 1 -> directory. first & only extent
points to dnode. */
- unsigned flag9: 1;
- unsigned flag10: 1;
- unsigned flag11: 1;
- unsigned flag12: 1;
- unsigned flag13: 1;
- unsigned flag14: 1;
- unsigned flag15: 1;
+ u8 flag9012345: 7;
+#else
+ u8 flag9012345: 7;
+ u8 dirflag: 1; /* 1 -> directory. first & only extent
+ points to dnode. */
+#endif
struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */
union {
@@ -407,17 +477,16 @@ struct fnode
struct bplus_internal_node internal[12];
} u;
- unsigned file_size; /* file length, bytes */
- unsigned n_needea; /* number of EA's with NEEDEA set */
- char user_id[16]; /* unused */
- unsigned short ea_offs; /* offset from start of fnode
+ u32 file_size; /* file length, bytes */
+ u32 n_needea; /* number of EA's with NEEDEA set */
+ u8 user_id[16]; /* unused */
+ u16 ea_offs; /* offset from start of fnode
to first fnode-resident ea */
- char dasd_limit_treshhold;
- char dasd_limit_delta;
- unsigned dasd_limit;
- unsigned dasd_usage;
- /*unsigned zero5[2];*/
- unsigned char ea[316]; /* zero or more EA's, packed together
+ u8 dasd_limit_treshhold;
+ u8 dasd_limit_delta;
+ u32 dasd_limit;
+ u32 dasd_usage;
+ u8 ea[316]; /* zero or more EA's, packed together
with no alignment padding.
(Do not use this name, get here
via fnode + ea_offs. I think.) */
@@ -430,7 +499,7 @@ struct fnode
struct anode
{
- unsigned magic; /* 37e4 0aae */
+ u32 magic; /* 37e4 0aae */
anode_secno self; /* pointer to this anode */
secno up; /* parent anode or fnode */
@@ -440,7 +509,7 @@ struct anode
struct bplus_internal_node internal[60];
} u;
- unsigned fill[3]; /* unused */
+ u32 fill[3]; /* unused */
};
@@ -461,25 +530,31 @@ struct anode
struct extended_attribute
{
- unsigned indirect: 1; /* 1 -> value gives sector number
+#ifdef __LITTLE_ENDIAN
+ u8 indirect: 1; /* 1 -> value gives sector number
where real value starts */
- unsigned anode: 1; /* 1 -> sector is an anode
+ u8 anode: 1; /* 1 -> sector is an anode
+ that points to fragmented value */
+ u8 flag23456: 5;
+ u8 needea: 1; /* required ea */
+#else
+ u8 needea: 1; /* required ea */
+ u8 flag23456: 5;
+ u8 anode: 1; /* 1 -> sector is an anode
that points to fragmented value */
- unsigned flag2: 1;
- unsigned flag3: 1;
- unsigned flag4: 1;
- unsigned flag5: 1;
- unsigned flag6: 1;
- unsigned needea: 1; /* required ea */
- unsigned char namelen; /* length of name, bytes */
- unsigned short valuelen; /* length of value, bytes */
- unsigned char name[0];
+ u8 indirect: 1; /* 1 -> value gives sector number
+ where real value starts */
+#endif
+ u8 namelen; /* length of name, bytes */
+ u8 valuelen_lo; /* length of value, bytes */
+ u8 valuelen_hi; /* length of value, bytes */
+ u8 name[0];
/*
- unsigned char name[namelen]; ascii attrib name
- unsigned char nul; terminating '\0', not counted
- unsigned char value[valuelen]; value, arbitrary
+ u8 name[namelen]; ascii attrib name
+ u8 nul; terminating '\0', not counted
+ u8 value[valuelen]; value, arbitrary
if this.indirect, valuelen is 8 and the value is
- unsigned length; real length of value, bytes
+ u32 length; real length of value, bytes
secno secno; sector address where it starts
if this.anode, the above sector number is the root of an anode tree
which points to the value.
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c15adbca07ff..dd552f862c8f 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,6 +13,7 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
+#include <asm/unaligned.h>
#include "hpfs.h"
@@ -51,18 +52,16 @@ struct hpfs_inode_info {
unsigned i_disk_sec; /* (files) minimalist cache of alloc info */
unsigned i_n_secs; /* (files) minimalist cache of alloc info */
unsigned i_ea_size; /* size of extended attributes */
- unsigned i_conv : 2; /* (files) crlf->newline hackery */
unsigned i_ea_mode : 1; /* file's permission is stored in ea */
unsigned i_ea_uid : 1; /* file's uid is stored in ea */
unsigned i_ea_gid : 1; /* file's gid is stored in ea */
unsigned i_dirty : 1;
- struct mutex i_mutex;
- struct mutex i_parent_mutex;
loff_t **i_rddir_off;
struct inode vfs_inode;
};
struct hpfs_sb_info {
+ struct mutex hpfs_mutex; /* global hpfs lock */
ino_t sb_root; /* inode number of root dir */
unsigned sb_fs_size; /* file system size, sectors */
unsigned sb_bitmaps; /* sector number of bitmap list */
@@ -74,7 +73,6 @@ struct hpfs_sb_info {
uid_t sb_uid; /* uid from mount options */
gid_t sb_gid; /* gid from mount options */
umode_t sb_mode; /* mode from mount options */
- unsigned sb_conv : 2; /* crlf->newline hackery */
unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */
unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */
unsigned sb_chk : 2; /* checks: 0-no, 1-normal, 2-strict */
@@ -87,20 +85,9 @@ struct hpfs_sb_info {
unsigned *sb_bmp_dir; /* main bitmap directory */
unsigned sb_c_bitmap; /* current bitmap */
unsigned sb_max_fwd_alloc; /* max forwad allocation */
- struct mutex hpfs_creation_de; /* when creating dirents, nobody else
- can alloc blocks */
- /*unsigned sb_mounting : 1;*/
int sb_timeshift;
};
-/*
- * conv= options
- */
-
-#define CONV_BINARY 0 /* no conversion */
-#define CONV_TEXT 1 /* crlf->newline */
-#define CONV_AUTO 2 /* decide based on file contents */
-
/* Four 512-byte buffers and the 2k block obtained by concatenating them */
struct quad_buffer_head {
@@ -113,7 +100,7 @@ struct quad_buffer_head {
static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
{
CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n"));
- return *(dnode_secno *) ((void *) de + de->length - 4);
+ return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4));
}
/* The first dir entry in a dnode */
@@ -127,41 +114,46 @@ static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
{
- CHKCOND(dnode->first_free>=0x14 && dnode->first_free<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %d\n",(int)dnode->first_free));
- return (void *) dnode + dnode->first_free;
+ CHKCOND(le32_to_cpu(dnode->first_free)>=0x14 && le32_to_cpu(dnode->first_free)<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %x\n",(unsigned)le32_to_cpu(dnode->first_free)));
+ return (void *) dnode + le32_to_cpu(dnode->first_free);
}
/* The dir entry after dir entry de */
static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
{
- CHKCOND(de->length>=0x20 && de->length<0x800,("HPFS: de_next_de: de->length = %d\n",(int)de->length));
- return (void *) de + de->length;
+ CHKCOND(le16_to_cpu(de->length)>=0x20 && le16_to_cpu(de->length)<0x800,("HPFS: de_next_de: de->length = %x\n",(unsigned)le16_to_cpu(de->length)));
+ return (void *) de + le16_to_cpu(de->length);
}
static inline struct extended_attribute *fnode_ea(struct fnode *fnode)
{
- return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s);
+ return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s));
}
static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode)
{
- return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s);
+ return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s));
+}
+
+static unsigned ea_valuelen(struct extended_attribute *ea)
+{
+ return ea->valuelen_lo + 256 * ea->valuelen_hi;
}
static inline struct extended_attribute *next_ea(struct extended_attribute *ea)
{
- return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea->valuelen);
+ return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea_valuelen(ea));
}
static inline secno ea_sec(struct extended_attribute *ea)
{
- return *(secno *)((char *)ea + 9 + ea->namelen);
+ return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen)));
}
static inline secno ea_len(struct extended_attribute *ea)
{
- return *(secno *)((char *)ea + 5 + ea->namelen);
+ return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen)));
}
static inline char *ea_data(struct extended_attribute *ea)
@@ -186,13 +178,13 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src)
dst->not_8x3 = n;
}
-static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
+static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n)
{
int i;
if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n;
- if (!((bmp[(b & 0x3fff) >> 5] >> (b & 0x1f)) & 1)) return 1;
+ if (!((le32_to_cpu(bmp[(b & 0x3fff) >> 5]) >> (b & 0x1f)) & 1)) return 1;
for (i = 1; i < n; i++)
- if (/*b+i < 0x4000 &&*/ !((bmp[((b+i) & 0x3fff) >> 5] >> ((b+i) & 0x1f)) & 1))
+ if (!((le32_to_cpu(bmp[((b+i) & 0x3fff) >> 5]) >> ((b+i) & 0x1f)) & 1))
return i + 1;
return 0;
}
@@ -200,12 +192,12 @@ static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
/* alloc.c */
int hpfs_chk_sectors(struct super_block *, secno, int, char *);
-secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int, int);
+secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int);
int hpfs_alloc_if_possible(struct super_block *, secno);
void hpfs_free_sectors(struct super_block *, secno, unsigned);
int hpfs_check_free_dnodes(struct super_block *, int);
void hpfs_free_dnode(struct super_block *, secno);
-struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *, int);
+struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
@@ -222,8 +214,6 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno);
/* buffer.c */
-void hpfs_lock_creation(struct super_block *);
-void hpfs_unlock_creation(struct super_block *);
void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int);
void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **);
void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int);
@@ -247,7 +237,7 @@ void hpfs_del_pos(struct inode *, loff_t *);
struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *,
const unsigned char *, unsigned, secno);
int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned,
- struct hpfs_dirent *, int);
+ struct hpfs_dirent *);
int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int);
void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *);
dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno);
@@ -303,7 +293,6 @@ int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned,
const unsigned char *, unsigned, int);
int hpfs_is_name_long(const unsigned char *, unsigned);
void hpfs_adjust_length(const unsigned char *, unsigned *);
-void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned);
/* namei.c */
@@ -346,21 +335,26 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t)
/*
* Locking:
*
- * hpfs_lock() is a leftover from the big kernel lock.
- * Right now, these functions are empty and only left
- * for documentation purposes. The file system no longer
- * works on SMP systems, so the lock is not needed
- * any more.
+ * hpfs_lock() locks the whole filesystem. It must be taken
+ * on any method called by the VFS.
*
- * If someone is interested in making it work again, this
- * would be the place to start by adding a per-superblock
- * mutex and fixing all the bugs and performance issues
- * caused by that.
+ * We don't do any per-file locking anymore, it is hard to
+ * review and HPFS is not performance-sensitive anyway.
*/
static inline void hpfs_lock(struct super_block *s)
{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ mutex_lock(&sbi->hpfs_mutex);
}
static inline void hpfs_unlock(struct super_block *s)
{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ mutex_unlock(&sbi->hpfs_mutex);
+}
+
+static inline void hpfs_lock_assert(struct super_block *s)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+ WARN_ON(!mutex_is_locked(&sbi->hpfs_mutex));
}
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 87f1f787e767..338cd8368451 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
i->i_uid = hpfs_sb(sb)->sb_uid;
i->i_gid = hpfs_sb(sb)->sb_gid;
i->i_mode = hpfs_sb(sb)->sb_mode;
- hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
i->i_size = -1;
i->i_blocks = -1;
@@ -116,8 +115,8 @@ void hpfs_read_inode(struct inode *i)
i->i_mode |= S_IFDIR;
i->i_op = &hpfs_dir_iops;
i->i_fop = &hpfs_dir_ops;
- hpfs_inode->i_parent_dir = fnode->up;
- hpfs_inode->i_dno = fnode->u.external[0].disk_secno;
+ hpfs_inode->i_parent_dir = le32_to_cpu(fnode->up);
+ hpfs_inode->i_dno = le32_to_cpu(fnode->u.external[0].disk_secno);
if (hpfs_sb(sb)->sb_chk >= 2) {
struct buffer_head *bh0;
if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0);
@@ -133,7 +132,7 @@ void hpfs_read_inode(struct inode *i)
i->i_op = &hpfs_file_iops;
i->i_fop = &hpfs_file_ops;
i->i_nlink = 1;
- i->i_size = fnode->file_size;
+ i->i_size = le32_to_cpu(fnode->file_size);
i->i_blocks = ((i->i_size + 511) >> 9) + 1;
i->i_data.a_ops = &hpfs_aops;
hpfs_i(i)->mmu_private = i->i_size;
@@ -144,7 +143,7 @@ void hpfs_read_inode(struct inode *i)
static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
{
struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
- /*if (fnode->acl_size_l || fnode->acl_size_s) {
+ /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
Some unknown structures like ACL may be in fnode,
we'd better not overwrite them
hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
@@ -187,9 +186,7 @@ void hpfs_write_inode(struct inode *i)
kfree(hpfs_inode->i_rddir_off);
hpfs_inode->i_rddir_off = NULL;
}
- mutex_lock(&hpfs_inode->i_parent_mutex);
if (!i->i_nlink) {
- mutex_unlock(&hpfs_inode->i_parent_mutex);
return;
}
parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir);
@@ -200,14 +197,9 @@ void hpfs_write_inode(struct inode *i)
hpfs_read_inode(parent);
unlock_new_inode(parent);
}
- mutex_lock(&hpfs_inode->i_mutex);
hpfs_write_inode_nolock(i);
- mutex_unlock(&hpfs_inode->i_mutex);
iput(parent);
- } else {
- mark_inode_dirty(i);
}
- mutex_unlock(&hpfs_inode->i_parent_mutex);
}
void hpfs_write_inode_nolock(struct inode *i)
@@ -226,30 +218,30 @@ void hpfs_write_inode_nolock(struct inode *i)
}
} else de = NULL;
if (S_ISREG(i->i_mode)) {
- fnode->file_size = i->i_size;
- if (de) de->file_size = i->i_size;
+ fnode->file_size = cpu_to_le32(i->i_size);
+ if (de) de->file_size = cpu_to_le32(i->i_size);
} else if (S_ISDIR(i->i_mode)) {
- fnode->file_size = 0;
- if (de) de->file_size = 0;
+ fnode->file_size = cpu_to_le32(0);
+ if (de) de->file_size = cpu_to_le32(0);
}
hpfs_write_inode_ea(i, fnode);
if (de) {
- de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec);
- de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec);
- de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec);
+ de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
+ de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
+ de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
de->read_only = !(i->i_mode & 0222);
- de->ea_size = hpfs_inode->i_ea_size;
+ de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
}
if (S_ISDIR(i->i_mode)) {
if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
- de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec);
- de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec);
- de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec);
+ de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
+ de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
+ de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
de->read_only = !(i->i_mode & 0222);
- de->ea_size = /*hpfs_inode->i_ea_size*/0;
- de->file_size = 0;
+ de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
+ de->file_size = cpu_to_le32(0);
hpfs_mark_4buffers_dirty(&qbh);
hpfs_brelse4(&qbh);
} else
@@ -269,6 +261,10 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
hpfs_lock(inode->i_sb);
if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
goto out_unlock;
+ if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000)
+ goto out_unlock;
+ if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000)
+ goto out_unlock;
if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
goto out_unlock;
@@ -284,7 +280,6 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
}
setattr_copy(inode, attr);
- mark_inode_dirty(inode);
hpfs_write_inode(inode);
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 840d033ecee8..a790821366a7 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -21,7 +21,7 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
return NULL;
}
- sec = hpfs_sb(s)->sb_bmp_dir[bmp_block];
+ sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) {
hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id);
return NULL;
@@ -46,18 +46,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
struct code_page_data *cpd;
struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0);
if (!cp) return NULL;
- if (cp->magic != CP_DIR_MAGIC) {
- printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", cp->magic);
+ if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) {
+ printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic));
brelse(bh);
return NULL;
}
- if (!cp->n_code_pages) {
+ if (!le32_to_cpu(cp->n_code_pages)) {
printk("HPFS: n_code_pages == 0\n");
brelse(bh);
return NULL;
}
- cpds = cp->array[0].code_page_data;
- cpi = cp->array[0].index;
+ cpds = le32_to_cpu(cp->array[0].code_page_data);
+ cpi = le16_to_cpu(cp->array[0].index);
brelse(bh);
if (cpi >= 3) {
@@ -66,12 +66,12 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
}
if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL;
- if ((unsigned)cpd->offs[cpi] > 0x178) {
+ if (le16_to_cpu(cpd->offs[cpi]) > 0x178) {
printk("HPFS: Code page index out of sector\n");
brelse(bh);
return NULL;
}
- ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6;
+ ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6;
if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
printk("HPFS: out of memory for code page table\n");
brelse(bh);
@@ -125,7 +125,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
if (hpfs_sb(s)->sb_chk) {
struct extended_attribute *ea;
struct extended_attribute *ea_end;
- if (fnode->magic != FNODE_MAGIC) {
+ if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) {
hpfs_error(s, "bad magic on fnode %08lx",
(unsigned long)ino);
goto bail;
@@ -138,7 +138,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
(unsigned long)ino);
goto bail;
}
- if (fnode->btree.first_free !=
+ if (le16_to_cpu(fnode->btree.first_free) !=
8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) {
hpfs_error(s,
"bad first_free pointer in fnode %08lx",
@@ -146,12 +146,12 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
goto bail;
}
}
- if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 ||
- (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) {
+ if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 ||
+ le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) {
hpfs_error(s,
"bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
(unsigned long)ino,
- fnode->ea_offs, fnode->ea_size_s);
+ le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
goto bail;
}
ea = fnode_ea(fnode);
@@ -178,16 +178,20 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff
if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL;
if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD)))
if (hpfs_sb(s)->sb_chk) {
- if (anode->magic != ANODE_MAGIC || anode->self != ano) {
+ if (le32_to_cpu(anode->magic) != ANODE_MAGIC) {
hpfs_error(s, "bad magic on anode %08x", ano);
goto bail;
}
+ if (le32_to_cpu(anode->self) != ano) {
+ hpfs_error(s, "self pointer invalid on anode %08x", ano);
+ goto bail;
+ }
if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes !=
(anode->btree.internal ? 60 : 40)) {
hpfs_error(s, "bad number of nodes in anode %08x", ano);
goto bail;
}
- if (anode->btree.first_free !=
+ if (le16_to_cpu(anode->btree.first_free) !=
8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) {
hpfs_error(s, "bad first_free pointer in anode %08x", ano);
goto bail;
@@ -219,26 +223,26 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
unsigned p, pp = 0;
unsigned char *d = (unsigned char *)dnode;
int b = 0;
- if (dnode->magic != DNODE_MAGIC) {
+ if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) {
hpfs_error(s, "bad magic on dnode %08x", secno);
goto bail;
}
- if (dnode->self != secno)
- hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, dnode->self);
+ if (le32_to_cpu(dnode->self) != secno)
+ hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self));
/* Check dirents - bad dirents would cause infinite
loops or shooting to memory */
- if (dnode->first_free > 2048/* || dnode->first_free < 84*/) {
- hpfs_error(s, "dnode %08x has first_free == %08x", secno, dnode->first_free);
+ if (le32_to_cpu(dnode->first_free) > 2048) {
+ hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free));
goto bail;
}
- for (p = 20; p < dnode->first_free; p += d[p] + (d[p+1] << 8)) {
+ for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) {
struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p);
- if (de->length > 292 || (de->length < 32) || (de->length & 3) || p + de->length > 2048) {
+ if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) {
hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
goto bail;
}
- if (((31 + de->namelen + de->down*4 + 3) & ~3) != de->length) {
- if (((31 + de->namelen + de->down*4 + 3) & ~3) < de->length && s->s_flags & MS_RDONLY) goto ok;
+ if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) {
+ if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok;
hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
goto bail;
}
@@ -251,7 +255,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
pp = p;
}
- if (p != dnode->first_free) {
+ if (p != le32_to_cpu(dnode->first_free)) {
hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno);
goto bail;
}
@@ -277,7 +281,7 @@ dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino)
if (!fnode)
return 0;
- dno = fnode->u.external[0].disk_secno;
+ dno = le32_to_cpu(fnode->u.external[0].disk_secno);
brelse(bh);
return dno;
}
diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c
index f24736d7a439..9acdf338def0 100644
--- a/fs/hpfs/name.c
+++ b/fs/hpfs/name.c
@@ -8,39 +8,6 @@
#include "hpfs_fn.h"
-static const char *text_postfix[]={
-".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF",
-".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS",
-".RC", ".TEX", ".TXT", ".Y", ""};
-
-static const char *text_prefix[]={
-"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ",
-"MAKEFILE", "READ.ME", "README", "TERMCAP", ""};
-
-void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len)
-{
- struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
- int i;
- if (hpfs_inode->i_conv != CONV_AUTO) return;
- for (i = 0; *text_postfix[i]; i++) {
- int l = strlen(text_postfix[i]);
- if (l <= len)
- if (!hpfs_compare_names(inode->i_sb, text_postfix[i], l, name + len - l, l, 0))
- goto text;
- }
- for (i = 0; *text_prefix[i]; i++) {
- int l = strlen(text_prefix[i]);
- if (l <= len)
- if (!hpfs_compare_names(inode->i_sb, text_prefix[i], l, name, l, 0))
- goto text;
- }
- hpfs_inode->i_conv = CONV_BINARY;
- return;
- text:
- hpfs_inode->i_conv = CONV_TEXT;
- return;
-}
-
static inline int not_allowed_char(unsigned char c)
{
return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' ||
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d3db95f51a4e..ff0ce21c0867 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -29,7 +29,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
if (!fnode)
goto bail;
- dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0, 1);
+ dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0);
if (!dnode)
goto bail1;
memset(&dee, 0, sizeof dee);
@@ -37,8 +37,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (!(mode & 0222)) dee.read_only = 1;
/*dee.archive = 0;*/
dee.hidden = name[0] == '.';
- dee.fnode = fno;
- dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+ dee.fnode = cpu_to_le32(fno);
+ dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
result = new_inode(dir->i_sb);
if (!result)
goto bail2;
@@ -46,7 +46,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
result->i_ino = fno;
hpfs_i(result)->i_parent_dir = dir->i_ino;
hpfs_i(result)->i_dno = dno;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+ result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
result->i_ctime.tv_nsec = 0;
result->i_mtime.tv_nsec = 0;
result->i_atime.tv_nsec = 0;
@@ -60,8 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (dee.read_only)
result->i_mode &= ~0222;
- mutex_lock(&hpfs_i(dir)->i_mutex);
- r = hpfs_add_dirent(dir, name, len, &dee, 0);
+ r = hpfs_add_dirent(dir, name, len, &dee);
if (r == 1)
goto bail3;
if (r == -1) {
@@ -70,21 +69,21 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
fnode->len = len;
memcpy(fnode->name, name, len > 15 ? 15 : len);
- fnode->up = dir->i_ino;
+ fnode->up = cpu_to_le32(dir->i_ino);
fnode->dirflag = 1;
fnode->btree.n_free_nodes = 7;
fnode->btree.n_used_nodes = 1;
- fnode->btree.first_free = 0x14;
- fnode->u.external[0].disk_secno = dno;
- fnode->u.external[0].file_secno = -1;
+ fnode->btree.first_free = cpu_to_le16(0x14);
+ fnode->u.external[0].disk_secno = cpu_to_le32(dno);
+ fnode->u.external[0].file_secno = cpu_to_le32(-1);
dnode->root_dnode = 1;
- dnode->up = fno;
+ dnode->up = cpu_to_le32(fno);
de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0);
- de->creation_date = de->write_date = de->read_date = gmt_to_local(dir->i_sb, get_seconds());
+ de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
if (!(mode & 0222)) de->read_only = 1;
de->first = de->directory = 1;
/*de->hidden = de->system = 0;*/
- de->fnode = fno;
+ de->fnode = cpu_to_le32(fno);
mark_buffer_dirty(bh);
brelse(bh);
hpfs_mark_4buffers_dirty(&qbh0);
@@ -101,11 +100,9 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
hpfs_write_inode_nolock(result);
}
d_instantiate(dentry, result);
- mutex_unlock(&hpfs_i(dir)->i_mutex);
hpfs_unlock(dir->i_sb);
return 0;
bail3:
- mutex_unlock(&hpfs_i(dir)->i_mutex);
iput(result);
bail2:
hpfs_brelse4(&qbh0);
@@ -140,8 +137,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
if (!(mode & 0222)) dee.read_only = 1;
dee.archive = 1;
dee.hidden = name[0] == '.';
- dee.fnode = fno;
- dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+ dee.fnode = cpu_to_le32(fno);
+ dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
result = new_inode(dir->i_sb);
if (!result)
@@ -154,9 +151,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
result->i_op = &hpfs_file_iops;
result->i_fop = &hpfs_file_ops;
result->i_nlink = 1;
- hpfs_decide_conv(result, name, len);
hpfs_i(result)->i_parent_dir = dir->i_ino;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+ result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
result->i_ctime.tv_nsec = 0;
result->i_mtime.tv_nsec = 0;
result->i_atime.tv_nsec = 0;
@@ -168,8 +164,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
result->i_data.a_ops = &hpfs_aops;
hpfs_i(result)->mmu_private = 0;
- mutex_lock(&hpfs_i(dir)->i_mutex);
- r = hpfs_add_dirent(dir, name, len, &dee, 0);
+ r = hpfs_add_dirent(dir, name, len, &dee);
if (r == 1)
goto bail2;
if (r == -1) {
@@ -178,7 +173,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
}
fnode->len = len;
memcpy(fnode->name, name, len > 15 ? 15 : len);
- fnode->up = dir->i_ino;
+ fnode->up = cpu_to_le32(dir->i_ino);
mark_buffer_dirty(bh);
brelse(bh);
@@ -193,12 +188,10 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
hpfs_write_inode_nolock(result);
}
d_instantiate(dentry, result);
- mutex_unlock(&hpfs_i(dir)->i_mutex);
hpfs_unlock(dir->i_sb);
return 0;
bail2:
- mutex_unlock(&hpfs_i(dir)->i_mutex);
iput(result);
bail1:
brelse(bh);
@@ -232,8 +225,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
if (!(mode & 0222)) dee.read_only = 1;
dee.archive = 1;
dee.hidden = name[0] == '.';
- dee.fnode = fno;
- dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+ dee.fnode = cpu_to_le32(fno);
+ dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
result = new_inode(dir->i_sb);
if (!result)
@@ -242,7 +235,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
hpfs_init_inode(result);
result->i_ino = fno;
hpfs_i(result)->i_parent_dir = dir->i_ino;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+ result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
result->i_ctime.tv_nsec = 0;
result->i_mtime.tv_nsec = 0;
result->i_atime.tv_nsec = 0;
@@ -254,8 +247,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
result->i_blocks = 1;
init_special_inode(result, mode, rdev);
- mutex_lock(&hpfs_i(dir)->i_mutex);
- r = hpfs_add_dirent(dir, name, len, &dee, 0);
+ r = hpfs_add_dirent(dir, name, len, &dee);
if (r == 1)
goto bail2;
if (r == -1) {
@@ -264,19 +256,17 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
}
fnode->len = len;
memcpy(fnode->name, name, len > 15 ? 15 : len);
- fnode->up = dir->i_ino;
+ fnode->up = cpu_to_le32(dir->i_ino);
mark_buffer_dirty(bh);
insert_inode_hash(result);
hpfs_write_inode_nolock(result);
d_instantiate(dentry, result);
- mutex_unlock(&hpfs_i(dir)->i_mutex);
brelse(bh);
hpfs_unlock(dir->i_sb);
return 0;
bail2:
- mutex_unlock(&hpfs_i(dir)->i_mutex);
iput(result);
bail1:
brelse(bh);
@@ -310,8 +300,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
memset(&dee, 0, sizeof dee);
dee.archive = 1;
dee.hidden = name[0] == '.';
- dee.fnode = fno;
- dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+ dee.fnode = cpu_to_le32(fno);
+ dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
result = new_inode(dir->i_sb);
if (!result)
@@ -319,7 +309,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
result->i_ino = fno;
hpfs_init_inode(result);
hpfs_i(result)->i_parent_dir = dir->i_ino;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+ result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
result->i_ctime.tv_nsec = 0;
result->i_mtime.tv_nsec = 0;
result->i_atime.tv_nsec = 0;
@@ -333,8 +323,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
result->i_op = &page_symlink_inode_operations;
result->i_data.a_ops = &hpfs_symlink_aops;
- mutex_lock(&hpfs_i(dir)->i_mutex);
- r = hpfs_add_dirent(dir, name, len, &dee, 0);
+ r = hpfs_add_dirent(dir, name, len, &dee);
if (r == 1)
goto bail2;
if (r == -1) {
@@ -343,7 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
}
fnode->len = len;
memcpy(fnode->name, name, len > 15 ? 15 : len);
- fnode->up = dir->i_ino;
+ fnode->up = cpu_to_le32(dir->i_ino);
hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink));
mark_buffer_dirty(bh);
brelse(bh);
@@ -352,11 +341,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
hpfs_write_inode_nolock(result);
d_instantiate(dentry, result);
- mutex_unlock(&hpfs_i(dir)->i_mutex);
hpfs_unlock(dir->i_sb);
return 0;
bail2:
- mutex_unlock(&hpfs_i(dir)->i_mutex);
iput(result);
bail1:
brelse(bh);
@@ -374,7 +361,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
struct hpfs_dirent *de;
struct inode *inode = dentry->d_inode;
dnode_secno dno;
- fnode_secno fno;
int r;
int rep = 0;
int err;
@@ -382,8 +368,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
hpfs_lock(dir->i_sb);
hpfs_adjust_length(name, &len);
again:
- mutex_lock(&hpfs_i(inode)->i_parent_mutex);
- mutex_lock(&hpfs_i(dir)->i_mutex);
err = -ENOENT;
de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
if (!de)
@@ -397,7 +381,6 @@ again:
if (de->directory)
goto out1;
- fno = de->fnode;
r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
switch (r) {
case 1:
@@ -410,8 +393,6 @@ again:
if (rep++)
break;
- mutex_unlock(&hpfs_i(dir)->i_mutex);
- mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
dentry_unhash(dentry);
if (!d_unhashed(dentry)) {
hpfs_unlock(dir->i_sb);
@@ -442,8 +423,6 @@ again:
out1:
hpfs_brelse4(&qbh);
out:
- mutex_unlock(&hpfs_i(dir)->i_mutex);
- mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
hpfs_unlock(dir->i_sb);
return err;
}
@@ -456,7 +435,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
struct hpfs_dirent *de;
struct inode *inode = dentry->d_inode;
dnode_secno dno;
- fnode_secno fno;
int n_items = 0;
int err;
int r;
@@ -465,8 +443,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
hpfs_adjust_length(name, &len);
hpfs_lock(dir->i_sb);
- mutex_lock(&hpfs_i(inode)->i_parent_mutex);
- mutex_lock(&hpfs_i(dir)->i_mutex);
err = -ENOENT;
de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
if (!de)
@@ -485,7 +461,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
if (n_items)
goto out1;
- fno = de->fnode;
r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
switch (r) {
case 1:
@@ -504,8 +479,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
out1:
hpfs_brelse4(&qbh);
out:
- mutex_unlock(&hpfs_i(dir)->i_mutex);
- mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
hpfs_unlock(dir->i_sb);
return err;
}
@@ -571,12 +544,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
hpfs_lock(i->i_sb);
/* order doesn't matter, due to VFS exclusion */
- mutex_lock(&hpfs_i(i)->i_parent_mutex);
- if (new_inode)
- mutex_lock(&hpfs_i(new_inode)->i_parent_mutex);
- mutex_lock(&hpfs_i(old_dir)->i_mutex);
- if (new_dir != old_dir)
- mutex_lock(&hpfs_i(new_dir)->i_mutex);
/* Erm? Moving over the empty non-busy directory is perfectly legal */
if (new_inode && S_ISDIR(new_inode->i_mode)) {
@@ -613,9 +580,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir == old_dir) hpfs_brelse4(&qbh);
- hpfs_lock_creation(i->i_sb);
- if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de, 1))) {
- hpfs_unlock_creation(i->i_sb);
+ if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de))) {
if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!");
err = r == 1 ? -ENOSPC : -EFSERROR;
if (new_dir != old_dir) hpfs_brelse4(&qbh);
@@ -624,20 +589,17 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir == old_dir)
if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
- hpfs_unlock_creation(i->i_sb);
hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2");
err = -ENOENT;
goto end1;
}
if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) {
- hpfs_unlock_creation(i->i_sb);
hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent");
err = r == 2 ? -ENOSPC : -EFSERROR;
goto end1;
}
- hpfs_unlock_creation(i->i_sb);
-
+
end:
hpfs_i(i)->i_parent_dir = new_dir->i_ino;
if (S_ISDIR(i->i_mode)) {
@@ -645,22 +607,14 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
drop_nlink(old_dir);
}
if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) {
- fnode->up = new_dir->i_ino;
+ fnode->up = cpu_to_le32(new_dir->i_ino);
fnode->len = new_len;
memcpy(fnode->name, new_name, new_len>15?15:new_len);
if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len);
mark_buffer_dirty(bh);
brelse(bh);
}
- hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv;
- hpfs_decide_conv(i, new_name, new_len);
end1:
- if (old_dir != new_dir)
- mutex_unlock(&hpfs_i(new_dir)->i_mutex);
- mutex_unlock(&hpfs_i(old_dir)->i_mutex);
- mutex_unlock(&hpfs_i(i)->i_parent_mutex);
- if (new_inode)
- mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
hpfs_unlock(i->i_sb);
return err;
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c89b40808587..98580a3b5005 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -18,15 +18,16 @@
/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
-static void mark_dirty(struct super_block *s)
+static void mark_dirty(struct super_block *s, int remount)
{
- if (hpfs_sb(s)->sb_chkdsk && !(s->s_flags & MS_RDONLY)) {
+ if (hpfs_sb(s)->sb_chkdsk && (remount || !(s->s_flags & MS_RDONLY))) {
struct buffer_head *bh;
struct hpfs_spare_block *sb;
if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
sb->dirty = 1;
sb->old_wrote = 0;
mark_buffer_dirty(bh);
+ sync_dirty_buffer(bh);
brelse(bh);
}
}
@@ -40,10 +41,12 @@ static void unmark_dirty(struct super_block *s)
struct buffer_head *bh;
struct hpfs_spare_block *sb;
if (s->s_flags & MS_RDONLY) return;
+ sync_blockdev(s->s_bdev);
if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error;
sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error;
mark_buffer_dirty(bh);
+ sync_dirty_buffer(bh);
brelse(bh);
}
}
@@ -63,13 +66,13 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)
if (!hpfs_sb(s)->sb_was_error) {
if (hpfs_sb(s)->sb_err == 2) {
printk("; crashing the system because you wanted it\n");
- mark_dirty(s);
+ mark_dirty(s, 0);
panic("HPFS panic");
} else if (hpfs_sb(s)->sb_err == 1) {
if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n");
else {
printk("; remounting read-only\n");
- mark_dirty(s);
+ mark_dirty(s, 0);
s->s_flags |= MS_RDONLY;
}
} else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n");
@@ -102,9 +105,12 @@ static void hpfs_put_super(struct super_block *s)
{
struct hpfs_sb_info *sbi = hpfs_sb(s);
+ hpfs_lock(s);
+ unmark_dirty(s);
+ hpfs_unlock(s);
+
kfree(sbi->sb_cp_table);
kfree(sbi->sb_bmp_dir);
- unmark_dirty(s);
s->s_fs_info = NULL;
kfree(sbi);
}
@@ -129,7 +135,7 @@ static unsigned count_bitmaps(struct super_block *s)
n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
count = 0;
for (n = 0; n < n_bands; n++)
- count += hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_bmp_dir[n]);
+ count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n]));
return count;
}
@@ -188,8 +194,6 @@ static void init_once(void *foo)
{
struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
- mutex_init(&ei->i_mutex);
- mutex_init(&ei->i_parent_mutex);
inode_init_once(&ei->vfs_inode);
}
@@ -218,7 +222,6 @@ static void destroy_inodecache(void)
enum {
Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis,
- Opt_conv_binary, Opt_conv_text, Opt_conv_auto,
Opt_check_none, Opt_check_normal, Opt_check_strict,
Opt_err_cont, Opt_err_ro, Opt_err_panic,
Opt_eas_no, Opt_eas_ro, Opt_eas_rw,
@@ -233,9 +236,6 @@ static const match_table_t tokens = {
{Opt_umask, "umask=%o"},
{Opt_case_lower, "case=lower"},
{Opt_case_asis, "case=asis"},
- {Opt_conv_binary, "conv=binary"},
- {Opt_conv_text, "conv=text"},
- {Opt_conv_auto, "conv=auto"},
{Opt_check_none, "check=none"},
{Opt_check_normal, "check=normal"},
{Opt_check_strict, "check=strict"},
@@ -253,7 +253,7 @@ static const match_table_t tokens = {
};
static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
- int *lowercase, int *conv, int *eas, int *chk, int *errs,
+ int *lowercase, int *eas, int *chk, int *errs,
int *chkdsk, int *timeshift)
{
char *p;
@@ -295,15 +295,6 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
case Opt_case_asis:
*lowercase = 0;
break;
- case Opt_conv_binary:
- *conv = CONV_BINARY;
- break;
- case Opt_conv_text:
- *conv = CONV_TEXT;
- break;
- case Opt_conv_auto:
- *conv = CONV_AUTO;
- break;
case Opt_check_none:
*chk = 0;
break;
@@ -370,9 +361,6 @@ HPFS filesystem options:\n\
umask=xxx set mode of files that don't have mode specified in eas\n\
case=lower lowercase all files\n\
case=asis do not lowercase files (default)\n\
- conv=binary do not convert CR/LF -> LF (default)\n\
- conv=auto convert only files with known text extensions\n\
- conv=text convert all files\n\
check=none no fs checks - kernel may crash on corrupted filesystem\n\
check=normal do some checks - it should not crash (default)\n\
check=strict do extra time-consuming checks, used for debugging\n\
@@ -394,7 +382,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
uid_t uid;
gid_t gid;
umode_t umask;
- int lowercase, conv, eas, chk, errs, chkdsk, timeshift;
+ int lowercase, eas, chk, errs, chkdsk, timeshift;
int o;
struct hpfs_sb_info *sbi = hpfs_sb(s);
char *new_opts = kstrdup(data, GFP_KERNEL);
@@ -405,11 +393,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
lock_super(s);
uid = sbi->sb_uid; gid = sbi->sb_gid;
umask = 0777 & ~sbi->sb_mode;
- lowercase = sbi->sb_lowercase; conv = sbi->sb_conv;
+ lowercase = sbi->sb_lowercase;
eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk;
errs = sbi->sb_err; timeshift = sbi->sb_timeshift;
- if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &conv,
+ if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase,
&eas, &chk, &errs, &chkdsk, &timeshift))) {
printk("HPFS: bad mount options.\n");
goto out_err;
@@ -427,11 +415,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
sbi->sb_uid = uid; sbi->sb_gid = gid;
sbi->sb_mode = 0777 & ~umask;
- sbi->sb_lowercase = lowercase; sbi->sb_conv = conv;
+ sbi->sb_lowercase = lowercase;
sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;
sbi->sb_err = errs; sbi->sb_timeshift = timeshift;
- if (!(*flags & MS_RDONLY)) mark_dirty(s);
+ if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
replace_mount_options(s, new_opts);
@@ -471,7 +459,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
uid_t uid;
gid_t gid;
umode_t umask;
- int lowercase, conv, eas, chk, errs, chkdsk, timeshift;
+ int lowercase, eas, chk, errs, chkdsk, timeshift;
dnode_secno root_dno;
struct hpfs_dirent *de = NULL;
@@ -479,11 +467,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
int o;
- if (num_possible_cpus() > 1) {
- printk(KERN_ERR "HPFS is not SMP safe\n");
- return -EINVAL;
- }
-
save_mount_options(s, options);
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
@@ -495,20 +478,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
sbi->sb_bmp_dir = NULL;
sbi->sb_cp_table = NULL;
- mutex_init(&sbi->hpfs_creation_de);
+ mutex_init(&sbi->hpfs_mutex);
+ hpfs_lock(s);
uid = current_uid();
gid = current_gid();
umask = current_umask();
lowercase = 0;
- conv = CONV_BINARY;
eas = 2;
chk = 1;
errs = 1;
chkdsk = 1;
timeshift = 0;
- if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &conv,
+ if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase,
&eas, &chk, &errs, &chkdsk, &timeshift))) {
printk("HPFS: bad mount options.\n");
goto bail0;
@@ -526,9 +509,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3;
/* Check magics */
- if (/*bootblock->magic != BB_MAGIC
- ||*/ superblock->magic != SB_MAGIC
- || spareblock->magic != SP_MAGIC) {
+ if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC
+ ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC
+ || le32_to_cpu(spareblock->magic) != SP_MAGIC) {
if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n");
goto bail4;
}
@@ -549,19 +532,18 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
s->s_op = &hpfs_sops;
s->s_d_op = &hpfs_dentry_operations;
- sbi->sb_root = superblock->root;
- sbi->sb_fs_size = superblock->n_sectors;
- sbi->sb_bitmaps = superblock->bitmaps;
- sbi->sb_dirband_start = superblock->dir_band_start;
- sbi->sb_dirband_size = superblock->n_dir_band;
- sbi->sb_dmap = superblock->dir_band_bitmap;
+ sbi->sb_root = le32_to_cpu(superblock->root);
+ sbi->sb_fs_size = le32_to_cpu(superblock->n_sectors);
+ sbi->sb_bitmaps = le32_to_cpu(superblock->bitmaps);
+ sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start);
+ sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band);
+ sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap);
sbi->sb_uid = uid;
sbi->sb_gid = gid;
sbi->sb_mode = 0777 & ~umask;
sbi->sb_n_free = -1;
sbi->sb_n_free_dnodes = -1;
sbi->sb_lowercase = lowercase;
- sbi->sb_conv = conv;
sbi->sb_eas = eas;
sbi->sb_chk = chk;
sbi->sb_chkdsk = chkdsk;
@@ -573,7 +555,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
sbi->sb_max_fwd_alloc = 0xffffff;
/* Load bitmap directory */
- if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, superblock->bitmaps)))
+ if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps))))
goto bail4;
/* Check for general fs errors*/
@@ -591,20 +573,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
mark_buffer_dirty(bh2);
}
- if (spareblock->hotfixes_used || spareblock->n_spares_used) {
+ if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) {
if (errs >= 2) {
printk("HPFS: Hotfixes not supported here, try chkdsk\n");
- mark_dirty(s);
+ mark_dirty(s, 0);
goto bail4;
}
hpfs_error(s, "hotfixes not supported here, try chkdsk");
if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n");
else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n");
}
- if (spareblock->n_dnode_spares != spareblock->n_dnode_spares_free) {
+ if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) {
if (errs >= 2) {
printk("HPFS: Spare dnodes used, try chkdsk\n");
- mark_dirty(s);
+ mark_dirty(s, 0);
goto bail4;
}
hpfs_error(s, "warning: spare dnodes used, try chkdsk");
@@ -612,26 +594,26 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
}
if (chk) {
unsigned a;
- if (superblock->dir_band_end - superblock->dir_band_start + 1 != superblock->n_dir_band ||
- superblock->dir_band_end < superblock->dir_band_start || superblock->n_dir_band > 0x4000) {
+ if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) ||
+ le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) {
hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x",
- superblock->dir_band_start, superblock->dir_band_end, superblock->n_dir_band);
+ le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->dir_band_end), le32_to_cpu(superblock->n_dir_band));
goto bail4;
}
a = sbi->sb_dirband_size;
sbi->sb_dirband_size = 0;
- if (hpfs_chk_sectors(s, superblock->dir_band_start, superblock->n_dir_band, "dir_band") ||
- hpfs_chk_sectors(s, superblock->dir_band_bitmap, 4, "dir_band_bitmap") ||
- hpfs_chk_sectors(s, superblock->bitmaps, 4, "bitmaps")) {
- mark_dirty(s);
+ if (hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->n_dir_band), "dir_band") ||
+ hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_bitmap), 4, "dir_band_bitmap") ||
+ hpfs_chk_sectors(s, le32_to_cpu(superblock->bitmaps), 4, "bitmaps")) {
+ mark_dirty(s, 0);
goto bail4;
}
sbi->sb_dirband_size = a;
} else printk("HPFS: You really don't want any checks? You are crazy...\n");
/* Load code page table */
- if (spareblock->n_code_pages)
- if (!(sbi->sb_cp_table = hpfs_load_code_page(s, spareblock->code_page_dir)))
+ if (le32_to_cpu(spareblock->n_code_pages))
+ if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir))))
printk("HPFS: Warning: code page support is disabled\n");
brelse(bh2);
@@ -660,13 +642,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
if (!de)
hpfs_error(s, "unable to find root dir");
else {
- root->i_atime.tv_sec = local_to_gmt(s, de->read_date);
+ root->i_atime.tv_sec = local_to_gmt(s, le32_to_cpu(de->read_date));
root->i_atime.tv_nsec = 0;
- root->i_mtime.tv_sec = local_to_gmt(s, de->write_date);
+ root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
root->i_mtime.tv_nsec = 0;
- root->i_ctime.tv_sec = local_to_gmt(s, de->creation_date);
+ root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
root->i_ctime.tv_nsec = 0;
- hpfs_i(root)->i_ea_size = de->ea_size;
+ hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size);
hpfs_i(root)->i_parent_dir = root->i_ino;
if (root->i_size == -1)
root->i_size = 2048;
@@ -674,6 +656,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
root->i_blocks = 5;
hpfs_brelse4(&qbh);
}
+ hpfs_unlock(s);
return 0;
bail4: brelse(bh2);
@@ -681,6 +664,7 @@ bail3: brelse(bh1);
bail2: brelse(bh0);
bail1:
bail0:
+ hpfs_unlock(s);
kfree(sbi->sb_bmp_dir);
kfree(sbi->sb_cp_table);
s->s_fs_info = NULL;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9eeb1cd03ff..7aafeb8fa300 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
pgoff = offset >> PAGE_SHIFT;
i_size_write(inode, offset);
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_mutex);
if (!prio_tree_empty(&mapping->i_mmap))
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_mutex);
truncate_hugepages(inode, offset);
return 0;
}
@@ -921,7 +921,8 @@ static int can_do_hugetlb_shm(void)
return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
}
-struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
+struct file *hugetlb_file_setup(const char *name, size_t size,
+ vm_flags_t acctflag,
struct user_struct **user, int creat_flags)
{
int error = -ENOMEM;
diff --git a/fs/inode.c b/fs/inode.c
index 33c963d08ab4..990d284877a1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>
+#include <linux/prefetch.h>
#include <linux/ima.h>
#include <linux/cred.h>
#include "internal.h"
@@ -325,12 +326,11 @@ void address_space_init_once(struct address_space *mapping)
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
spin_lock_init(&mapping->tree_lock);
- spin_lock_init(&mapping->i_mmap_lock);
+ mutex_init(&mapping->i_mmap_mutex);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
- mutex_init(&mapping->unmap_mutex);
}
EXPORT_SYMBOL(address_space_init_once);
@@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan)
* This function is passed the number of inodes to scan, and it returns the
* total number of remaining possibly-reclaimable inodes.
*/
-static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink,
+ struct shrink_control *sc)
{
+ int nr = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
+
if (nr) {
/*
* Nasty deadlock avoidance. We may hold various FS locks,
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 69b180459463..72ffa974b0b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -302,12 +302,6 @@ void journal_commit_transaction(journal_t *journal)
* all outstanding updates to complete.
*/
-#ifdef COMMIT_STATS
- spin_lock(&journal->j_list_lock);
- summarise_journal_usage(journal);
- spin_unlock(&journal->j_list_lock);
-#endif
-
/* Do we need to erase the effects of a prior journal_flush? */
if (journal->j_flags & JFS_FLUSHED) {
jbd_debug(3, "super block updated\n");
@@ -722,8 +716,13 @@ wait_for_iobuf:
required. */
JBUFFER_TRACE(jh, "file as BJ_Forget");
journal_file_buffer(jh, commit_transaction, BJ_Forget);
- /* Wake up any transactions which were waiting for this
- IO to complete */
+ /*
+ * Wake up any transactions which were waiting for this
+ * IO to complete. The barrier must be here so that changes
+ * by journal_file_buffer() take effect before wake_up_bit()
+ * does the waitqueue check.
+ */
+ smp_mb();
wake_up_bit(&bh->b_state, BH_Unshadow);
JBUFFER_TRACE(jh, "brelse shadowed buffer");
__brelse(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b3713afaaa9e..e2d4285fbe90 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -437,9 +437,12 @@ int __log_space_left(journal_t *journal)
int __log_start_commit(journal_t *journal, tid_t target)
{
/*
- * Are we already doing a recent enough commit?
+ * The only transaction we can possibly wait upon is the
+ * currently running transaction (if it exists). Otherwise,
+ * the target tid must be an old one.
*/
- if (!tid_geq(journal->j_commit_request, target)) {
+ if (journal->j_running_transaction &&
+ journal->j_running_transaction->t_tid == target) {
/*
* We want a new commit: OK, mark the request and wakeup the
* commit thread. We do _not_ do the commit ourselves.
@@ -451,7 +454,14 @@ int __log_start_commit(journal_t *journal, tid_t target)
journal->j_commit_sequence);
wake_up(&journal->j_wait_commit);
return 1;
- }
+ } else if (!tid_geq(journal->j_commit_request, target))
+ /* This should never happen, but if it does, preserve
+ the evidence before kjournald goes into a loop and
+ increments j_commit_sequence beyond all recognition. */
+ WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
+ journal->j_commit_request, journal->j_commit_sequence,
+ target, journal->j_running_transaction ?
+ journal->j_running_transaction->t_tid : 0);
return 0;
}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 60d2319651b2..f7ee81a065da 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -266,7 +266,8 @@ static handle_t *new_handle(int nblocks)
* This function is visible to journal users (like ext3fs), so is not
* called with the journal already locked.
*
- * Return a pointer to a newly allocated handle, or NULL on failure
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
*/
handle_t *journal_start(journal_t *journal, int nblocks)
{
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6e28000a4b21..29148a81c783 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -338,12 +338,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* all outstanding updates to complete.
*/
-#ifdef COMMIT_STATS
- spin_lock(&journal->j_list_lock);
- summarise_journal_usage(journal);
- spin_unlock(&journal->j_list_lock);
-#endif
-
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
jbd_debug(3, "super block updated\n");
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/gfp.h>
+#include <linux/prefetch.h>
#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9e22085231b3..d8d09380c7de 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -481,7 +481,7 @@ static int inode_write_alias(struct super_block *sb,
val = inode_val0(inode);
break;
case INODE_USED_OFS:
- val = cpu_to_be64(li->li_used_bytes);;
+ val = cpu_to_be64(li->li_used_bytes);
break;
case INODE_SIZE_OFS:
val = cpu_to_be64(i_size_read(inode));
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 33435e4b14d2..ce03a182c771 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -480,10 +480,6 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
!read_only)
return -EIO;
- mutex_init(&super->s_dirop_mutex);
- mutex_init(&super->s_object_alias_mutex);
- INIT_LIST_HEAD(&super->s_freeing_list);
-
ret = logfs_init_rw(sb);
if (ret)
return ret;
@@ -601,6 +597,10 @@ static struct dentry *logfs_mount(struct file_system_type *type, int flags,
if (!super)
return ERR_PTR(-ENOMEM);
+ mutex_init(&super->s_dirop_mutex);
+ mutex_init(&super->s_object_alias_mutex);
+ INIT_LIST_HEAD(&super->s_freeing_list);
+
if (!devname)
err = logfs_get_sb_bdev(super, type, devname);
else if (strncmp(devname, "mtd", 3))
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2f174be06555..8c32ef3ba88e 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock);
* What the mbcache registers as to get shrunk dynamically.
*/
-static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink,
+ struct shrink_control *sc);
static struct shrinker mb_cache_shrinker = {
.shrink = mb_cache_shrink_fn,
@@ -156,18 +157,19 @@ forget:
* gets low.
*
* @shrink: (ignored)
- * @nr_to_scan: Number of objects to scan
- * @gfp_mask: (ignored)
+ * @sc: shrink_control passed from reclaim
*
* Returns the number of objects which are present in the cache.
*/
static int
-mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
{
LIST_HEAD(free_list);
struct mb_cache *cache;
struct mb_cache_entry *entry, *tmp;
int count = 0;
+ int nr_to_scan = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
mb_debug("trying to free %d entries", nr_to_scan);
spin_lock(&mb_cache_spinlock);
diff --git a/fs/namei.c b/fs/namei.c
index f90f0593092a..2358b326b221 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname);
static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
{
- umode_t mode = inode->i_mode;
+ unsigned int mode = inode->i_mode;
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -1296,12 +1296,12 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
{
int res;
- BUG_ON(nd->depth >= MAX_NESTED_LINKS);
if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
path_put_conditional(path, nd);
path_put(&nd->path);
return -ELOOP;
}
+ BUG_ON(nd->depth >= MAX_NESTED_LINKS);
nd->depth++;
current->link_count++;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0250e4ce4893..202f370526a7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -461,7 +461,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
#endif
struct ncp_entry_info finfo;
- data.wdog_pid = NULL;
+ memset(&data, 0, sizeof(data));
server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
if (!server)
return -ENOMEM;
@@ -496,7 +496,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
data.flags = md->flags;
- data.int_flags = 0;
data.mounted_uid = md->mounted_uid;
data.wdog_pid = find_get_pid(md->wdog_pid);
data.ncp_fd = md->ncp_fd;
@@ -507,7 +506,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
data.file_mode = md->file_mode;
data.dir_mode = md->dir_mode;
data.info_fd = -1;
- data.mounted_vol[0] = 0;
}
break;
default:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7237672216c8..424e47773a84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink,
+ struct shrink_control *sc)
{
LIST_HEAD(head);
struct nfs_inode *nfsi, *next;
struct nfs_access_entry *cache;
+ int nr_to_scan = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return (nr_to_scan == 0) ? 0 : -1;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ce118ce885dd..2df6ca7b5898 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp,
/* dir.c */
extern int nfs_access_cache_shrinker(struct shrinker *shrink,
- int nr_to_scan, gfp_t gfp_mask);
+ struct shrink_control *sc);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 6f8192f4cfc7..be79dc9f386d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -117,6 +117,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -EKEYEXPIRED:
rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
break;
+ case -NFS4ERR_RETRY_UNCACHED_REP:
+ break;
default:
dprintk("%s DS error. Retry through MDS %d\n", __func__,
task->tk_status);
@@ -416,7 +418,8 @@ static int
filelayout_check_layout(struct pnfs_layout_hdr *lo,
struct nfs4_filelayout_segment *fl,
struct nfs4_layoutget_res *lgr,
- struct nfs4_deviceid *id)
+ struct nfs4_deviceid *id,
+ gfp_t gfp_flags)
{
struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
@@ -439,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
/* find and reference the deviceid */
dsaddr = nfs4_fl_find_get_deviceid(id);
if (dsaddr == NULL) {
- dsaddr = get_device_info(lo->plh_inode, id);
+ dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
if (dsaddr == NULL)
goto out;
}
@@ -500,7 +503,8 @@ static int
filelayout_decode_layout(struct pnfs_layout_hdr *flo,
struct nfs4_filelayout_segment *fl,
struct nfs4_layoutget_res *lgr,
- struct nfs4_deviceid *id)
+ struct nfs4_deviceid *id,
+ gfp_t gfp_flags)
{
struct xdr_stream stream;
struct xdr_buf buf = {
@@ -516,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
dprintk("%s: set_layout_map Begin\n", __func__);
- scratch = alloc_page(GFP_KERNEL);
+ scratch = alloc_page(gfp_flags);
if (!scratch)
return -ENOMEM;
@@ -554,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
goto out_err;
fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
- GFP_KERNEL);
+ gfp_flags);
if (!fl->fh_array)
goto out_err;
for (i = 0; i < fl->num_fh; i++) {
/* Do we want to use a mempool here? */
- fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
+ fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
if (!fl->fh_array[i])
goto out_err_free;
@@ -605,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
- struct nfs4_layoutget_res *lgr)
+ struct nfs4_layoutget_res *lgr,
+ gfp_t gfp_flags)
{
struct nfs4_filelayout_segment *fl;
int rc;
struct nfs4_deviceid id;
dprintk("--> %s\n", __func__);
- fl = kzalloc(sizeof(*fl), GFP_KERNEL);
+ fl = kzalloc(sizeof(*fl), gfp_flags);
if (!fl)
return NULL;
- rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
- if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
+ rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
+ if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
_filelayout_free_lseg(fl);
return NULL;
}
@@ -633,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
int size = (fl->stripe_type == STRIPE_SPARSE) ?
fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
- fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
+ fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
if (!fl->commit_buckets) {
filelayout_free_lseg(&fl->generic_hdr);
return NULL;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 7c44579f5832..2b461d77b43a 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr *
nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index de5350f2b249..db07c7af1395 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
}
static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
+nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *tmp_ds, *ds;
- ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
+ ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
if (!ds)
goto out;
@@ -261,7 +261,7 @@ out:
* Currently only support ipv4, and one multi-path address.
*/
static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
+decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *ds = NULL;
char *buf;
@@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
rlen);
goto out_err;
}
- buf = kmalloc(rlen + 1, GFP_KERNEL);
+ buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
goto out_err;
@@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1]));
- ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
+ ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
dprintk("%s: Decoded address and port %s\n", __func__, buf);
out_free:
kfree(buf);
@@ -343,7 +343,7 @@ out_err:
/* Decode opaque device data and return the result */
static struct nfs4_file_layout_dsaddr*
-decode_device(struct inode *ino, struct pnfs_device *pdev)
+decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
{
int i;
u32 cnt, num;
@@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
struct page *scratch;
/* set up xdr stream */
- scratch = alloc_page(GFP_KERNEL);
+ scratch = alloc_page(gfp_flags);
if (!scratch)
goto out_err;
@@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
}
/* read stripe indices */
- stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
+ stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
if (!stripe_indices)
goto out_err_free_scratch;
@@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
dsaddr = kzalloc(sizeof(*dsaddr) +
(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
- GFP_KERNEL);
+ gfp_flags);
if (!dsaddr)
goto out_err_free_stripe_indices;
@@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
for (j = 0; j < mp_count; j++) {
if (j == 0) {
dsaddr->ds_list[i] = decode_and_add_ds(&stream,
- ino);
+ ino, gfp_flags);
if (dsaddr->ds_list[i] == NULL)
goto out_err_free_deviceid;
} else {
@@ -503,12 +503,12 @@ out_err:
* available devices.
*/
static struct nfs4_file_layout_dsaddr *
-decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
+decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
{
struct nfs4_file_layout_dsaddr *d, *new;
long hash;
- new = decode_device(inode, dev);
+ new = decode_device(inode, dev, gfp_flags);
if (!new) {
printk(KERN_WARNING "%s: Could not decode or add device\n",
__func__);
@@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
* of available devices, and return it.
*/
struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
{
struct pnfs_device *pdev = NULL;
u32 max_resp_sz;
@@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
__func__, inode, max_resp_sz, max_pages);
- pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
+ pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
if (pdev == NULL)
return NULL;
- pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+ pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
if (pages == NULL) {
kfree(pdev);
return NULL;
}
for (i = 0; i < max_pages; i++) {
- pages[i] = alloc_page(GFP_KERNEL);
+ pages[i] = alloc_page(gfp_flags);
if (!pages[i])
goto out_free;
}
@@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
* Found new device, need to decode it and then add it to the
* list of known devices for this mountpoint.
*/
- dsaddr = decode_and_add_device(inode, pdev);
+ dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
out_free:
for (i = 0; i < max_pages; i++)
__free_page(pages[i]);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 69c0f3c5ee7a..cf1b339c3937 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -300,6 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
ret = nfs4_delay(server->client, &exception->timeout);
if (ret != 0)
break;
+ case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_OLD_STATEID:
exception->retry = 1;
break;
@@ -3695,6 +3696,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
rpc_delay(task, NFS4_POLL_RETRY_MAX);
task->tk_status = 0;
return -EAGAIN;
+ case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_OLD_STATEID:
task->tk_status = 0;
return -EAGAIN;
@@ -4844,6 +4846,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
rpc_delay(task, NFS4_POLL_RETRY_MIN);
task->tk_status = 0;
+ /* fall through */
+ case -NFS4ERR_RETRY_UNCACHED_REP:
nfs_restart_rpc(task, data->clp);
return;
}
@@ -5479,6 +5483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
break;
case -NFS4ERR_DELAY:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
+ /* fall through */
+ case -NFS4ERR_RETRY_UNCACHED_REP:
return -EAGAIN;
default:
nfs4_schedule_lease_recovery(clp);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ff681ab65d31..f57f5281a520 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -383,6 +383,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
plh_layouts);
dprintk("%s freeing layout for inode %lu\n", __func__,
lo->plh_inode->i_ino);
+ list_del_init(&lo->plh_layouts);
pnfs_destroy_layout(NFS_I(lo->plh_inode));
}
}
@@ -466,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
- u32 iomode)
+ u32 iomode,
+ gfp_t gfp_flags)
{
struct inode *ino = lo->plh_inode;
struct nfs_server *server = NFS_SERVER(ino);
@@ -479,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
BUG_ON(ctx == NULL);
- lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+ lgp = kzalloc(sizeof(*lgp), gfp_flags);
if (lgp == NULL)
return NULL;
@@ -487,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo,
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
max_pages = max_resp_sz >> PAGE_SHIFT;
- pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+ pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
if (!pages)
goto out_err_free;
for (i = 0; i < max_pages; i++) {
- pages[i] = alloc_page(GFP_KERNEL);
+ pages[i] = alloc_page(gfp_flags);
if (!pages[i])
goto out_err_free;
}
@@ -508,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.layout.pages = pages;
lgp->args.layout.pglen = max_pages * PAGE_SIZE;
lgp->lsegpp = &lseg;
+ lgp->gfp_flags = gfp_flags;
/* Synchronously retrieve layout information from server and
* store in lseg.
@@ -665,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
}
static struct pnfs_layout_hdr *
-alloc_init_layout_hdr(struct inode *ino)
+alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
struct pnfs_layout_hdr *lo;
- lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
+ lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
if (!lo)
return NULL;
atomic_set(&lo->plh_refcount, 1);
@@ -681,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino)
}
static struct pnfs_layout_hdr *
-pnfs_find_alloc_layout(struct inode *ino)
+pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_hdr *new = NULL;
@@ -696,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino)
return nfsi->layout;
}
spin_unlock(&ino->i_lock);
- new = alloc_init_layout_hdr(ino);
+ new = alloc_init_layout_hdr(ino, gfp_flags);
spin_lock(&ino->i_lock);
if (likely(nfsi->layout == NULL)) /* Won the race? */
@@ -756,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
- enum pnfs_iomode iomode)
+ enum pnfs_iomode iomode,
+ gfp_t gfp_flags)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
@@ -767,7 +771,7 @@ pnfs_update_layout(struct inode *ino,
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
return NULL;
spin_lock(&ino->i_lock);
- lo = pnfs_find_alloc_layout(ino);
+ lo = pnfs_find_alloc_layout(ino, gfp_flags);
if (lo == NULL) {
dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
goto out_unlock;
@@ -807,7 +811,7 @@ pnfs_update_layout(struct inode *ino,
spin_unlock(&clp->cl_lock);
}
- lseg = send_layoutget(lo, ctx, iomode);
+ lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
if (!lseg && first) {
spin_lock(&clp->cl_lock);
list_del_init(&lo->plh_layouts);
@@ -846,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out;
}
/* Inject layout blob into I/O device driver */
- lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
+ lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
if (!lseg || IS_ERR(lseg)) {
if (!lseg)
status = -ENOMEM;
@@ -899,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
- IOMODE_READ);
+ IOMODE_READ,
+ GFP_KERNEL);
}
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}
@@ -921,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
- IOMODE_RW);
+ IOMODE_RW,
+ GFP_NOFS);
}
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index bc4827202e7a..0c015bad9e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type {
const u32 id;
const char *name;
struct module *owner;
- struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
+ struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
void (*free_lseg) (struct pnfs_layout_segment *lseg);
/* test for nfs page cache coalescing */
@@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- enum pnfs_iomode access_type);
+ enum pnfs_iomode access_type, gfp_t gfp_flags);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
@@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
static inline struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- enum pnfs_iomode access_type)
+ enum pnfs_iomode access_type, gfp_t gfp_flags)
{
return NULL;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7cded2b12a05..2bcf0dc306a1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
atomic_set(&req->wb_complete, requests);
BUG_ON(desc->pg_lseg != NULL);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
@@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
0, lseg);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3bd5d7e80f6c..49c715b4ac92 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
atomic_set(&req->wb_complete, requests);
BUG_ON(desc->pg_lseg);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
@@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 5232d3e8fb2f..a2e2402b2afb 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,7 +8,7 @@
* Statistsics for the reply cache
* fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
* statistics for filehandle lookup
- * io <bytes-read> <bytes-writtten>
+ * io <bytes-read> <bytes-written>
* statistics for IO throughput
* th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
* time (seconds) when nfsd thread usage above thresholds
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 0a0a66d98cce..eed4d7b26249 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
void nilfs_palloc_commit_alloc_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
- nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
- nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
+ mark_buffer_dirty(req->pr_bitmap_bh);
+ mark_buffer_dirty(req->pr_desc_bh);
nilfs_mdt_mark_dirty(inode);
brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
kunmap(req->pr_bitmap_bh->b_page);
kunmap(req->pr_desc_bh->b_page);
- nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
- nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
+ mark_buffer_dirty(req->pr_desc_bh);
+ mark_buffer_dirty(req->pr_bitmap_bh);
nilfs_mdt_mark_dirty(inode);
brelse(req->pr_bitmap_bh);
@@ -646,7 +646,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
unsigned long group, group_offset;
int i, j, n, ret;
- for (i = 0; i < nitems; i += n) {
+ for (i = 0; i < nitems; i = j) {
group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
if (ret < 0)
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
kunmap(bitmap_bh->b_page);
kunmap(desc_bh->b_page);
- nilfs_mdt_mark_buffer_dirty(desc_bh);
- nilfs_mdt_mark_buffer_dirty(bitmap_bh);
+ mark_buffer_dirty(desc_bh);
+ mark_buffer_dirty(bitmap_bh);
nilfs_mdt_mark_dirty(inode);
brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04e9b12..aadbd0b5e3e8 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
{
- return NILFS_I_NILFS(bmap->b_inode)->ns_dat;
+ struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
+
+ return nilfs->ns_dat;
}
static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd223eea8..a35ae35e6932 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
#include "page.h"
#include "btnode.h"
-void nilfs_btnode_cache_init(struct address_space *btnc,
- struct backing_dev_info *bdi)
-{
- nilfs_mapping_init(btnc, bdi);
-}
-
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
BUG();
}
memset(bh->b_data, 0, 1 << inode->i_blkbits);
- bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+ bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
if (pblocknr == 0) {
pblocknr = blocknr;
if (inode->i_ino != NILFS_DAT_INO) {
- struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
/* blocknr is a virtual block number */
- err = nilfs_dat_translate(dat, blocknr, &pblocknr);
+ err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
+ &pblocknr);
if (unlikely(err)) {
brelse(bh);
goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
goto found;
}
set_buffer_mapped(bh);
- bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+ bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = pblocknr; /* set block address for read */
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
- nilfs_btnode_mark_dirty(obh);
+ mark_buffer_dirty(obh);
spin_lock_irq(&btnc->tree_lock);
radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
unlock_page(opage);
} else {
nilfs_copy_buffer(nbh, obh);
- nilfs_btnode_mark_dirty(nbh);
+ mark_buffer_dirty(nbh);
nbh->b_blocknr = newkey;
ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd888c28..3a4dd2d8d3fc 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
-void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
__u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
void nilfs_btnode_abort_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
-#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh)
-
-
#endif /* _NILFS_BTNODE_H */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0e0bf3..7eafe468a29c 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
nilfs_btree_get_nonroot_node(path, level),
path[level].bp_index, key);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
} while ((path[level].bp_index == 0) &&
(++level < nilfs_btree_height(btree) - 1));
}
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
nilfs_btree_node_insert(node, path[level].bp_index,
*keyp, *ptrp, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
newkey = nilfs_btree_node_get_key(right, 0);
newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
nilfs_btree_node_set_level(root, level + 1);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
nilfs_btree_node_delete(node, path[level].bp_index,
keyp, ptrp, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
nilfs_btnode_delete(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
nilfs_btnode_delete(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
if (!buffer_dirty(bh))
- nilfs_btnode_mark_dirty(bh);
+ mark_buffer_dirty(bh);
if (!nilfs_bmap_dirty(btree))
nilfs_bmap_set_dirty(btree);
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
{
while ((++level < nilfs_btree_height(btree) - 1) &&
!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
return 0;
}
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
}
if (!buffer_dirty(bh))
- nilfs_btnode_mark_dirty(bh);
+ mark_buffer_dirty(bh);
brelse(bh);
if (!nilfs_bmap_dirty(btree))
nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8a1024..c9b342c8b503 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
if (!nilfs_cpfile_is_in_first(cpfile, cno))
nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
kaddr, 1);
- nilfs_mdt_mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(cp_bh);
kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
header = nilfs_cpfile_block_get_header(cpfile, header_bh,
kaddr);
le64_add_cpu(&header->ch_ncheckpoints, 1);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
}
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
}
if (nicps > 0) {
tnicps += nicps;
- nilfs_mdt_mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(cp_bh);
nilfs_mdt_mark_dirty(cpfile);
if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
header = nilfs_cpfile_block_get_header(cpfile, header_bh,
kaddr);
le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
kunmap_atomic(kaddr, KM_USER0);
}
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
le64_add_cpu(&header->ch_nsnapshots, 1);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(prev_bh);
- nilfs_mdt_mark_buffer_dirty(curr_bh);
- nilfs_mdt_mark_buffer_dirty(cp_bh);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(prev_bh);
+ mark_buffer_dirty(curr_bh);
+ mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
le64_add_cpu(&header->ch_nsnapshots, -1);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(next_bh);
- nilfs_mdt_mark_buffer_dirty(prev_bh);
- nilfs_mdt_mark_buffer_dirty(cp_bh);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(next_bh);
+ mark_buffer_dirty(prev_bh);
+ mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe742f7b..fcc2f869af16 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
static void nilfs_dat_commit_entry(struct inode *dat,
struct nilfs_palloc_req *req)
{
- nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh);
+ mark_buffer_dirty(req->pr_entry_bh);
nilfs_mdt_mark_dirty(dat);
brelse(req->pr_entry_bh);
}
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
entry->de_blocknr = cpu_to_le64(blocknr);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(entry_bh);
+ mark_buffer_dirty(entry_bh);
nilfs_mdt_mark_dirty(dat);
brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e73258631..d7eeca62febd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- SetPageChecked(page);
wait_on_page_writeback(page);
return VM_FAULT_LOCKED;
}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e23f8b2..08a07a218d26 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
#include "dat.h"
#include "ifile.h"
-static const struct address_space_operations def_gcinode_aops = {
-};
-
/*
* nilfs_gccache_submit_read_data() - add data buffer and submit read request
* @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
goto out;
if (pbn == 0) {
- struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat;
- /* use original dat, not gc dat. */
- err = nilfs_dat_translate(dat_inode, vbn, &pbn);
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+ err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
brelse(bh);
goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
}
if (!buffer_mapped(bh)) {
- bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+ bh->b_bdev = inode->i_sb->s_bdev;
set_buffer_mapped(bh);
}
bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
if (buffer_dirty(bh))
return -EEXIST;
- if (buffer_nilfs_node(bh)) {
- if (nilfs_btree_broken_node_block(bh)) {
- clear_buffer_uptodate(bh);
- return -EIO;
- }
- nilfs_btnode_mark_dirty(bh);
- } else {
- nilfs_mark_buffer_dirty(bh);
+ if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
+ clear_buffer_uptodate(bh);
+ return -EIO;
}
+ mark_buffer_dirty(bh);
return 0;
}
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- inode->i_mapping->a_ops = &def_gcinode_aops;
+ inode->i_mapping->a_ops = &empty_aops;
inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3a30ed..684d76300a80 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
return ret;
}
nilfs_palloc_commit_alloc_entry(ifile, &req);
- nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+ mark_buffer_dirty(req.pr_entry_bh);
nilfs_mdt_mark_dirty(ifile);
*out_ino = (ino_t)req.pr_entry_nr;
*out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
raw_inode->i_flags = 0;
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+ mark_buffer_dirty(req.pr_entry_bh);
brelse(req.pr_entry_bh);
nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa27490c02..587f18432832 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
struct buffer_head *bh_result, int create)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
__u64 blknum = 0;
int err = 0, ret;
- struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
- down_read(&NILFS_MDT(dat)->mi_sem);
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
- up_read(&NILFS_MDT(dat)->mi_sem);
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
if (ret >= 0) { /* found */
map_bh(bh_result, inode->i_sb, blknum);
if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
raw_inode->i_flags = cpu_to_le32(ii->i_flags);
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
+ if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+ /* zero-fill unused portion in the case of super root block */
+ raw_inode->i_xattr = 0;
+ raw_inode->i_pad = 0;
+ memset((void *)raw_inode + sizeof(*raw_inode), 0,
+ nilfs->ns_inode_size - sizeof(*raw_inode));
+ }
+
if (has_bmap)
nilfs_bmap_write(ii->i_bmap, raw_inode);
else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
return -EINVAL; /* NILFS_I_DIRTY may remain for
freeing inode */
}
- list_del(&ii->i_dirty);
- list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
+ list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
set_bit(NILFS_I_QUEUED, &ii->i_state);
}
spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
return err;
}
nilfs_update_inode(inode, ibh);
- nilfs_mdt_mark_buffer_dirty(ibh);
+ mark_buffer_dirty(ibh);
nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
brelse(ibh);
return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
- struct the_nilfs *nilfs = NILFS_I_NILFS(inode);
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
__u64 logical = 0, phys = 0, size = 0;
__u32 flags = 0;
loff_t isize;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba6246b..41d6743d303c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
return 0;
}
+static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
+ void __user *argp)
+{
+ __u64 newsize;
+ int ret = -EPERM;
+
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ ret = mnt_want_write(filp->f_path.mnt);
+ if (ret)
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(&newsize, argp, sizeof(newsize)))
+ goto out_drop_write;
+
+ ret = nilfs_resize_fs(inode->i_sb, newsize);
+
+out_drop_write:
+ mnt_drop_write(filp->f_path.mnt);
+out:
+ return ret;
+}
+
+static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
+{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+ __u64 range[2];
+ __u64 minseg, maxseg;
+ unsigned long segbytes;
+ int ret = -EPERM;
+
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(range, argp, sizeof(__u64[2])))
+ goto out;
+
+ ret = -ERANGE;
+ if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
+ goto out;
+
+ segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
+
+ minseg = range[0] + segbytes - 1;
+ do_div(minseg, segbytes);
+ maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
+ do_div(maxseg, segbytes);
+ maxseg--;
+
+ ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
+out:
+ return ret;
+}
+
static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp,
size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
case NILFS_IOCTL_SYNC:
return nilfs_ioctl_sync(inode, filp, cmd, argp);
+ case NILFS_IOCTL_RESIZE:
+ return nilfs_ioctl_resize(inode, filp, argp);
+ case NILFS_IOCTL_SET_ALLOC_RANGE:
+ return nilfs_ioctl_set_alloc_range(inode, argp);
default:
return -ENOTTY;
}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05f7069..800e8d78a83b 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
kunmap_atomic(kaddr, KM_USER0);
set_buffer_uptodate(bh);
- nilfs_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(inode);
return 0;
}
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
err = nilfs_mdt_read_block(inode, block, 0, &bh);
if (unlikely(err))
return err;
- nilfs_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(inode);
brelse(bh);
return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
INIT_LIST_HEAD(&shadow->frozen_buffers);
address_space_init_once(&shadow->frozen_data);
- nilfs_mapping_init(&shadow->frozen_data, bdi);
+ nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
address_space_init_once(&shadow->frozen_btnodes);
- nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
+ nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
mi->mi_shadow = shadow;
return 0;
}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563ec708..ab20a4baa50f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
return inode->i_private;
}
-static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
-{
- return inode->i_sb->s_fs_info;
-}
-
/* Default GFP flags using highmem */
#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
struct buffer_head *bh);
-#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
-
static inline void nilfs_mdt_mark_dirty(struct inode *inode)
{
if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
static inline __u64 nilfs_mdt_cno(struct inode *inode)
{
- return NILFS_I_NILFS(inode)->ns_cno;
+ return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
}
#define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344303cb..a9c6a531f80c 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
return &ii->vfs_inode;
}
-static inline struct inode *NILFS_AS_I(struct address_space *mapping)
-{
- return (mapping->host) ? :
- container_of(mapping, struct inode, i_data);
-}
-
/*
* Dynamic state flags of NILFS on-memory inode (i_state)
*/
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
int flip);
int nilfs_commit_super(struct super_block *sb, int flag);
int nilfs_cleanup_super(struct super_block *sb);
+int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
struct nilfs_root **root);
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059c7efd..65221a04c6f0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
#define NILFS_BUFFER_INHERENT_BITS \
((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
- (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
- (1UL << BH_NILFS_Checked))
+ (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
return bh;
}
-/*
- * Since the page cache of B-tree node pages or data page cache of pseudo
- * inodes does not have a valid mapping->host pointer, calling
- * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
- * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
- * To avoid this problem, the old style mark_buffer_dirty() is used instead.
- */
-void nilfs_mark_buffer_dirty(struct buffer_head *bh)
-{
- if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
- __set_page_dirty_nobuffers(bh->b_page);
-}
-
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
struct address_space *mapping,
unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
void nilfs_page_bug(struct page *page)
{
struct address_space *m;
- unsigned long ino = 0;
+ unsigned long ino;
if (unlikely(!page)) {
printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
}
m = page->mapping;
- if (m) {
- struct inode *inode = NILFS_AS_I(m);
- if (inode != NULL)
- ino = inode->i_ino;
- }
+ ino = m ? m->host->i_ino : 0;
+
printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
"mapping=%p ino=%lu\n",
page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
}
/**
- * nilfs_alloc_private_page - allocate a private page with buffer heads
- *
- * Return Value: On success, a pointer to the allocated page is returned.
- * On error, NULL is returned.
- */
-struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
- unsigned long state)
-{
- struct buffer_head *bh, *head, *tail;
- struct page *page;
-
- page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
- if (unlikely(!page))
- return NULL;
-
- lock_page(page);
- head = alloc_page_buffers(page, size, 0);
- if (unlikely(!head)) {
- unlock_page(page);
- __free_page(page);
- return NULL;
- }
-
- bh = head;
- do {
- bh->b_state = (1UL << BH_NILFS_Allocated) | state;
- tail = bh;
- bh->b_bdev = bdev;
- bh = bh->b_this_page;
- } while (bh);
-
- tail->b_this_page = head;
- attach_page_buffers(page, head);
-
- return page;
-}
-
-void nilfs_free_private_page(struct page *page)
-{
- BUG_ON(!PageLocked(page));
- BUG_ON(page->mapping);
-
- if (page_has_buffers(page) && !try_to_free_buffers(page))
- NILFS_PAGE_BUG(page, "failed to free page");
-
- unlock_page(page);
- __free_page(page);
-}
-
-/**
* nilfs_copy_page -- copy the page with buffers
* @dst: destination page
* @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}
-void nilfs_mapping_init(struct address_space *mapping,
+void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
struct backing_dev_info *bdi)
{
- mapping->host = NULL;
+ mapping->host = inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->assoc_mapping = NULL;
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79ad7493..fb7de71605a0 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
BH_NILFS_Redirected,
};
-BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
-void nilfs_mark_buffer_dirty(struct buffer_head *bh);
int __nilfs_clear_page_dirty(struct page *);
struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
int nilfs_page_buffers_clean(struct page *);
void nilfs_page_bug(struct page *);
-struct page *nilfs_alloc_private_page(struct block_device *, int,
- unsigned long);
-void nilfs_free_private_page(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init(struct address_space *mapping,
+void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
struct backing_dev_info *bdi);
unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a64518f38..a604ac0331b2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
static void dispose_recovery_list(struct list_head *head)
{
while (!list_empty(head)) {
- struct nilfs_recovery_block *rb
- = list_entry(head->next,
- struct nilfs_recovery_block, list);
+ struct nilfs_recovery_block *rb;
+
+ rb = list_first_entry(head, struct nilfs_recovery_block, list);
list_del(&rb->list);
kfree(rb);
}
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
void nilfs_dispose_segment_list(struct list_head *head)
{
while (!list_empty(head)) {
- struct nilfs_segment_entry *ent
- = list_entry(head->next,
- struct nilfs_segment_entry, list);
+ struct nilfs_segment_entry *ent;
+
+ ent = list_first_entry(head, struct nilfs_segment_entry, list);
list_del(&ent->list);
kfree(ent);
}
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff20f85a..850a7c0228fb 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
u32 seed)
{
struct nilfs_super_root *raw_sr;
+ struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
+ unsigned srsize;
u32 crc;
raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
+ srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
crc = crc32_le(seed,
(unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
- NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
+ srsize - sizeof(raw_sr->sr_sum));
raw_sr->sr_sum = cpu_to_le32(crc);
}
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
list_del_init(&bh->b_assoc_buffers);
- if (buffer_nilfs_allocated(bh)) {
- struct page *clone_page = bh->b_page;
-
- /* remove clone page */
- brelse(bh);
- page_cache_release(clone_page); /* for each bh */
- if (page_count(clone_page) <= 2) {
- lock_page(clone_page);
- nilfs_free_private_page(clone_page);
- }
- continue;
- }
brelse(bh);
}
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f2183454..141646e88fb5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
if (unlikely(page->index > last))
break;
- if (mapping->host) {
- lock_page(page);
- if (!page_has_buffers(page))
- create_empty_buffers(page,
- 1 << inode->i_blkbits, 0);
- unlock_page(page);
- }
+ lock_page(page);
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ unlock_page(page);
bh = head = page_buffers(page);
do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
/* The following code is duplicated with cpfile. But, it is
needed to collect the checkpoint even if it was not newly
created */
- nilfs_mdt_mark_buffer_dirty(bh_cp);
+ mark_buffer_dirty(bh_cp);
nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
nilfs_cpfile_put_checkpoint(
nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
{
struct buffer_head *bh_sr;
struct nilfs_super_root *raw_sr;
- unsigned isz = nilfs->ns_inode_size;
+ unsigned isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
+ isz = nilfs->ns_inode_size;
+ srsz = NILFS_SR_BYTES(isz);
- raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
+ raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
NILFS_SR_CPFILE_OFFSET(isz), 1);
nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
NILFS_SR_SUFILE_OFFSET(isz), 1);
+ memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
dispose_buffers:
while (!list_empty(listp)) {
- bh = list_entry(listp->next, struct buffer_head,
- b_assoc_buffers);
+ bh = list_first_entry(listp, struct buffer_head,
+ b_assoc_buffers);
list_del_init(&bh->b_assoc_buffers);
brelse(bh);
}
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
nblocks = le32_to_cpu(finfo->fi_nblocks);
ndatablk = le32_to_cpu(finfo->fi_ndatablk);
- if (buffer_nilfs_node(bh))
- inode = NILFS_BTNC_I(bh->b_page->mapping);
- else
- inode = NILFS_AS_I(bh->b_page->mapping);
+ inode = bh->b_page->mapping->host;
if (mode == SC_LSEG_DSYNC)
sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
return 0;
}
-static int
-nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
-{
- struct page *clone_page;
- struct buffer_head *bh, *head, *bh2;
- void *kaddr;
-
- bh = head = page_buffers(page);
-
- clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
- if (unlikely(!clone_page))
- return -ENOMEM;
-
- bh2 = page_buffers(clone_page);
- kaddr = kmap_atomic(page, KM_USER0);
- do {
- if (list_empty(&bh->b_assoc_buffers))
- continue;
- get_bh(bh2);
- page_cache_get(clone_page); /* for each bh */
- memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
- bh2->b_blocknr = bh->b_blocknr;
- list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
- list_add_tail(&bh->b_assoc_buffers, out);
- } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
- kunmap_atomic(kaddr, KM_USER0);
-
- if (!TestSetPageWriteback(clone_page))
- account_page_writeback(clone_page);
- unlock_page(clone_page);
-
- return 0;
-}
-
-static int nilfs_test_page_to_be_frozen(struct page *page)
-{
- struct address_space *mapping = page->mapping;
-
- if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
- return 0;
-
- if (page_mapped(page)) {
- ClearPageChecked(page);
- return 1;
- }
- return PageChecked(page);
-}
-
-static int nilfs_begin_page_io(struct page *page, struct list_head *out)
+static void nilfs_begin_page_io(struct page *page)
{
if (!page || PageWriteback(page))
/* For split b-tree node pages, this function may be called
twice. We ignore the 2nd or later calls by this check. */
- return 0;
+ return;
lock_page(page);
clear_page_dirty_for_io(page);
set_page_writeback(page);
unlock_page(page);
-
- if (nilfs_test_page_to_be_frozen(page)) {
- int err = nilfs_copy_replace_page_buffers(page, out);
- if (unlikely(err))
- return err;
- }
- return 0;
}
-static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
- struct page **failed_page)
+static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
{
struct nilfs_segment_buffer *segbuf;
struct page *bd_page = NULL, *fs_page = NULL;
- struct list_head *list = &sci->sc_copied_buffers;
- int err;
- *failed_page = NULL;
list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
struct buffer_head *bh;
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
break;
}
if (bh->b_page != fs_page) {
- err = nilfs_begin_page_io(fs_page, list);
- if (unlikely(err)) {
- *failed_page = fs_page;
- goto out;
- }
+ nilfs_begin_page_io(fs_page);
fs_page = bh->b_page;
}
}
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
set_page_writeback(bd_page);
unlock_page(bd_page);
}
- err = nilfs_begin_page_io(fs_page, list);
- if (unlikely(err))
- *failed_page = fs_page;
- out:
- return err;
+ nilfs_begin_page_io(fs_page);
}
static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
return ret;
}
-static void __nilfs_end_page_io(struct page *page, int err)
-{
- if (!err) {
- if (!nilfs_page_buffers_clean(page))
- __set_page_dirty_nobuffers(page);
- ClearPageError(page);
- } else {
- __set_page_dirty_nobuffers(page);
- SetPageError(page);
- }
-
- if (buffer_nilfs_allocated(page_buffers(page))) {
- if (TestClearPageWriteback(page))
- dec_zone_page_state(page, NR_WRITEBACK);
- } else
- end_page_writeback(page);
-}
-
static void nilfs_end_page_io(struct page *page, int err)
{
if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
return;
}
- __nilfs_end_page_io(page, err);
-}
-
-static void nilfs_clear_copied_buffers(struct list_head *list, int err)
-{
- struct buffer_head *bh, *head;
- struct page *page;
-
- while (!list_empty(list)) {
- bh = list_entry(list->next, struct buffer_head,
- b_assoc_buffers);
- page = bh->b_page;
- page_cache_get(page);
- head = bh = page_buffers(page);
- do {
- if (!list_empty(&bh->b_assoc_buffers)) {
- list_del_init(&bh->b_assoc_buffers);
- if (!err) {
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- clear_buffer_delay(bh);
- clear_buffer_nilfs_volatile(bh);
- }
- brelse(bh); /* for b_assoc_buffers */
- }
- } while ((bh = bh->b_this_page) != head);
-
- __nilfs_end_page_io(page, err);
- page_cache_release(page);
+ if (!err) {
+ if (!nilfs_page_buffers_clean(page))
+ __set_page_dirty_nobuffers(page);
+ ClearPageError(page);
+ } else {
+ __set_page_dirty_nobuffers(page);
+ SetPageError(page);
}
+
+ end_page_writeback(page);
}
-static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
- int err)
+static void nilfs_abort_logs(struct list_head *logs, int err)
{
struct nilfs_segment_buffer *segbuf;
struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
}
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, err);
- if (fs_page && fs_page == failed_page)
- return;
fs_page = bh->b_page;
}
}
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
list_splice_tail_init(&sci->sc_write_logs, &logs);
ret = nilfs_wait_on_logs(&logs);
- nilfs_abort_logs(&logs, NULL, ret ? : err);
+ nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
- nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
nilfs_end_page_io(fs_page, 0);
- nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
-
nilfs_drop_collected_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
"failed to get inode block.\n");
return err;
}
- nilfs_mdt_mark_buffer_dirty(ibh);
+ mark_buffer_dirty(ibh);
nilfs_mdt_mark_dirty(ifile);
spin_lock(&nilfs->ns_inode_lock);
if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
clear_bit(NILFS_I_QUEUED, &ii->i_state);
set_bit(NILFS_I_BUSY, &ii->i_state);
- list_del(&ii->i_dirty);
- list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
+ list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
}
spin_unlock(&nilfs->ns_inode_lock);
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
clear_bit(NILFS_I_BUSY, &ii->i_state);
brelse(ii->i_bh);
ii->i_bh = NULL;
- list_del(&ii->i_dirty);
- list_add_tail(&ii->i_dirty, &ti->ti_garbage);
+ list_move_tail(&ii->i_dirty, &ti->ti_garbage);
}
spin_unlock(&nilfs->ns_inode_lock);
}
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
{
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- struct page *failed_page;
int err;
sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
/* Write partial segments */
- err = nilfs_segctor_prepare_write(sci, &failed_page);
- if (err) {
- nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
- goto failed_to_write;
- }
+ nilfs_segctor_prepare_write(sci);
nilfs_add_checksums_on_logs(&sci->sc_segbufs,
nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_segbufs);
INIT_LIST_HEAD(&sci->sc_write_logs);
INIT_LIST_HEAD(&sci->sc_gc_inodes);
- INIT_LIST_HEAD(&sci->sc_copied_buffers);
init_timer(&sci->sc_timer);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
if (flag || !nilfs_segctor_confirm(sci))
nilfs_segctor_write_out(sci);
- WARN_ON(!list_empty(&sci->sc_copied_buffers));
-
if (!list_empty(&sci->sc_dirty_files)) {
nilfs_warning(sci->sc_super, __func__,
"dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86745fb..38a1d0013314 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
* @sc_nblk_inc: Block count of current generation
* @sc_dirty_files: List of files to be written
* @sc_gc_inodes: List of GC inodes having blocks to be written
- * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
* @sc_freesegs: array of segment numbers to be freed
* @sc_nfreesegs: number of segments on @sc_freesegs
* @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
struct list_head sc_dirty_files;
struct list_head sc_gc_inodes;
- struct list_head sc_copied_buffers;
__u64 *sc_freesegs;
size_t sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488ccae8..0a0aba617d8a 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
struct nilfs_sufile_info {
struct nilfs_mdt_info mi;
- unsigned long ncleansegs;
+ unsigned long ncleansegs;/* number of clean segments */
+ __u64 allocmin; /* lower limit of allocatable segment range */
+ __u64 allocmax; /* upper limit of allocatable segment range */
};
static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
create, NULL, bhp);
}
+static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
+ __u64 segnum)
+{
+ return nilfs_mdt_delete_block(sufile,
+ nilfs_sufile_get_blkoff(sufile, segnum));
+}
+
static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
u64 ncleanadd, u64 ndirtyadd)
{
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(header_bh);
}
/**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
}
/**
+ * nilfs_sufile_set_alloc_range - limit range of segment to be allocated
+ * @sufile: inode of segment usage file
+ * @start: minimum segment number of allocatable region (inclusive)
+ * @end: maximum segment number of allocatable region (inclusive)
+ *
+ * Return Value: On success, 0 is returned. On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-ERANGE - invalid segment region
+ */
+int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
+{
+	struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
+	int err;
+
+	/* The limits are only ever changed with mi_sem held for writing. */
+	down_write(&NILFS_MDT(sufile)->mi_sem);
+
+	if (start > end || end >= nilfs_sufile_get_nsegments(sufile)) {
+		/* empty range, or range reaching past the last segment */
+		err = -ERANGE;
+	} else {
+		sui->allocmin = start;
+		sui->allocmax = end;
+		err = 0;
+	}
+
+	up_write(&NILFS_MDT(sufile)->mi_sem);
+	return err;
+}
+
+/**
* nilfs_sufile_alloc - allocate a segment
* @sufile: inode of segment usage file
* @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
struct buffer_head *header_bh, *su_bh;
struct nilfs_sufile_header *header;
struct nilfs_segment_usage *su;
+ struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
__u64 segnum, maxsegnum, last_alloc;
void *kaddr;
- unsigned long nsegments, ncleansegs, nsus;
- int ret, i, j;
+ unsigned long nsegments, ncleansegs, nsus, cnt;
+ int ret, j;
down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
kunmap_atomic(kaddr, KM_USER0);
nsegments = nilfs_sufile_get_nsegments(sufile);
+ maxsegnum = sui->allocmax;
segnum = last_alloc + 1;
- maxsegnum = nsegments - 1;
- for (i = 0; i < nsegments; i += nsus) {
- if (segnum >= nsegments) {
- /* wrap around */
- segnum = 0;
- maxsegnum = last_alloc;
+ if (segnum < sui->allocmin || segnum > sui->allocmax)
+ segnum = sui->allocmin;
+
+ for (cnt = 0; cnt < nsegments; cnt += nsus) {
+ if (segnum > maxsegnum) {
+ if (cnt < sui->allocmax - sui->allocmin + 1) {
+ /*
+ * wrap around in the limited region.
+ * if allocation started from
+ * sui->allocmin, this never happens.
+ */
+ segnum = sui->allocmin;
+ maxsegnum = last_alloc;
+ } else if (segnum > sui->allocmin &&
+ sui->allocmax + 1 < nsegments) {
+ segnum = sui->allocmax + 1;
+ maxsegnum = nsegments - 1;
+ } else if (sui->allocmin > 0) {
+ segnum = 0;
+ maxsegnum = sui->allocmin - 1;
+ } else {
+ break; /* never happens */
+ }
}
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
&su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
header->sh_last_alloc = cpu_to_le64(segnum);
kunmap_atomic(kaddr, KM_USER0);
- NILFS_SUI(sufile)->ncleansegs--;
- nilfs_mdt_mark_buffer_dirty(header_bh);
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ sui->ncleansegs--;
+ mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
brelse(su_bh);
*segnump = segnum;
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
nilfs_sufile_mod_counter(header_bh, -1, 1);
NILFS_SUI(sufile)->ncleansegs--;
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
NILFS_SUI(sufile)->ncleansegs -= clean;
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
sudirty = nilfs_segment_usage_dirty(su);
nilfs_segment_usage_set_clean(su);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
if (!ret) {
- nilfs_mdt_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
brelse(bh);
}
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
su->su_nblocks = cpu_to_le32(nblocks);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
brelse(bh);
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
{
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
- struct the_nilfs *nilfs = NILFS_I_NILFS(sufile);
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
void *kaddr;
int ret;
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
nilfs_sufile_mod_counter(header_bh, -1, 0);
NILFS_SUI(sufile)->ncleansegs--;
}
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
/**
+ * nilfs_sufile_truncate_range - truncate range of segment array
+ * @sufile: inode of segment usage file
+ * @start: start segment number (inclusive)
+ * @end: end segment number (inclusive)
+ *
+ * Return Value: On success, 0 is returned. On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EINVAL - Invalid number of segments specified
+ *
+ * %-EBUSY - Dirty or active segments are present in the range
+ */
+static int nilfs_sufile_truncate_range(struct inode *sufile,
+ __u64 start, __u64 end)
+{
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ struct buffer_head *header_bh;
+ struct buffer_head *su_bh;
+ struct nilfs_segment_usage *su, *su2;
+ size_t susz = NILFS_MDT(sufile)->mi_entry_size;
+ unsigned long segusages_per_block;
+ unsigned long nsegs, ncleaned;
+ __u64 segnum;
+ void *kaddr;
+ ssize_t n, nc;
+ int ret;
+ int j;
+
+ nsegs = nilfs_sufile_get_nsegments(sufile);
+
+ /* Reject an empty range or one starting beyond the last segment. */
+ ret = -EINVAL;
+ if (start > end || start >= nsegs)
+ goto out;
+
+ ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+ if (ret < 0)
+ goto out;
+
+ segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
+ ncleaned = 0;
+
+ /* Walk the range one sufile block at a time. */
+ for (segnum = start; segnum <= end; segnum += n) {
+ /*
+ * n = number of usage entries handled in this iteration: from
+ * segnum up to the end of its block or up to @end, whichever
+ * comes first.
+ */
+ n = min_t(unsigned long,
+ segusages_per_block -
+ nilfs_sufile_get_offset(sufile, segnum),
+ end - segnum + 1);
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
+ &su_bh);
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ goto out_header;
+ /* hole */
+ continue;
+ }
+ kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+ su = nilfs_sufile_block_get_segment_usage(
+ sufile, segnum, su_bh, kaddr);
+ su2 = su;
+ /*
+ * First pass: give up with -EBUSY if any entry has a flag bit
+ * other than the error bit set, or if the segment is active.
+ */
+ for (j = 0; j < n; j++, su = (void *)su + susz) {
+ if ((le32_to_cpu(su->su_flags) &
+ ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
+ nilfs_segment_is_active(nilfs, segnum + j)) {
+ ret = -EBUSY;
+ kunmap_atomic(kaddr, KM_USER0);
+ brelse(su_bh);
+ goto out_header;
+ }
+ }
+ /*
+ * Second pass: turn every error-flagged entry into a clean one
+ * and count how many were converted.
+ */
+ nc = 0;
+ for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
+ if (nilfs_segment_usage_error(su)) {
+ nilfs_segment_usage_set_clean(su);
+ nc++;
+ }
+ }
+ kunmap_atomic(kaddr, KM_USER0);
+ if (nc > 0) {
+ mark_buffer_dirty(su_bh);
+ ncleaned += nc;
+ }
+ brelse(su_bh);
+
+ if (n == segusages_per_block) {
+ /* make hole */
+ /* NOTE(review): the result of the deletion is not checked. */
+ nilfs_sufile_delete_segment_usage_block(sufile, segnum);
+ }
+ }
+ ret = 0;
+
+out_header:
+ /*
+ * Also reached on the error paths above, so entries cleaned before a
+ * failure are still accounted for in the in-memory and header counters.
+ */
+ if (ncleaned > 0) {
+ NILFS_SUI(sufile)->ncleansegs += ncleaned;
+ nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
+ nilfs_mdt_mark_dirty(sufile);
+ }
+ brelse(header_bh);
+out:
+ return ret;
+}
+
+/**
+ * nilfs_sufile_resize - resize segment array
+ * @sufile: inode of segment usage file
+ * @newnsegs: new number of segments
+ *
+ * The whole operation runs with the sufile's mi_sem held for writing.
+ * When the array shrinks, the allocatable segment range is reset to
+ * [0, @newnsegs - 1] before the semaphore is released, so that
+ * nilfs_sufile_alloc() can never hand out a segment from the truncated
+ * region.
+ *
+ * Return Value: On success (including the case where the size is
+ * unchanged), 0 is returned. On error, one of the following negative
+ * error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOSPC - Enough free space is not left for shrinking
+ *
+ * %-EBUSY - Dirty or active segments exist in the region to be truncated
+ */
+int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
+{
+	struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+	struct buffer_head *header_bh;
+	struct nilfs_sufile_header *header;
+	struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
+	void *kaddr;
+	unsigned long nsegs, nrsvsegs;
+	int ret = 0;
+
+	down_write(&NILFS_MDT(sufile)->mi_sem);
+
+	nsegs = nilfs_sufile_get_nsegments(sufile);
+	if (nsegs == newnsegs)
+		goto out;
+
+	/*
+	 * Shrinking must leave at least the reserved number of clean
+	 * segments (computed for the new size) after the truncated ones
+	 * are taken away.
+	 */
+	ret = -ENOSPC;
+	nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
+	if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
+		goto out;
+
+	ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+	if (ret < 0)
+		goto out;
+
+	if (newnsegs > nsegs) {
+		sui->ncleansegs += newnsegs - nsegs;
+	} else /* newnsegs < nsegs */ {
+		ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
+		if (ret < 0)
+			goto out_header;
+
+		sui->ncleansegs -= nsegs - newnsegs;
+
+		/*
+		 * The truncated segments are gone; adjust the allocation
+		 * range right away, still under mi_sem, because
+		 * nilfs_sufile_alloc() searches up to sui->allocmax and
+		 * would otherwise be able to pick a segment number that
+		 * no longer exists.
+		 */
+		sui->allocmax = newnsegs - 1;
+		sui->allocmin = 0;
+	}
+
+	/* Publish the new clean-segment count in the on-disk header. */
+	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	header = kaddr + bh_offset(header_bh);
+	header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	mark_buffer_dirty(header_bh);
+	nilfs_mdt_mark_dirty(sufile);
+	nilfs_set_nsegments(nilfs, newnsegs);
+
+out_header:
+	brelse(header_bh);
+out:
+	up_write(&NILFS_MDT(sufile)->mi_sem);
+	return ret;
+}
+
+/**
* nilfs_sufile_get_suinfo -
* @sufile: inode of segment usage file
* @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
struct nilfs_segment_usage *su;
struct nilfs_suinfo *si = buf;
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
- struct the_nilfs *nilfs = NILFS_I_NILFS(sufile);
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
void *kaddr;
unsigned long nsegs, segusages_per_block;
ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
kunmap_atomic(kaddr, KM_USER0);
brelse(header_bh);
+ sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
+ sui->allocmin = 0;
+
unlock_new_inode(sufile);
out:
*inodep = sufile;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fbacb45b..e84bc5b51fc1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
{
- return NILFS_I_NILFS(sufile)->ns_nsegments;
+ return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
}
unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
+int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
int nilfs_sufile_alloc(struct inode *, __u64 *);
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
+int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_inode *raw_inode, struct inode **inodep);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca065195..8351c44a7320 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
#include "btnode.h"
#include "page.h"
#include "cpfile.h"
+#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
#include "ifile.h"
#include "dat.h"
#include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
ii->i_state = 0;
ii->i_cno = 0;
ii->vfs_inode.i_version = 1;
- nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi);
+ nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
return &ii->vfs_inode;
}
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
return ret;
}
+/**
+ * nilfs_move_2nd_super - relocate secondary super block
+ * @sb: super block instance
+ * @sb2off: new offset of the secondary super block (in bytes)
+ *
+ * The caller must hold nilfs->ns_sem.
+ *
+ * Return Value: 0 on success, including the case where the secondary
+ * super block already sits in the target block.  %-EIO if the buffer
+ * for the new location cannot be obtained, %-EINVAL if a super block
+ * image placed at @sb2off would not fit inside its block.
+ */
+static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
+{
+	struct the_nilfs *nilfs = sb->s_fs_info;
+	struct buffer_head *nsbh;
+	struct nilfs_super_block *nsbp;
+	sector_t blocknr, newblocknr;
+	unsigned long offset;
+	int sb2i = -1;  /* array index of the secondary superblock */
+	int ret = 0;
+
+	/* nilfs->ns_sem must be locked by the caller. */
+	if (nilfs->ns_sbh[1] &&
+	    nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
+		sb2i = 1;
+		blocknr = nilfs->ns_sbh[1]->b_blocknr;
+	} else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
+		sb2i = 0;
+		blocknr = nilfs->ns_sbh[0]->b_blocknr;
+	}
+	/* blocknr is only read when sb2i >= 0, i.e. when it was assigned. */
+	if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
+		goto out;  /* super block location is unchanged */
+
+	/* Get new super block buffer */
+	newblocknr = sb2off >> nilfs->ns_blocksize_bits;
+	offset = sb2off & (nilfs->ns_blocksize - 1);
+	nsbh = sb_getblk(sb, newblocknr);
+	if (!nsbh) {
+		printk(KERN_WARNING
+		       "NILFS warning: unable to move secondary superblock "
+		       "to block %llu\n", (unsigned long long)newblocknr);
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * The image is written at 'offset' inside the block, so it has to
+	 * fit in the remaining bytes; this also keeps the size computation
+	 * of the trailing zero fill below from wrapping around.
+	 */
+	if (offset + nilfs->ns_sbsize > nilfs->ns_blocksize) {
+		brelse(nsbh);
+		ret = -EINVAL;
+		goto out;
+	}
+	nsbp = (void *)nsbh->b_data + offset;
+
+	if (sb2i >= 0) {
+		/*
+		 * Copy first and zero the rest of the block afterwards.
+		 * Clearing ns_blocksize bytes starting at nsbp, as was done
+		 * before, ran 'offset' bytes past the end of the buffer and
+		 * could wipe the source image when the old and new locations
+		 * share a buffer.
+		 * NOTE(review): memcpy() assumes the old and new images never
+		 * overlap when they live in the same block - confirm against
+		 * the definition of NILFS_SB2_OFFSET_BYTES().
+		 */
+		memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+		memset(nsbh->b_data, 0, offset);
+		memset((void *)nsbp + nilfs->ns_sbsize, 0,
+		       nilfs->ns_blocksize - offset - nilfs->ns_sbsize);
+
+		brelse(nilfs->ns_sbh[sb2i]);
+		nilfs->ns_sbh[sb2i] = nsbh;
+		nilfs->ns_sbp[sb2i] = nsbp;
+	} else {
+		/* Nothing to copy: hand out a fully cleared block. */
+		memset(nsbh->b_data, 0, nilfs->ns_blocksize);
+
+		if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
+			/* secondary super block will be restored to index 1 */
+			nilfs->ns_sbh[1] = nsbh;
+			nilfs->ns_sbp[1] = nsbp;
+		} else {
+			brelse(nsbh);
+		}
+	}
+out:
+	return ret;
+}
+
+/**
+ * nilfs_resize_fs - resize the filesystem
+ * @sb: super block instance
+ * @newsize: new size of the filesystem (in bytes)
+ *
+ * Resizes the segment usage file, writes a log, relocates the secondary
+ * super block, commits the super blocks with the new size and segment
+ * count, and finally resets the allocatable segment range.
+ *
+ * Return Value: 0 on success.  %-ERANGE if @newsize exceeds the size of
+ * the underlying block device, %-EIO if nilfs_prepare_super() yields no
+ * super block, or the negative error code reported by
+ * nilfs_sufile_resize(), nilfs_construct_segment() or
+ * nilfs_commit_super().
+ */
+int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ struct nilfs_super_block **sbp;
+ __u64 devsize, newnsegs;
+ loff_t sb2off;
+ int ret;
+
+ ret = -ERANGE;
+ devsize = i_size_read(sb->s_bdev->bd_inode);
+ if (newsize > devsize)
+ goto out;
+
+ /*
+ * Write lock is required to protect some functions depending
+ * on the number of segments, the number of reserved segments,
+ * and so forth.
+ */
+ down_write(&nilfs->ns_segctor_sem);
+
+ /*
+ * The new segment count is the number of whole segments that fit
+ * in front of the new secondary super block position.
+ */
+ sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
+ newnsegs = sb2off >> nilfs->ns_blocksize_bits;
+ do_div(newnsegs, nilfs->ns_blocks_per_segment);
+
+ ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
+ up_write(&nilfs->ns_segctor_sem);
+ if (ret < 0)
+ goto out;
+
+ ret = nilfs_construct_segment(sb);
+ if (ret < 0)
+ goto out;
+
+ down_write(&nilfs->ns_sem);
+ /* NOTE(review): the result of nilfs_move_2nd_super() is ignored. */
+ nilfs_move_2nd_super(sb, sb2off);
+ ret = -EIO;
+ sbp = nilfs_prepare_super(sb, 0);
+ if (likely(sbp)) {
+ nilfs_set_log_cursor(sbp[0], nilfs);
+ /*
+ * Drop NILFS_RESIZE_FS flag for compatibility with
+ * mount-time resize which may be implemented in a
+ * future release.
+ */
+ sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
+ ~NILFS_RESIZE_FS);
+ sbp[0]->s_dev_size = cpu_to_le64(newsize);
+ sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
+ /* Keep the second super block image identical to the first. */
+ if (sbp[1])
+ memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
+ ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
+ }
+ up_write(&nilfs->ns_sem);
+
+ /*
+ * Reset the range of allocatable segments last. This order
+ * is important in the case of expansion because the secondary
+ * superblock must be protected from log write until migration
+ * completes.
+ */
+ if (!ret)
+ nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
+out:
+ return ret;
+}
+
static void nilfs_put_super(struct super_block *sb)
{
struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a651f3..d32714094375 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
return res;
}
+/**
+ * nilfs_nrsvsegs - calculate the number of reserved segments
+ * @nilfs: nilfs object
+ * @nsegs: total number of segments
+ *
+ * Returns the reserved-segment percentage of @nsegs, rounded up, but
+ * never less than NILFS_MIN_NRSVSEGS.
+ */
+unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
+{
+	unsigned long by_ratio;
+
+	by_ratio = DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage, 100);
+
+	return max_t(unsigned long, NILFS_MIN_NRSVSEGS, by_ratio);
+}
+
+/*
+ * Record @nsegs as the number of segments in @nilfs and recompute the
+ * number of reserved segments from it, so the two fields stay in step.
+ */
+void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
+{
+ nilfs->ns_nsegments = nsegs;
+ nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
+}
+
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
{
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
}
nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
- nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
nilfs->ns_r_segments_percentage =
le32_to_cpu(sbp->s_r_segments_percentage);
- nilfs->ns_nrsvsegs =
- max_t(unsigned long, NILFS_MIN_NRSVSEGS,
- DIV_ROUND_UP(nilfs->ns_nsegments *
- nilfs->ns_r_segments_percentage, 100));
+ nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
return 0;
}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f4968145c2a3..9992b11312ff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
void destroy_nilfs(struct the_nilfs *nilfs);
int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
+unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
+void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 643720209a98..9a3e6bbff27b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
/* We want to make sure that nobody is heartbeating on top of us --
* this will help detect an invalid configuration. */
-static int o2hb_check_last_timestamp(struct o2hb_region *reg)
+static void o2hb_check_last_timestamp(struct o2hb_region *reg)
{
- int node_num, ret;
struct o2hb_disk_slot *slot;
struct o2hb_disk_heartbeat_block *hb_block;
+ char *errstr;
- node_num = o2nm_this_node();
-
- ret = 1;
- slot = &reg->hr_slots[node_num];
+ slot = &reg->hr_slots[o2nm_this_node()];
/* Don't check on our 1st timestamp */
- if (slot->ds_last_time) {
- hb_block = slot->ds_raw_block;
+ if (!slot->ds_last_time)
+ return;
- if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time)
- ret = 0;
- }
+ hb_block = slot->ds_raw_block;
+ if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
+ le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
+ hb_block->hb_node == slot->ds_node_num)
+ return;
- return ret;
+#define ERRSTR1 "Another node is heartbeating on device"
+#define ERRSTR2 "Heartbeat generation mismatch on device"
+#define ERRSTR3 "Heartbeat sequence mismatch on device"
+
+ if (hb_block->hb_node != slot->ds_node_num)
+ errstr = ERRSTR1;
+ else if (le64_to_cpu(hb_block->hb_generation) !=
+ slot->ds_last_generation)
+ errstr = ERRSTR2;
+ else
+ errstr = ERRSTR3;
+
+ mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
+ "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
+ slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
+ (unsigned long long)slot->ds_last_time, hb_block->hb_node,
+ (unsigned long long)le64_to_cpu(hb_block->hb_generation),
+ (unsigned long long)le64_to_cpu(hb_block->hb_seq));
}
static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
/* With an up to date view of the slots, we can check that no
* other node has been improperly configured to heartbeat in
* our slot. */
- if (!o2hb_check_last_timestamp(reg))
- mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
- "in our slot!\n", reg->hr_dev_name);
+ o2hb_check_last_timestamp(reg);
/* fill in the proper info for our next heartbeat */
o2hb_prepare_block(reg, reg->hr_generation);
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
}
i = -1;
- while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
-
+ while((i = find_next_bit(configured_nodes,
+ O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
}
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
struct file *filp = NULL;
struct inode *inode = NULL;
ssize_t ret = -EINVAL;
+ int live_threshold;
if (reg->hr_bdev)
goto out;
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
* A node is considered live after it has beat LIVE_THRESHOLD
* times. We're not steady until we've given them a chance
* _after_ our first read.
+ * The default threshold is bare minimum so as to limit the delay
+ * during mounts. For global heartbeat, the threshold doubled for the
+ * first region.
*/
- atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1);
+ live_threshold = O2HB_LIVE_THRESHOLD;
+ if (o2hb_global_heartbeat_active()) {
+ spin_lock(&o2hb_live_lock);
+ if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
+ live_threshold <<= 1;
+ spin_unlock(&o2hb_live_lock);
+ }
+ atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
reg->hr_item.ci_name);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 9fe5b8fd658f..8582e3f4f120 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
bytes = blocks_wanted << sb->s_blocksize_bits;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(dir);
- struct ocfs2_alloc_context *data_ac;
+ struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
struct buffer_head *dirdata_bh = NULL;
struct buffer_head *dx_root_bh = NULL;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7540a492eaba..3b179d6cbde0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
spin_unlock(&dlm->spinlock);
/* Support for global heartbeat and node info was added in 1.1 */
- if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
+ if (dlm->dlm_locking_proto.pv_major > 1 ||
+ dlm->dlm_locking_proto.pv_minor > 0) {
status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
if (status) {
mlog_errno(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index fede57ed005f..84d166328cf7 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2574,6 +2574,9 @@ fail:
res->state &= ~DLM_LOCK_RES_MIGRATING;
wake = 1;
spin_unlock(&res->spinlock);
+ if (dlm_is_host_down(ret))
+ dlm_wait_for_node_death(dlm, target,
+ DLM_NODE_DEATH_WAIT_MAX);
goto leave;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41565ae52856..89659d6dc206 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
+ /*
+ * remove an entire extent record.
+ */
*trunc_cpos = le32_to_cpu(rec->e_cpos);
/*
* Skip holes if any.
@@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
*blkno = le64_to_cpu(rec->e_blkno);
*trunc_end = le32_to_cpu(rec->e_cpos);
} else if (range > trunc_start) {
+ /*
+ * remove a partial extent record, which means we're
+ * removing the last extent record.
+ */
*trunc_cpos = trunc_start;
+ /*
+ * skip hole if any.
+ */
+ if (range < *trunc_end)
+ *trunc_end = range;
*trunc_len = *trunc_end - trunc_start;
coff = trunc_start - le32_to_cpu(rec->e_cpos);
*blkno = le64_to_cpu(rec->e_blkno) +
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index b141a44605ca..295d56454e8b 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
{
struct ocfs2_journal *journal = osb->journal;
+ if (ocfs2_is_hard_readonly(osb))
+ return;
+
/* No need to queue up our truncate_log as regular cleanup will catch
* that */
ocfs2_queue_recovery_completion(journal, osb->slot_num,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d32749c896d..3c7606cff1ab 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3706,7 +3706,7 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
context->cow_start = cow_start;
context->cow_len = cow_len;
context->ref_tree = ref_tree;
- context->ref_root_bh = ref_root_bh;;
+ context->ref_root_bh = ref_root_bh;
context->cow_object = xv;
context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..8ed4d3433199 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%u\n", p->discard_alignment);
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%u\n",
+ queue_limit_discard_alignment(&disk->queue->limits,
+ p->start_sect));
}
ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
p->start_sect = start;
p->alignment_offset =
queue_limit_alignment_offset(&disk->queue->limits, start);
- p->discard_alignment =
- queue_limit_discard_alignment(&disk->queue->limits, start);
p->nr_sects = len;
p->partno = partno;
p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index ac0ccb5026a2..19d6750d1d6c 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -348,6 +348,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
goto fail;
}
+ /* Check that sizeof_partition_entry has the correct value */
+ if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
+ pr_debug("GUID Partitition Entry Size check failed.\n");
+ goto fail;
+ }
+
if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
goto fail;
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index ce4f62440425..af9fdf046769 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -565,7 +565,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
data = read_part_sector(state, 0, &sect);
if (!data) {
- ldm_crit ("Disk read failed.");
+ ldm_info ("Disk read failed.");
return false;
}
@@ -1335,6 +1335,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
list_add_tail (&f->list, frags);
found:
+ if (rec >= f->num) {
+ ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
+ return false;
+ }
+
if (f->map & (1 << rec)) {
ldm_error ("Duplicate VBLK, part %d.", rec);
f->map &= 0x7F; /* Mark the group as broken */
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5f28fb..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y += stat.o
proc-y += uptime.o
proc-y += version.o
proc-y += softirqs.o
+proc-y += namespaces.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa532730e55..dc8bca72b002 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
return allowed;
}
-static int proc_setattr(struct dentry *dentry, struct iattr *attr)
+int proc_setattr(struct dentry *dentry, struct iattr *attr)
{
int error;
struct inode *inode = dentry->d_inode;
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
return 0;
}
-
-static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
{
struct inode * inode;
struct proc_inode *ei;
@@ -1779,7 +1778,7 @@ out_unlock:
return NULL;
}
-static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
struct task_struct *task;
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
* made this apply to all per process world readable and executable
* directories.
*/
-static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode;
struct task_struct *task;
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
}
-static const struct dentry_operations pid_dentry_operations =
+const struct dentry_operations pid_dentry_operations =
{
.d_revalidate = pid_revalidate,
.d_delete = pid_delete_dentry,
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
/* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
- struct task_struct *, const void *);
-
/*
* Fill a directory entry.
*
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
* reported by readdir in sync with the inode numbers reported
* by stat.
*/
-static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- char *name, int len,
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ const char *name, int len,
instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+ DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
@@ -3168,6 +3165,7 @@ out_no_task:
static const struct pid_entry tid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+ DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1281339b6fa..f1637f17c37c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -674,6 +674,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
}
return ent;
}
+EXPORT_SYMBOL(proc_mkdir_mode);
struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
struct proc_dir_entry *parent)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b1cc8f..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
struct ctl_table_header *head;
+ const struct proc_ns_operations *ns_ops;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
sysctl_head_put(head);
}
+ /* Release any associated namespace */
+ ns_ops = PROC_I(inode)->ns_ops;
+ if (ns_ops && ns_ops->put)
+ ns_ops->put(PROC_I(inode)->ns);
}
static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->pde = NULL;
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
+ ei->ns = NULL;
+ ei->ns_ops = NULL;
inode = &ei->vfs_inode;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..7838e5cfec14 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations;
extern const struct file_operations proc_net_operations;
extern const struct inode_operations proc_net_inode_operations;
+struct proc_maps_private { /* state embedded in numa_maps_private and reached through seq_file->private */
+ struct pid *pid; /* numa_maps_open() fills this from proc_pid(inode) */
+ struct task_struct *task; /* task handed to get_vma_policy() by show_numa_map() */
+#ifdef CONFIG_MMU
+ struct vm_area_struct *tail_vma; /* show_numa_map() stores 0 in m->version when the shown vma equals this */
+#endif
+};
+
void proc_init_inodecache(void);
static inline struct pid *proc_pid(struct inode *inode)
@@ -119,3 +127,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
*/
int proc_readdir(struct file *, void *, filldir_t);
struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
+
+
+
+/* Lookups */
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+ struct task_struct *, const void *);
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ const char *name, int len,
+ instantiate_t instantiate, struct task_struct *task, const void *ptr);
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
+extern const struct dentry_operations pid_dentry_operations;
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+int proc_setattr(struct dentry *dentry, struct iattr *attr);
+
+extern const struct inode_operations proc_ns_dir_inode_operations;
+extern const struct file_operations proc_ns_dir_operations;
+
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..781dec5bd682
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,198 @@
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/fs_struct.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
+#include <linux/mnt_namespace.h>
+#include <linux/ipc_namespace.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
+
+
+static const struct proc_ns_operations *ns_entries[] = { /* namespace types offered in the "ns" directory; each entry exists only when its CONFIG_*_NS option is set */
+#ifdef CONFIG_NET_NS
+ &netns_operations,
+#endif
+#ifdef CONFIG_UTS_NS
+ &utsns_operations,
+#endif
+#ifdef CONFIG_IPC_NS
+ &ipcns_operations,
+#endif
+};
+
+static const struct file_operations ns_file_operations = { /* proc_ns_fget() compares file->f_op against this table to recognise ns files */
+ .llseek = no_llseek,
+};
+
+/*
+ * Build the inode for one entry of a task's "ns" directory and attach
+ * it to @dentry.
+ *
+ * @dir:    directory inode the entry lives under
+ * @dentry: dentry to instantiate
+ * @task:   task whose namespace is being exposed
+ * @ptr:    the struct proc_ns_operations describing the namespace type
+ *
+ * Returns NULL when the dentry was added and pid_revalidate() accepted
+ * it.  Returns ERR_PTR(-ENOENT) when no inode could be made, when
+ * ->get() yielded no namespace, or when pid_revalidate() returned zero.
+ */
+static struct dentry *proc_ns_instantiate(struct inode *dir,
+	struct dentry *dentry, struct task_struct *task, const void *ptr)
+{
+	const struct proc_ns_operations *ns_ops = ptr;
+	struct inode *inode;
+	struct proc_inode *ei;
+	struct dentry *error = ERR_PTR(-ENOENT);
+	void *ns;
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	/*
+	 * Obtain the namespace before publishing ns_ops in the inode.
+	 * proc_evict_inode() calls ns_ops->put(ei->ns) whenever ns_ops is
+	 * set, so if ->get() fails the inode must still have ns_ops == NULL;
+	 * otherwise the iput() below would pass a NULL namespace to ->put().
+	 */
+	ns = ns_ops->get(task);
+	if (!ns)
+		goto out_iput;
+
+	ei = PROC_I(inode);
+	inode->i_mode = S_IFREG|S_IRUSR;
+	inode->i_fop = &ns_file_operations;
+	ei->ns_ops = ns_ops;
+	ei->ns = ns;
+
+	dentry->d_op = &pid_dentry_operations;
+	d_add(dentry, inode);
+	/* Close the race of the process dying before we return the dentry */
+	if (pid_revalidate(dentry, NULL))
+		error = NULL;
+out:
+	return error;
+out_iput:
+	iput(inode);
+	goto out;
+}
+
+/*
+ * Emit the directory entry for one namespace type via proc_fill_cache(),
+ * with proc_ns_instantiate() as the callback that builds its dentry.
+ * The return value is whatever proc_fill_cache() returns.
+ */
+static int proc_ns_fill_cache(struct file *filp, void *dirent,
+	filldir_t filldir, struct task_struct *task,
+	const struct proc_ns_operations *ops)
+{
+	const char *name = ops->name;
+	int name_len = strlen(name);
+
+	return proc_fill_cache(filp, dirent, filldir, name, name_len,
+			       proc_ns_instantiate, task, ops);
+}
+
+/*
+ * readdir for a task's "ns" directory.
+ *
+ * Positions 0 and 1 are "." and ".."; position 2 + k is ns_entries[k].
+ *
+ * Returns -ENOENT when the task is gone, -EPERM when the caller fails
+ * the PTRACE_MODE_READ check, 0 when filldir/proc_ns_fill_cache refused
+ * an entry, and 1 once every remaining entry has been emitted.
+ */
+static int proc_ns_dir_readdir(struct file *filp, void *dirent,
+				filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct task_struct *task = get_proc_task(inode);
+	ino_t ino;
+	int pos;
+	int ret = -ENOENT;
+
+	if (!task)
+		goto out_no_task;
+
+	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	ret = 0;
+	pos = filp->f_pos;
+
+	if (pos == 0) {
+		ino = inode->i_ino;
+		if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
+			goto out;
+		pos++;
+		filp->f_pos++;
+	}
+
+	if (pos == 1) {
+		ino = parent_ino(dentry);
+		if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
+			goto out;
+		pos++;
+		filp->f_pos++;
+	}
+
+	/* Translate the file position into an index into ns_entries[] */
+	pos -= 2;
+	if (pos >= ARRAY_SIZE(ns_entries)) {
+		ret = 1;
+		goto out;
+	}
+
+	for (; pos < ARRAY_SIZE(ns_entries); pos++) {
+		if (proc_ns_fill_cache(filp, dirent, filldir,
+				       task, ns_entries[pos]) < 0)
+			goto out;
+		filp->f_pos++;
+	}
+
+	ret = 1;
+out:
+	put_task_struct(task);
+out_no_task:
+	return ret;
+}
+
+const struct file_operations proc_ns_dir_operations = { /* file operations named by the DIR("ns", ...) pid entries */
+ .read = generic_read_dir,
+ .readdir = proc_ns_dir_readdir,
+};
+
+/*
+ * Look up one name inside a task's "ns" directory.
+ *
+ * @dir:    inode of the "ns" directory
+ * @dentry: dentry carrying the name being looked up
+ * @nd:     unused here
+ *
+ * Returns the result of proc_ns_instantiate() for the matching entry of
+ * ns_entries[], ERR_PTR(-ENOENT) when the task is gone or no entry has
+ * that name, and ERR_PTR(-EPERM) when the caller fails the
+ * PTRACE_MODE_READ check.
+ */
+static struct dentry *proc_ns_dir_lookup(struct inode *dir,
+				struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *error;
+	struct task_struct *task = get_proc_task(dir);
+	const struct proc_ns_operations *ops = NULL;
+	unsigned int len = dentry->d_name.len;
+	unsigned int i;
+
+	error = ERR_PTR(-ENOENT);
+
+	if (!task)
+		goto out_no_task;
+
+	error = ERR_PTR(-EPERM);
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto out;
+
+	/*
+	 * Walk by index rather than up to &ns_entries[ARRAY_SIZE - 1]:
+	 * the table is empty when no CONFIG_*_NS option is enabled, and
+	 * a "last element" pointer would then point before the array.
+	 */
+	for (i = 0; i < ARRAY_SIZE(ns_entries); i++) {
+		const struct proc_ns_operations *cand = ns_entries[i];
+
+		if (strlen(cand->name) != len)
+			continue;
+		if (!memcmp(dentry->d_name.name, cand->name, len)) {
+			ops = cand;
+			break;
+		}
+	}
+	error = ERR_PTR(-ENOENT);
+	if (!ops)
+		goto out;
+
+	error = proc_ns_instantiate(dir, dentry, task, ops);
+out:
+	put_task_struct(task);
+out_no_task:
+	return error;
+}
+
+const struct inode_operations proc_ns_dir_inode_operations = { /* inode operations named by the DIR("ns", ...) pid entries */
+ .lookup = proc_ns_dir_lookup,
+ .getattr = pid_getattr,
+ .setattr = proc_setattr,
+};
+
+/*
+ * Resolve @fd to its struct file, accepting it only when the file's
+ * f_op is ns_file_operations.
+ *
+ * Returns the file with the reference taken by fget() on success,
+ * ERR_PTR(-EBADF) when fget() finds no file, and ERR_PTR(-EINVAL)
+ * (after dropping the reference) when the file is of another kind.
+ */
+struct file *proc_ns_fget(int fd)
+{
+	struct file *file = fget(fd);
+
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	if (file->f_op == &ns_file_operations)
+		return file;
+
+	fput(file);
+	return ERR_PTR(-EINVAL);
+}
+
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2e7addfd9803..db15935fa757 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -211,10 +211,10 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
- int flags = vma->vm_flags;
+ vm_flags_t flags = vma->vm_flags;
unsigned long ino = 0;
unsigned long long pgoff = 0;
- unsigned long start;
+ unsigned long start, end;
dev_t dev = 0;
int len;
@@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
- if (vma->vm_flags & VM_GROWSDOWN)
- if (!vma_stack_continue(vma->vm_prev, vma->vm_start))
- start += PAGE_SIZE;
+ if (stack_guard_page_start(vma, start))
+ start += PAGE_SIZE;
+ end = vma->vm_end;
+ if (stack_guard_page_end(vma, end))
+ end -= PAGE_SIZE;
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
start,
- vma->vm_end,
+ end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
@@ -856,7 +858,192 @@ const struct file_operations proc_pagemap_operations = {
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
-extern int show_numa_map(struct seq_file *m, void *v);
+
+struct numa_maps { /* counters gathered for one VMA; show_numa_map() zeroes them before each walk */
+ struct vm_area_struct *vma; /* VMA being walked; gather_pte_stats() passes it to vm_normal_page() */
+ unsigned long pages; /* every page handed to gather_stats() */
+ unsigned long anon; /* pages for which PageAnon() is set */
+ unsigned long active; /* pages for which PageActive() or PageUnevictable() is set */
+ unsigned long writeback; /* pages for which PageWriteback() is set */
+ unsigned long mapcount_max; /* largest page_mapcount() seen */
+ unsigned long dirty; /* pages with a dirty pte or PageDirty() set */
+ unsigned long swapcache; /* pages for which PageSwapCache() is set */
+ unsigned long node[MAX_NUMNODES]; /* page count per node, indexed by page_to_nid() */
+};
+
+struct numa_maps_private { /* allocated in numa_maps_open() and stored in seq_file->private */
+ struct proc_maps_private proc_maps; /* pid/task/tail_vma state; show_numa_map() reads task and tail_vma from it */
+ struct numa_maps md; /* per-VMA counters reused by every show_numa_map() call */
+};
+
+/*
+ * Account one page in @md: bump the total, the per-node count and every
+ * per-state counter whose page flag is set, and track the highest
+ * mapcount seen.  @pte_dirty is non-zero when the mapping pte is dirty.
+ */
+static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
+{
+	int mapcount = page_mapcount(page);
+
+	md->pages++;
+
+	if (pte_dirty || PageDirty(page))
+		md->dirty++;
+	if (PageSwapCache(page))
+		md->swapcache++;
+	if (PageActive(page) || PageUnevictable(page))
+		md->active++;
+	if (PageWriteback(page))
+		md->writeback++;
+	if (PageAnon(page))
+		md->anon++;
+
+	if (mapcount > md->mapcount_max)
+		md->mapcount_max = mapcount;
+
+	md->node[page_to_nid(page)]++;
+}
+
+/*
+ * pmd_entry callback for the numa_maps page walk: with the pte lock
+ * held, visit each pte in [@addr, @end) and feed every present, normal,
+ * non-reserved page that sits on an N_HIGH_MEMORY node to
+ * gather_stats().  Always returns 0.
+ */
+static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
+		unsigned long end, struct mm_walk *walk)
+{
+	struct numa_maps *md = walk->private;
+	spinlock_t *ptl;
+	pte_t *first_pte;
+	pte_t *ptep;
+
+	first_pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	ptep = first_pte;
+	do {
+		struct page *page;
+
+		if (!pte_present(*ptep))
+			continue;
+
+		page = vm_normal_page(md->vma, addr, *ptep);
+		if (!page || PageReserved(page))
+			continue;
+
+		if (!node_isset(page_to_nid(page), node_states[N_HIGH_MEMORY]))
+			continue;
+
+		gather_stats(page, md, pte_dirty(*ptep));
+
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(first_pte, ptl);
+	return 0;
+}
+/*
+ * hugetlb_entry callback for the numa_maps page walk.  With
+ * CONFIG_HUGETLB_PAGE, a non-empty pte whose page is non-NULL is
+ * accounted through gather_stats(); without it this is a no-op.
+ * Always returns 0.
+ */
+static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+		unsigned long addr, unsigned long end, struct mm_walk *walk)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	struct page *page;
+
+	if (!pte_none(*pte)) {
+		page = pte_page(*pte);
+		if (page)
+			gather_stats(page, walk->private, pte_dirty(*pte));
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Display pages allocated per node and memory policy via /proc.
+ *
+ * Prints one line for the VMA passed in @v: start address, memory
+ * policy string, an optional file=/heap/stack tag, then the non-zero
+ * counters collected by walking the VMA's page tables.  VMAs without an
+ * mm are skipped.  Always returns 0.
+ */
+static int show_numa_map(struct seq_file *m, void *v)
+{
+	struct numa_maps_private *numa_priv = m->private;
+	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
+	struct numa_maps *md = &numa_priv->md;
+	struct vm_area_struct *vma = v;
+	struct mm_struct *mm = vma->vm_mm;
+	struct file *file = vma->vm_file;
+	struct mm_walk walk = {
+		.hugetlb_entry	= gather_hugetbl_stats,
+		.pmd_entry	= gather_pte_stats,
+		.private	= md,
+		.mm		= mm,
+	};
+	struct mempolicy *pol;
+	char buffer[50];
+	int n;
+
+	if (!mm)
+		return 0;
+
+	/* Ensure we start with an empty set of numa_maps statistics. */
+	memset(md, 0, sizeof(*md));
+	md->vma = vma;
+
+	pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
+	mpol_to_str(buffer, sizeof(buffer), pol, 0);
+	mpol_cond_put(pol);
+
+	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
+
+	if (file) {
+		seq_printf(m, " file=");
+		seq_path(m, &file->f_path, "\n\t= ");
+	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
+		seq_printf(m, " heap");
+	} else if (vma->vm_start <= mm->start_stack &&
+		   vma->vm_end >= mm->start_stack) {
+		seq_printf(m, " stack");
+	}
+
+	walk_page_range(vma->vm_start, vma->vm_end, &walk);
+
+	/* Counters are only printed when the walk found at least one page */
+	if (md->pages) {
+		if (md->anon)
+			seq_printf(m, " anon=%lu", md->anon);
+
+		if (md->dirty)
+			seq_printf(m, " dirty=%lu", md->dirty);
+
+		if (md->pages != md->anon && md->pages != md->dirty)
+			seq_printf(m, " mapped=%lu", md->pages);
+
+		if (md->mapcount_max > 1)
+			seq_printf(m, " mapmax=%lu", md->mapcount_max);
+
+		if (md->swapcache)
+			seq_printf(m, " swapcache=%lu", md->swapcache);
+
+		if (md->active < md->pages && !is_vm_hugetlb_page(vma))
+			seq_printf(m, " active=%lu", md->active);
+
+		if (md->writeback)
+			seq_printf(m, " writeback=%lu", md->writeback);
+
+		for_each_node_state(n, N_HIGH_MEMORY)
+			if (md->node[n])
+				seq_printf(m, " N%d=%lu", n, md->node[n]);
+	}
+
+	seq_putc(m, '\n');
+
+	if (m->count < m->size) {
+		if (vma != proc_priv->tail_vma)
+			m->version = vma->vm_start;
+		else
+			m->version = 0;
+	}
+	return 0;
+}
static const struct seq_operations proc_pid_numa_maps_op = {
.start = m_start,
@@ -867,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
static int numa_maps_open(struct inode *inode, struct file *file)
{
- return do_maps_open(inode, file, &proc_pid_numa_maps_op);
+ struct numa_maps_private *priv;
+ int ret = -ENOMEM;
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv) {
+ priv->proc_maps.pid = proc_pid(inode);
+ ret = seq_open(file, &proc_pid_numa_maps_op);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = priv;
+ } else {
+ kfree(priv);
+ }
+ }
+ return ret;
}
const struct file_operations proc_numa_maps_operations = {
@@ -876,4 +1076,4 @@ const struct file_operations proc_numa_maps_operations = {
.llseek = seq_lseek,
.release = seq_release_private,
};
-#endif
+#endif /* CONFIG_NUMA */
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
void pstore_get_records(void)
{
struct pstore_info *psi = psinfo;
- size_t size;
+ ssize_t size;
u64 id;
enum pstore_type_id type;
struct timespec time;
- int failed = 0;
+ int failed = 0, rc;
if (!psi)
return;
mutex_lock(&psinfo->buf_mutex);
+ rc = psi->open(psi);
+ if (rc)
+ goto out;
+
while ((size = psi->read(&id, &type, &time)) > 0) {
- if (pstore_mkfile(type, psi->name, id, psi->buf, size,
+ if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
time, psi->erase))
failed++;
}
+ psi->close(psi);
+out:
mutex_unlock(&psinfo->buf_mutex);
if (failed)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d3c032f5fa0a..5b572c89e6c4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -691,8 +691,11 @@ static void prune_dqcache(int count)
* This is called from kswapd when we think we need some
* more memory
*/
-static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink,
+ struct shrink_control *sc)
{
+ int nr = sc->nr_to_scan;
+
if (nr) {
spin_lock(&dq_list_lock);
prune_dqcache(nr);
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978da16..aa866d309695 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -162,6 +162,14 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+static void wakeup_pipe_readers(struct pipe_inode_info *pipe) /* wake sleepers on pipe->wait and send SIGIO/POLL_IN to fasync readers */
+{
+ smp_mb(); /* NOTE(review): presumably orders the caller's pipe update before the waitqueue_active() check - confirm */
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); /* issued whether or not anyone was sleeping on pipe->wait */
+}
+
/**
* splice_to_pipe - fill passed data into a pipe
* @pipe: pipe to fill
@@ -247,12 +255,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
pipe_unlock(pipe);
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ if (do_wakeup)
+ wakeup_pipe_readers(pipe);
while (page_nr < spd_pages)
spd->spd_release(spd, page_nr++);
@@ -1892,12 +1896,9 @@ retry:
/*
* If we put data in the output pipe, wakeup any potential readers.
*/
- if (ret > 0) {
- smp_mb();
- if (waitqueue_active(&opipe->wait))
- wake_up_interruptible(&opipe->wait);
- kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
- }
+ if (ret > 0)
+ wakeup_pipe_readers(opipe);
+
if (input_wakeup)
wakeup_pipe_writers(ipipe);
@@ -1976,12 +1977,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
/*
* If we put data in the output pipe, wakeup any potential readers.
*/
- if (ret > 0) {
- smp_mb();
- if (waitqueue_active(&opipe->wait))
- wake_up_interruptible(&opipe->wait);
- kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
- }
+ if (ret > 0)
+ wakeup_pipe_readers(opipe);
return ret;
}
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index efc309fa3035..7797218d0b30 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -42,7 +42,7 @@ config SQUASHFS_LZO
select LZO_DECOMPRESS
help
Saying Y here includes support for reading Squashfs file systems
- compressed with LZO compresssion. LZO compression is mainly
+ compressed with LZO compression. LZO compression is mainly
aimed at embedded systems with slower CPUs where the overheads
of zlib are too high.
@@ -57,7 +57,7 @@ config SQUASHFS_XZ
select XZ_DEC
help
Saying Y here includes support for reading Squashfs file systems
- compressed with XZ compresssion. XZ gives better compression than
+ compressed with XZ compression. XZ gives better compression than
the default zlib compression, at the expense of greater CPU and
memory overhead.
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index c37b520132ff..4b5a3fbb1f1f 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -29,7 +29,7 @@
* plus functions layered ontop of the generic cache implementation to
* access the metadata and fragment caches.
*
- * To avoid out of memory and fragmentation isssues with vmalloc the cache
+ * To avoid out of memory and fragmentation issues with vmalloc the cache
* uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
*
* It should be noted that the cache is not used for file datablocks, these
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c04f7e0b7ed2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -948,8 +948,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
* filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
* but s_maxbytes was an unsigned long long for many releases. Throw
* this warning for a little while to try and catch filesystems that
- * violate this rule. This warning should be either removed or
- * converted to a BUG() in 2.6.34.
+ * violate this rule.
*/
WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
"negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
#include "sysfs.h"
-/* used in crash dumps to help with debugging */
-static char last_sysfs_file[PATH_MAX];
-void sysfs_printk_last_file(void)
-{
- printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
-}
-
/*
* There's one sysfs_buffer for each open file and one
* sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
struct sysfs_buffer *buffer;
const struct sysfs_ops *ops;
int error = -EACCES;
- char *p;
-
- p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
- if (!IS_ERR(p))
- memmove(last_sysfs_file, p, strlen(p) + 1);
/* need attr_sd for attr and ops, its parent for kobj */
if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
}
/**
- * sysfs_update_group - given a directory kobject, create an attribute group
- * @kobj: The kobject to create the group on
- * @grp: The attribute group to create
+ * sysfs_update_group - given a directory kobject, update an attribute group
+ * @kobj: The kobject to update the group on
+ * @grp: The attribute group to update
*
* This function updates an attribute group. Unlike
* sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 8c4fc1425b3e..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,16 +22,24 @@
#include <linux/anon_inodes.h>
#include <linux/timerfd.h>
#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
struct timerfd_ctx {
struct hrtimer tmr;
ktime_t tintv;
+ ktime_t moffs;
wait_queue_head_t wqh;
u64 ticks;
int expired;
int clockid;
+ struct rcu_head rcu;
+ struct list_head clist;
+ bool might_cancel;
};
+static LIST_HEAD(cancel_list);
+static DEFINE_SPINLOCK(cancel_lock);
+
/*
* This gets called when the timer event triggers. We set the "expired"
* flag, but we do not re-arm the timer (in case it's necessary,
@@ -51,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
return HRTIMER_NORESTART;
}
+/*
+ * Called when the clock was set to cancel the timers in the cancel
+ * list.
+ *
+ * Walks cancel_list under rcu_read_lock().  For each context still
+ * flagged might_cancel, and with ctx->wqh.lock held, a context whose
+ * recorded monotonic offset differs from the current one is marked by
+ * storing KTIME_MAX in ctx->moffs and its waiters are woken.
+ * timerfd_canceled() later recognises that KTIME_MAX marker.
+ */
+void timerfd_clock_was_set(void)
+{
+ ktime_t moffs = ktime_get_monotonic_offset();
+ struct timerfd_ctx *ctx;
+ unsigned long flags;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &cancel_list, clist) {
+ if (!ctx->might_cancel)
+ continue;
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+ if (ctx->moffs.tv64 != moffs.tv64) {
+ ctx->moffs.tv64 = KTIME_MAX;
+ wake_up_locked(&ctx->wqh);
+ }
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Take @ctx off cancel_list if it is flagged might_cancel; the flag is
+ * cleared first and the list removal is done under cancel_lock.
+ */
+static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
+{
+	if (!ctx->might_cancel)
+		return;
+
+	ctx->might_cancel = false;
+	spin_lock(&cancel_lock);
+	list_del_rcu(&ctx->clist);
+	spin_unlock(&cancel_lock);
+}
+
+/*
+ * Report whether @ctx was marked cancelled, i.e. it is flagged
+ * might_cancel and its moffs holds the KTIME_MAX marker.  When it was,
+ * the marker is replaced by the current monotonic offset, so the
+ * cancellation is reported once.
+ */
+static bool timerfd_canceled(struct timerfd_ctx *ctx)
+{
+	if (ctx->might_cancel && ctx->moffs.tv64 == KTIME_MAX) {
+		ctx->moffs = ktime_get_monotonic_offset();
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Bring @ctx's membership of cancel_list in line with @flags.
+ *
+ * A CLOCK_REALTIME context armed with both TFD_TIMER_ABSTIME and
+ * TFD_TIMER_CANCEL_ON_SET is added to the list (once); any other
+ * combination removes a context that is currently on it.
+ */
+static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
+{
+	bool cancel_on_set = ctx->clockid == CLOCK_REALTIME &&
+			     (flags & TFD_TIMER_ABSTIME) &&
+			     (flags & TFD_TIMER_CANCEL_ON_SET);
+
+	if (!cancel_on_set) {
+		if (ctx->might_cancel)
+			timerfd_remove_cancel(ctx);
+		return;
+	}
+
+	if (ctx->might_cancel)
+		return;
+
+	ctx->might_cancel = true;
+	spin_lock(&cancel_lock);
+	list_add_rcu(&ctx->clist, &cancel_list);
+	spin_unlock(&cancel_lock);
+}
+
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
ktime_t remaining;
@@ -59,11 +124,12 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
}
-static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
- const struct itimerspec *ktmr)
+static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
+ const struct itimerspec *ktmr)
{
enum hrtimer_mode htmode;
ktime_t texp;
+ int clockid = ctx->clockid;
htmode = (flags & TFD_TIMER_ABSTIME) ?
HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -72,19 +138,24 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
ctx->expired = 0;
ctx->ticks = 0;
ctx->tintv = timespec_to_ktime(ktmr->it_interval);
- hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
+ hrtimer_init(&ctx->tmr, clockid, htmode);
hrtimer_set_expires(&ctx->tmr, texp);
ctx->tmr.function = timerfd_tmrproc;
- if (texp.tv64 != 0)
+ if (texp.tv64 != 0) {
hrtimer_start(&ctx->tmr, texp, htmode);
+ if (timerfd_canceled(ctx))
+ return -ECANCELED;
+ }
+ return 0;
}
static int timerfd_release(struct inode *inode, struct file *file)
{
struct timerfd_ctx *ctx = file->private_data;
+ timerfd_remove_cancel(ctx);
hrtimer_cancel(&ctx->tmr);
- kfree(ctx);
+ kfree_rcu(ctx, rcu);
return 0;
}
@@ -118,8 +189,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
res = -EAGAIN;
else
res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
+
+ /*
+ * If clock has changed, we do not care about the
+ * ticks and we do not rearm the timer. Userspace must
+ * reevaluate anyway.
+ */
+ if (timerfd_canceled(ctx)) {
+ ctx->ticks = 0;
+ ctx->expired = 0;
+ res = -ECANCELED;
+ }
+
if (ctx->ticks) {
ticks = ctx->ticks;
+
if (ctx->expired && ctx->tintv.tv64) {
/*
* If tintv.tv64 != 0, this is a periodic timer that
@@ -183,6 +267,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
init_waitqueue_head(&ctx->wqh);
ctx->clockid = clockid;
hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
+ ctx->moffs = ktime_get_monotonic_offset();
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -199,6 +284,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
struct file *file;
struct timerfd_ctx *ctx;
struct itimerspec ktmr, kotmr;
+ int ret;
if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
return -EFAULT;
@@ -213,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
return PTR_ERR(file);
ctx = file->private_data;
+ timerfd_setup_cancel(ctx, flags);
+
/*
* We need to stop the existing timer before reprogramming
* it to the new values.
@@ -240,14 +328,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
/*
* Re-program the timer to the new value ...
*/
- timerfd_setup(ctx, flags, &ktmr);
+ ret = timerfd_setup(ctx, flags, &ktmr);
spin_unlock_irq(&ctx->wqh.lock);
fput(file);
if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
return -EFAULT;
- return 0;
+ return ret;
}
SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 8b3a7da531eb..315de66e52b2 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c)
long long liab;
spin_lock(&c->space_lock);
- liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
+ liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
spin_unlock(&c->space_lock);
return liab;
}
@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
int idx_lebs;
long long idx_size;
- idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
+ idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
/* And make sure we have thrice the index size of space reserved */
idx_size += idx_size << 1;
/*
@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c)
* budgeted index space to the size of the current index, multiplies this by 3,
* and makes sure this does not exceed the amount of free LEBs.
*
- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
+ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
* o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
* be large, because UBIFS does not do any index consolidation as long as
* there is free space. IOW, the index may take a lot of LEBs, but the LEBs
* will contain a lot of dirt.
- * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW,
- * the index may be consolidated to take up to @c->min_idx_lebs LEBs.
+ * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
+ * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
*
* This function returns zero in case of success, and %-ENOSPC in case of
* failure.
@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c)
c->lst.taken_empty_lebs;
if (unlikely(rsvd_idx_lebs > lebs)) {
dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
- "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
+ "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
rsvd_idx_lebs);
return -ENOSPC;
}
available = ubifs_calc_available(c, min_idx_lebs);
- outstanding = c->budg_data_growth + c->budg_dd_growth;
+ outstanding = c->bi.data_growth + c->bi.dd_growth;
if (unlikely(available < outstanding)) {
dbg_budg("out of data space: available %lld, outstanding %lld",
@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c)
if (available - outstanding <= c->rp_size && !can_use_rp(c))
return -ENOSPC;
- c->min_idx_lebs = min_idx_lebs;
+ c->bi.min_idx_lebs = min_idx_lebs;
return 0;
}
@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c,
{
int data_growth;
- data_growth = req->new_ino ? c->inode_budget : 0;
+ data_growth = req->new_ino ? c->bi.inode_budget : 0;
if (req->new_page)
- data_growth += c->page_budget;
+ data_growth += c->bi.page_budget;
if (req->new_dent)
- data_growth += c->dent_budget;
+ data_growth += c->bi.dent_budget;
data_growth += req->new_ino_d;
return data_growth;
}
@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
{
int dd_growth;
- dd_growth = req->dirtied_page ? c->page_budget : 0;
+ dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
if (req->dirtied_ino)
- dd_growth += c->inode_budget << (req->dirtied_ino - 1);
+ dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
if (req->mod_dent)
- dd_growth += c->dent_budget;
+ dd_growth += c->bi.dent_budget;
dd_growth += req->dirtied_ino_d;
return dd_growth;
}
@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
again:
spin_lock(&c->space_lock);
- ubifs_assert(c->budg_idx_growth >= 0);
- ubifs_assert(c->budg_data_growth >= 0);
- ubifs_assert(c->budg_dd_growth >= 0);
+ ubifs_assert(c->bi.idx_growth >= 0);
+ ubifs_assert(c->bi.data_growth >= 0);
+ ubifs_assert(c->bi.dd_growth >= 0);
- if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
+ if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
dbg_budg("no space");
spin_unlock(&c->space_lock);
return -ENOSPC;
}
- c->budg_idx_growth += idx_growth;
- c->budg_data_growth += data_growth;
- c->budg_dd_growth += dd_growth;
+ c->bi.idx_growth += idx_growth;
+ c->bi.data_growth += data_growth;
+ c->bi.dd_growth += dd_growth;
err = do_budget_space(c);
if (likely(!err)) {
@@ -484,9 +484,9 @@ again:
}
/* Restore the old values */
- c->budg_idx_growth -= idx_growth;
- c->budg_data_growth -= data_growth;
- c->budg_dd_growth -= dd_growth;
+ c->bi.idx_growth -= idx_growth;
+ c->bi.data_growth -= data_growth;
+ c->bi.dd_growth -= dd_growth;
spin_unlock(&c->space_lock);
if (req->fast) {
@@ -506,9 +506,9 @@ again:
goto again;
}
dbg_budg("FS is full, -ENOSPC");
- c->nospace = 1;
+ c->bi.nospace = 1;
if (can_use_rp(c) || c->rp_size == 0)
- c->nospace_rp = 1;
+ c->bi.nospace_rp = 1;
smp_wmb();
} else
ubifs_err("cannot budget space, error %d", err);
@@ -523,8 +523,8 @@ again:
* This function releases the space budgeted by 'ubifs_budget_space()'. Note,
* since the index changes (which were budgeted for in @req->idx_growth) will
* only be written to the media on commit, this function moves the index budget
- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
- * zeroed by the commit operation.
+ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
+ * by the commit operation.
*/
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
{
@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
if (!req->data_growth && !req->dd_growth)
return;
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
spin_lock(&c->space_lock);
- c->budg_idx_growth -= req->idx_growth;
- c->budg_uncommitted_idx += req->idx_growth;
- c->budg_data_growth -= req->data_growth;
- c->budg_dd_growth -= req->dd_growth;
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
-
- ubifs_assert(c->budg_idx_growth >= 0);
- ubifs_assert(c->budg_data_growth >= 0);
- ubifs_assert(c->budg_dd_growth >= 0);
- ubifs_assert(c->min_idx_lebs < c->main_lebs);
- ubifs_assert(!(c->budg_idx_growth & 7));
- ubifs_assert(!(c->budg_data_growth & 7));
- ubifs_assert(!(c->budg_dd_growth & 7));
+ c->bi.idx_growth -= req->idx_growth;
+ c->bi.uncommitted_idx += req->idx_growth;
+ c->bi.data_growth -= req->data_growth;
+ c->bi.dd_growth -= req->dd_growth;
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+ ubifs_assert(c->bi.idx_growth >= 0);
+ ubifs_assert(c->bi.data_growth >= 0);
+ ubifs_assert(c->bi.dd_growth >= 0);
+ ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
+ ubifs_assert(!(c->bi.idx_growth & 7));
+ ubifs_assert(!(c->bi.data_growth & 7));
+ ubifs_assert(!(c->bi.dd_growth & 7));
spin_unlock(&c->space_lock);
}
@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
{
spin_lock(&c->space_lock);
/* Release the index growth reservation */
- c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
/* Release the data growth reservation */
- c->budg_data_growth -= c->page_budget;
+ c->bi.data_growth -= c->bi.page_budget;
/* Increase the dirty data growth reservation instead */
- c->budg_dd_growth += c->page_budget;
+ c->bi.dd_growth += c->bi.page_budget;
/* And re-calculate the indexing space reservation */
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
spin_unlock(&c->space_lock);
}
@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
memset(&req, 0, sizeof(struct ubifs_budget_req));
/* The "no space" flags will be cleared because dd_growth is > 0 */
- req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
+ req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
ubifs_release_budget(c, &req);
}
@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
int rsvd_idx_lebs, lebs;
long long available, outstanding, free;
- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
- outstanding = c->budg_data_growth + c->budg_dd_growth;
- available = ubifs_calc_available(c, c->min_idx_lebs);
+ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+ outstanding = c->bi.data_growth + c->bi.dd_growth;
+ available = ubifs_calc_available(c, c->bi.min_idx_lebs);
/*
* When reporting free space to user-space, UBIFS guarantees that it is
@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
* Note, the calculations below are similar to what we have in
* 'do_budget_space()', so refer there for comments.
*/
- if (c->min_idx_lebs > c->lst.idx_lebs)
- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+ if (c->bi.min_idx_lebs > c->lst.idx_lebs)
+ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 1bd01ded7123..87cd0ead8633 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c)
c->mst_node->root_len = cpu_to_le32(zroot.len);
c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
- c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
+ c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 004d3745dc45..0bb2bcef0de9 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,7 +34,6 @@
#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <linux/math64.h>
-#include <linux/slab.h>
#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
static char dbg_key_buf0[128];
static char dbg_key_buf1[128];
-unsigned int ubifs_msg_flags;
unsigned int ubifs_chk_flags;
unsigned int ubifs_tst_flags;
-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
MODULE_PARM_DESC(debug_chks, "Debug check flags");
MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
@@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
printk(KERN_DEBUG "\t big_lpt %u\n",
!!(sup_flags & UBIFS_FLG_BIGLPT));
+ printk(KERN_DEBUG "\t space_fixup %u\n",
+ !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
printk(KERN_DEBUG "\tmin_io_size %u\n",
le32_to_cpu(sup->min_io_size));
printk(KERN_DEBUG "\tleb_size %u\n",
@@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
spin_unlock(&dbg_lock);
}
-void dbg_dump_budg(struct ubifs_info *c)
+void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
{
int i;
struct rb_node *rb;
@@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c)
struct ubifs_gced_idx_leb *idx_gc;
long long available, outstanding, free;
- ubifs_assert(spin_is_locked(&c->space_lock));
+ spin_lock(&c->space_lock);
spin_lock(&dbg_lock);
- printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
- "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
- c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
- printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
- "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
- c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
- c->freeable_cnt);
- printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
- "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
- c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
+ printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
+ "total budget sum %lld\n", current->pid,
+ bi->data_growth + bi->dd_growth,
+ bi->data_growth + bi->dd_growth + bi->idx_growth);
+ printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
+ "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
+ bi->idx_growth);
+ printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
+ "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
+ bi->uncommitted_idx);
+ printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
+ bi->page_budget, bi->inode_budget, bi->dent_budget);
+ printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
+ bi->nospace, bi->nospace_rp);
+ printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+ c->dark_wm, c->dead_wm, c->max_idx_node_sz);
+
+ if (bi != &c->bi)
+ /*
+ * If we are dumping saved budgeting data, do not print
+ * additional information which is about the current state, not
+ * the old one which corresponded to the saved budgeting data.
+ */
+ goto out_unlock;
+
+ printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
+ c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
"clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
atomic_long_read(&c->dirty_zn_cnt),
atomic_long_read(&c->clean_zn_cnt));
- printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
- c->dark_wm, c->dead_wm, c->max_idx_node_sz);
printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
c->gc_lnum, c->ihead_lnum);
+
/* If we are in R/O mode, journal heads do not exist */
if (c->jheads)
for (i = 0; i < c->jhead_cnt; i++)
@@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c)
printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
/* Print budgeting predictions */
- available = ubifs_calc_available(c, c->min_idx_lebs);
- outstanding = c->budg_data_growth + c->budg_dd_growth;
+ available = ubifs_calc_available(c, c->bi.min_idx_lebs);
+ outstanding = c->bi.data_growth + c->bi.dd_growth;
free = ubifs_get_free_space_nolock(c);
printk(KERN_DEBUG "Budgeting predictions:\n");
printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
available, outstanding, free);
+out_unlock:
spin_unlock(&dbg_lock);
+ spin_unlock(&c->space_lock);
}
void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
@@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
if (bud->lnum == lp->lnum) {
int head = 0;
for (i = 0; i < c->jhead_cnt; i++) {
- if (lp->lnum == c->jheads[i].wbuf.lnum) {
+ /*
+ * Note, if we are in R/O mode or in the middle
+ * of mounting/re-mounting, the write-buffers do
+ * not exist.
+ */
+ if (c->jheads &&
+ lp->lnum == c->jheads[i].wbuf.lnum) {
printk(KERN_CONT ", jhead %s",
dbg_jhead(i));
head = 1;
@@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c)
spin_lock(&c->space_lock);
memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
+ memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
+ d->saved_idx_gc_cnt = c->idx_gc_cnt;
/*
* We use a dirty hack here and zero out @c->freeable_cnt, because it
@@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c)
out:
ubifs_msg("saved lprops statistics dump");
dbg_dump_lstats(&d->saved_lst);
- ubifs_get_lp_stats(c, &lst);
-
+ ubifs_msg("saved budgeting info dump");
+ dbg_dump_budg(c, &d->saved_bi);
+ ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
ubifs_msg("current lprops statistics dump");
+ ubifs_get_lp_stats(c, &lst);
dbg_dump_lstats(&lst);
-
- spin_lock(&c->space_lock);
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
+ ubifs_msg("current budgeting info dump");
+ dbg_dump_budg(c, &c->bi);
dump_stack();
return -EINVAL;
}
@@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
struct rb_node **p, *parent = NULL;
struct fsck_inode *fscki;
ino_t inum = key_inum_flash(c, &ino->key);
+ struct inode *inode;
+ struct ubifs_inode *ui;
p = &fsckd->inodes.rb_node;
while (*p) {
@@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
if (!fscki)
return ERR_PTR(-ENOMEM);
+ inode = ilookup(c->vfs_sb, inum);
+
fscki->inum = inum;
- fscki->nlink = le32_to_cpu(ino->nlink);
- fscki->size = le64_to_cpu(ino->size);
- fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
- fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
- fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
- fscki->mode = le32_to_cpu(ino->mode);
+ /*
+ * If the inode is present in the VFS inode cache, use it instead of
+ * the on-flash inode which might be out-of-date. E.g., the size might
+ * be out-of-date. If we do not do this, the following may happen, for
+ * example:
+ * 1. A power cut happens
+ * 2. We mount the file-system R/O, the replay process fixes up the
+ * inode size in the VFS cache, but not on-flash.
+ * 3. 'check_leaf()' fails because it hits a data node beyond inode
+ * size.
+ */
+ if (!inode) {
+ fscki->nlink = le32_to_cpu(ino->nlink);
+ fscki->size = le64_to_cpu(ino->size);
+ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+ fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+ fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+ fscki->mode = le32_to_cpu(ino->mode);
+ } else {
+ ui = ubifs_inode(inode);
+ fscki->nlink = inode->i_nlink;
+ fscki->size = inode->i_size;
+ fscki->xattr_cnt = ui->xattr_cnt;
+ fscki->xattr_sz = ui->xattr_size;
+ fscki->xattr_nms = ui->xattr_names;
+ fscki->mode = inode->i_mode;
+ iput(inode);
+ }
+
if (S_ISDIR(fscki->mode)) {
fscki->calc_sz = UBIFS_INO_NODE_SZ;
fscki->calc_cnt = 2;
}
+
rb_link_node(&fscki->rb, parent, p);
rb_insert_color(&fscki->rb, &fsckd->inodes);
+
return fscki;
}
@@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
hashb = key_block(c, &sb->key);
if (hasha > hashb) {
- ubifs_err("larger hash %u goes before %u", hasha, hashb);
+ ubifs_err("larger hash %u goes before %u",
+ hasha, hashb);
goto error_dump;
}
}
@@ -2437,14 +2491,12 @@ error_dump:
return 0;
}
-static int invocation_cnt;
-
int dbg_force_in_the_gaps(void)
{
- if (!dbg_force_in_the_gaps_enabled)
+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
return 0;
- /* Force in-the-gaps every 8th commit */
- return !((invocation_cnt++) & 0x7);
+
+ return !(random32() & 7);
}
/* Failure mode for recovery testing */
@@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
int len, int check)
{
if (in_failure_mode(desc))
- return -EIO;
+ return -EROFS;
return ubi_leb_read(desc, lnum, buf, offset, len, check);
}
@@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
int err, failing;
if (in_failure_mode(desc))
- return -EIO;
+ return -EROFS;
failing = do_fail(desc, lnum, 1);
if (failing)
cut_data(buf, len);
@@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
if (err)
return err;
if (failing)
- return -EIO;
+ return -EROFS;
return 0;
}
@@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
int err;
if (do_fail(desc, lnum, 1))
- return -EIO;
+ return -EROFS;
err = ubi_leb_change(desc, lnum, buf, len, dtype);
if (err)
return err;
if (do_fail(desc, lnum, 1))
- return -EIO;
+ return -EROFS;
return 0;
}
@@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
int err;
if (do_fail(desc, lnum, 0))
- return -EIO;
+ return -EROFS;
err = ubi_leb_erase(desc, lnum);
if (err)
return err;
if (do_fail(desc, lnum, 0))
- return -EIO;
+ return -EROFS;
return 0;
}
@@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
int err;
if (do_fail(desc, lnum, 0))
- return -EIO;
+ return -EROFS;
err = ubi_leb_unmap(desc, lnum);
if (err)
return err;
if (do_fail(desc, lnum, 0))
- return -EIO;
+ return -EROFS;
return 0;
}
int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
{
if (in_failure_mode(desc))
- return -EIO;
+ return -EROFS;
return ubi_is_mapped(desc, lnum);
}
@@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
int err;
if (do_fail(desc, lnum, 0))
- return -EIO;
+ return -EROFS;
err = ubi_leb_map(desc, lnum, dtype);
if (err)
return err;
if (do_fail(desc, lnum, 0))
- return -EIO;
+ return -EROFS;
return 0;
}
@@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void)
static int open_debugfs_file(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
- return 0;
+ return nonseekable_open(inode, file);
}
static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
@@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
if (file->f_path.dentry == d->dfs_dump_lprops)
dbg_dump_lprops(c);
- else if (file->f_path.dentry == d->dfs_dump_budg) {
- spin_lock(&c->space_lock);
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
- } else if (file->f_path.dentry == d->dfs_dump_tnc) {
+ else if (file->f_path.dentry == d->dfs_dump_budg)
+ dbg_dump_budg(c, &c->bi);
+ else if (file->f_path.dentry == d->dfs_dump_tnc) {
mutex_lock(&c->tnc_mutex);
dbg_dump_tnc(c);
mutex_unlock(&c->tnc_mutex);
} else
return -EINVAL;
- *ppos += count;
return count;
}
@@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = {
.open = open_debugfs_file,
.write = write_debugfs_file,
.owner = THIS_MODULE,
- .llseek = default_llseek,
+ .llseek = no_llseek,
};
/**
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index e6493cac193d..a811ac4a26bb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
#ifdef CONFIG_UBIFS_FS_DEBUG
+#include <linux/random.h>
+
/**
* ubifs_debug_info - per-FS debugging information.
* @old_zroot: old index root - used by 'dbg_check_old_index()'
@@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
* @new_ihead_offs: used by debugging to check @c->ihead_offs
*
* @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
- * @saved_free: saved free space (used by 'dbg_save_space_info()')
+ * @saved_bi: saved budgeting information
+ * @saved_free: saved amount of free space
+ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
*
- * dfs_dir_name: name of debugfs directory containing this file-system's files
- * dfs_dir: direntry object of the file-system debugfs directory
- * dfs_dump_lprops: "dump lprops" debugfs knob
- * dfs_dump_budg: "dump budgeting information" debugfs knob
- * dfs_dump_tnc: "dump TNC" debugfs knob
+ * @dfs_dir_name: name of debugfs directory containing this file-system's files
+ * @dfs_dir: direntry object of the file-system debugfs directory
+ * @dfs_dump_lprops: "dump lprops" debugfs knob
+ * @dfs_dump_budg: "dump budgeting information" debugfs knob
+ * @dfs_dump_tnc: "dump TNC" debugfs knob
*/
struct ubifs_debug_info {
struct ubifs_zbranch old_zroot;
@@ -76,7 +80,9 @@ struct ubifs_debug_info {
int new_ihead_offs;
struct ubifs_lp_stats saved_lst;
+ struct ubifs_budg_info saved_bi;
long long saved_free;
+ int saved_idx_gc_cnt;
char dfs_dir_name[100];
struct dentry *dfs_dir;
@@ -101,23 +107,7 @@ struct ubifs_debug_info {
} \
} while (0)
-#define dbg_dump_stack() do { \
- if (!dbg_failure_mode) \
- dump_stack(); \
-} while (0)
-
-/* Generic debugging messages */
-#define dbg_msg(fmt, ...) do { \
- spin_lock(&dbg_lock); \
- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
- __func__, ##__VA_ARGS__); \
- spin_unlock(&dbg_lock); \
-} while (0)
-
-#define dbg_do_msg(typ, fmt, ...) do { \
- if (ubifs_msg_flags & typ) \
- dbg_msg(fmt, ##__VA_ARGS__); \
-} while (0)
+#define dbg_dump_stack() dump_stack()
#define dbg_err(fmt, ...) do { \
spin_lock(&dbg_lock); \
@@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c,
#define DBGKEY(key) dbg_key_str0(c, (key))
#define DBGKEY1(key) dbg_key_str1(c, (key))
-/* General messages */
-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
+#define ubifs_dbg_msg(type, fmt, ...) do { \
+ spin_lock(&dbg_lock); \
+ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
+ spin_unlock(&dbg_lock); \
+} while (0)
+/* Just debugging messages not related to any specific UBIFS subsystem */
+#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
+/* General messages */
+#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
/* Additional journal messages */
-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
-
+#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
/* Additional TNC messages */
-#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
-
+#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
/* Additional lprops messages */
-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
-
+#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
/* Additional LEB find messages */
-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
-
+#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
/* Additional mount messages */
-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
-
+#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
/* Additional I/O messages */
-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
-
+#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
/* Additional commit messages */
-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
-
+#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
/* Additional budgeting messages */
-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
-
+#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
/* Additional log messages */
-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
-
+#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
/* Additional gc messages */
-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
-
+#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
/* Additional scan messages */
-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
-
+#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
/* Additional recovery messages */
-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
-
-/*
- * Debugging message type flags.
- *
- * UBIFS_MSG_GEN: general messages
- * UBIFS_MSG_JNL: journal messages
- * UBIFS_MSG_MNT: mount messages
- * UBIFS_MSG_CMT: commit messages
- * UBIFS_MSG_FIND: LEB find messages
- * UBIFS_MSG_BUDG: budgeting messages
- * UBIFS_MSG_GC: garbage collection messages
- * UBIFS_MSG_TNC: TNC messages
- * UBIFS_MSG_LP: lprops messages
- * UBIFS_MSG_IO: I/O messages
- * UBIFS_MSG_LOG: log messages
- * UBIFS_MSG_SCAN: scan messages
- * UBIFS_MSG_RCVRY: recovery messages
- */
-enum {
- UBIFS_MSG_GEN = 0x1,
- UBIFS_MSG_JNL = 0x2,
- UBIFS_MSG_MNT = 0x4,
- UBIFS_MSG_CMT = 0x8,
- UBIFS_MSG_FIND = 0x10,
- UBIFS_MSG_BUDG = 0x20,
- UBIFS_MSG_GC = 0x40,
- UBIFS_MSG_TNC = 0x80,
- UBIFS_MSG_LP = 0x100,
- UBIFS_MSG_IO = 0x200,
- UBIFS_MSG_LOG = 0x400,
- UBIFS_MSG_SCAN = 0x800,
- UBIFS_MSG_RCVRY = 0x1000,
-};
+#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
/*
* Debugging check flags.
@@ -233,11 +186,9 @@ enum {
/*
* Special testing flags.
*
- * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
* UBIFS_TST_RCVRY: failure mode for recovery testing
*/
enum {
- UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
UBIFS_TST_RCVRY = 0x4,
};
@@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
int offs);
void dbg_dump_budget_req(const struct ubifs_budget_req *req);
void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
-void dbg_dump_budg(struct ubifs_info *c);
+void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
void dbg_dump_lprops(struct ubifs_info *c);
void dbg_dump_lpt_info(struct ubifs_info *c);
@@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
/* Force the use of in-the-gaps method for testing */
-
-#define dbg_force_in_the_gaps_enabled \
- (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
-
+static inline int dbg_force_in_the_gaps_enabled(void)
+{
+ return ubifs_chk_flags & UBIFS_CHK_GEN;
+}
int dbg_force_in_the_gaps(void);
/* Failure mode for recovery testing */
-
#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
#ifndef UBIFS_DBG_PRESERVE_UBI
-
#define ubi_leb_read dbg_leb_read
#define ubi_leb_write dbg_leb_write
#define ubi_leb_change dbg_leb_change
@@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void);
#define ubi_leb_unmap dbg_leb_unmap
#define ubi_is_mapped dbg_is_mapped
#define ubi_leb_map dbg_leb_map
-
#endif
int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
@@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
__func__, __LINE__, current->pid); \
} while (0)
-#define dbg_err(fmt, ...) do { \
- if (0) \
- ubifs_err(fmt, ##__VA_ARGS__); \
+#define dbg_err(fmt, ...) do { \
+ if (0) \
+ ubifs_err(fmt, ##__VA_ARGS__); \
} while (0)
-#define dbg_msg(fmt, ...) do { \
- if (0) \
- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \
- current->pid, __func__, ##__VA_ARGS__); \
+#define ubifs_dbg_msg(fmt, ...) do { \
+ if (0) \
+ pr_debug(fmt "\n", ##__VA_ARGS__); \
} while (0)
#define dbg_dump_stack()
#define ubifs_assert_cmt_locked(c)
-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define DBGKEY(key) ((char *)(key))
#define DBGKEY1(key) ((char *)(key))
@@ -420,7 +368,9 @@ static inline void
dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
static inline void
dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
-static inline void dbg_dump_budg(struct ubifs_info *c) { return; }
+static inline void
+dbg_dump_budg(struct ubifs_info *c,
+ const struct ubifs_budg_info *bi) { return; }
static inline void dbg_dump_lprop(const struct ubifs_info *c,
const struct ubifs_lprops *lp) { return; }
static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
@@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c,
struct list_head *head) { return 0; }
static inline int dbg_force_in_the_gaps(void) { return 0; }
-#define dbg_force_in_the_gaps_enabled 0
-#define dbg_failure_mode 0
+#define dbg_force_in_the_gaps_enabled() 0
+#define dbg_failure_mode 0
static inline int dbg_debugfs_init(void) { return 0; }
static inline void dbg_debugfs_exit(void) { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index d80810bb4c37..c2b80943560d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
ubifs_release_budget(c, &req);
else {
/* We've deleted something - clean the "no space" flags */
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return 0;
@@ -695,7 +695,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
ubifs_release_budget(c, &req);
else {
/* We've deleted something - clean the "no space" flags */
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return 0;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b286db79c686..5e7fccfc4b29 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c)
*/
static void release_existing_page_budget(struct ubifs_info *c)
{
- struct ubifs_budget_req req = { .dd_growth = c->page_budget};
+ struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
ubifs_release_budget(c, &req);
}
@@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len)
* the page locked, and it locks @ui_mutex. However, write-back does take inode
* @i_mutex, which means other VFS operations may be run on this inode at the
* same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
- * drops the truncated pages. And while dropping the pages, it takes the page
- * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
- * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
- * means that @inode->i_size is changed while @ui_mutex is unlocked.
+ * we have to call 'truncate_setsize()', which first changes @inode->i_size,
+ * then drops the truncated pages. And while dropping the pages, it takes the
+ * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
+ * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
+ * This means that @inode->i_size is changed while @ui_mutex is unlocked.
*
* XXX(truncate): with the new truncate sequence this is not true anymore,
* and the calls to truncate_setsize can be move around freely. They should
@@ -1189,7 +1189,7 @@ out_budg:
if (budgeted)
ubifs_release_budget(c, &req);
else {
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return err;
@@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync)
dbg_gen("syncing inode %lu", inode->i_ino);
- if (inode->i_sb->s_flags & MS_RDONLY)
+ if (c->ro_mount)
+ /*
+ * For some really strange reason the VFS does not filter out
+ * 'fsync()' for R/O mounted file-systems as of 2.6.39.
+ */
return 0;
/*
@@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
}
/*
- * mmap()d file has taken write protection fault and is being made
- * writable. UBIFS must ensure page is budgeted for.
+ * mmap()d file has taken write protection fault and is being made writable.
+ * UBIFS must ensure page is budgeted for.
*/
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
int err;
- /* 'generic_file_mmap()' takes care of NOMMU case */
err = generic_file_mmap(file, vma);
if (err)
return err;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 1d54383d1269..2559d174e004 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
* But if the index takes fewer LEBs than it is reserved for it,
* this function must avoid picking those reserved LEBs.
*/
- if (c->min_idx_lebs >= c->lst.idx_lebs) {
- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+ if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
+ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
exclude_index = 1;
}
spin_unlock(&c->space_lock);
@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
pick_free = 0;
} else {
spin_lock(&c->space_lock);
- exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
+ exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
spin_unlock(&c->space_lock);
}
@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
/* Check if there are enough empty LEBs for commit */
spin_lock(&c->space_lock);
- if (c->min_idx_lebs > c->lst.idx_lebs)
- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+ if (c->bi.min_idx_lebs > c->lst.idx_lebs)
+ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 151f10882820..ded29f6224c2 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c)
if (err)
return err;
+ err = ubifs_wbuf_sync_nolock(wbuf);
+ if (err)
+ return err;
+
err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
if (err)
return err;
@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c)
* This function compares data nodes @a and @b. Returns %1 if @a has greater
* inode or block number, and %-1 otherwise.
*/
-int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
{
ino_t inuma, inumb;
struct ubifs_info *c = priv;
@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
* first and sorted by length in descending order. Directory entry nodes go
* after inode nodes and are sorted in ascending hash valuer order.
*/
-int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int nondata_nodes_cmp(void *priv, struct list_head *a,
+ struct list_head *b)
{
ino_t inuma, inumb;
struct ubifs_info *c = priv;
@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
ubifs_assert(c->gc_lnum != lnum);
ubifs_assert(wbuf->lnum != lnum);
+ if (lp->free + lp->dirty == c->leb_size) {
+ /* Special case - a free LEB */
+ dbg_gc("LEB %d is free, return it", lp->lnum);
+ ubifs_assert(!(lp->flags & LPROPS_INDEX));
+
+ if (lp->free != c->leb_size) {
+ /*
+ * Write buffers must be sync'd before unmapping
+ * freeable LEBs, because one of them may contain data
+ * which obsoletes something in 'lp->pnum'.
+ */
+ err = gc_sync_wbufs(c);
+ if (err)
+ return err;
+ err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
+ 0, 0, 0, 0);
+ if (err)
+ return err;
+ }
+ err = ubifs_leb_unmap(c, lp->lnum);
+ if (err)
+ return err;
+
+ if (c->gc_lnum == -1) {
+ c->gc_lnum = lnum;
+ return LEB_RETAINED;
+ }
+
+ return LEB_FREED;
+ }
+
/*
* We scan the entire LEB even though we only really need to scan up to
* (c->leb_size - lp->free).
@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
"(min. space %d)", lp.lnum, lp.free, lp.dirty,
lp.free + lp.dirty, min_space);
- if (lp.free + lp.dirty == c->leb_size) {
- /* An empty LEB was returned */
- dbg_gc("LEB %d is free, return it", lp.lnum);
- /*
- * ubifs_find_dirty_leb() doesn't return freeable index
- * LEBs.
- */
- ubifs_assert(!(lp.flags & LPROPS_INDEX));
- if (lp.free != c->leb_size) {
- /*
- * Write buffers must be sync'd before
- * unmapping freeable LEBs, because one of them
- * may contain data which obsoletes something
- * in 'lp.pnum'.
- */
- ret = gc_sync_wbufs(c);
- if (ret)
- goto out;
- ret = ubifs_change_one_lp(c, lp.lnum,
- c->leb_size, 0, 0, 0,
- 0);
- if (ret)
- goto out;
- }
- ret = ubifs_leb_unmap(c, lp.lnum);
- if (ret)
- goto out;
- ret = lp.lnum;
- break;
- }
-
space_before = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum == -1)
space_before = 0;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index dfd168b7807e..166951e0dcd3 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
ubifs_assert(wbuf->size % c->min_io_size == 0);
ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs >= c->max_write_size)
- ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
+ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
if (c->ro_error)
return -EROFS;
@@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
* @dtype: data type
*
* This function targets the write-buffer to logical eraseblock @lnum:@offs.
- * The write-buffer is synchronized if it is not empty. Returns zero in case of
- * success and a negative error code in case of failure.
+ * The write-buffer has to be empty. Returns zero in case of success and a
+ * negative error code in case of failure.
*/
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
int dtype)
@@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
ubifs_assert(offs >= 0 && offs <= c->leb_size);
ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
ubifs_assert(lnum != wbuf->lnum);
-
- if (wbuf->used > 0) {
- int err = ubifs_wbuf_sync_nolock(wbuf);
-
- if (err)
- return err;
- }
+ ubifs_assert(wbuf->used == 0);
spin_lock(&wbuf->lock);
wbuf->lnum = lnum;
@@ -573,7 +567,7 @@ out_timers:
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
{
struct ubifs_info *c = wbuf->c;
- int err, written, n, aligned_len = ALIGN(len, 8), offs;
+ int err, written, n, aligned_len = ALIGN(len, 8);
dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs >= c->max_write_size)
- ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
+ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
err = -ENOSPC;
@@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
goto exit;
}
- offs = wbuf->offs;
written = 0;
if (wbuf->used) {
@@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
if (err)
goto out;
- offs += wbuf->size;
+ wbuf->offs += wbuf->size;
len -= wbuf->avail;
aligned_len -= wbuf->avail;
written += wbuf->avail;
@@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
if (err)
goto out;
- offs += wbuf->size;
+ wbuf->offs += wbuf->size;
len -= wbuf->size;
aligned_len -= wbuf->size;
written += wbuf->size;
@@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
n = aligned_len >> c->max_write_shift;
if (n) {
n <<= c->max_write_shift;
- dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
- err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
- wbuf->dtype);
+ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
+ wbuf->offs);
+ err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
+ wbuf->offs, n, wbuf->dtype);
if (err)
goto out;
- offs += n;
+ wbuf->offs += n;
aligned_len -= n;
len -= n;
written += n;
@@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
*/
memcpy(wbuf->buf, buf + written, len);
- wbuf->offs = offs;
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
else
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index aed25e864227..34b1679e6e3a 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -141,14 +141,8 @@ again:
* LEB with some empty space.
*/
lnum = ubifs_find_free_space(c, len, &offs, squeeze);
- if (lnum >= 0) {
- /* Found an LEB, add it to the journal head */
- err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
- if (err)
- goto out_return;
- /* A new bud was successfully allocated and added to the log */
+ if (lnum >= 0)
goto out;
- }
err = lnum;
if (err != -ENOSPC)
@@ -203,12 +197,23 @@ again:
return 0;
}
- err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
- if (err)
- goto out_return;
offs = 0;
out:
+ /*
+ * Make sure we synchronize the write-buffer before we add the new bud
+ * to the log. Otherwise we may have a power cut after the log
+ * reference node for the last bud (@lnum) is written but before the
+ * write-buffer data are written to the next-to-last bud
+ * (@wbuf->lnum). And the effect would be that the recovery would see
+ * that there is corruption in the next-to-last bud.
+ */
+ err = ubifs_wbuf_sync_nolock(wbuf);
+ if (err)
+ goto out_return;
+ err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
+ if (err)
+ goto out_return;
err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
if (err)
goto out_unlock;
@@ -380,10 +385,8 @@ out:
if (err == -ENOSPC) {
/* This are some budgeting problems, print useful information */
down_write(&c->commit_sem);
- spin_lock(&c->space_lock);
dbg_dump_stack();
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
+ dbg_dump_budg(c, &c->bi);
dbg_dump_lprops(c);
cmt_retries = dbg_check_lprops(c);
up_write(&c->commit_sem);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 4d0cb1241460..affea9494ae2 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
}
/**
- * next_log_lnum - switch to the next log LEB.
- * @c: UBIFS file-system description object
- * @lnum: current log LEB
- */
-static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
-{
- lnum += 1;
- if (lnum > c->log_last)
- lnum = UBIFS_LOG_LNUM;
-
- return lnum;
-}
-
-/**
* empty_log_bytes - calculate amount of empty space in the log.
* @c: UBIFS file-system description object
*/
@@ -175,26 +161,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
}
/**
- * ubifs_create_buds_lists - create journal head buds lists for remount rw.
- * @c: UBIFS file-system description object
- */
-void ubifs_create_buds_lists(struct ubifs_info *c)
-{
- struct rb_node *p;
-
- spin_lock(&c->buds_lock);
- p = rb_first(&c->buds);
- while (p) {
- struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
- struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
-
- list_add_tail(&bud->list, &jhead->buds_list);
- p = rb_next(p);
- }
- spin_unlock(&c->buds_lock);
-}
-
-/**
* ubifs_add_bud_to_log - add a new bud to the log.
* @c: UBIFS file-system description object
* @jhead: journal head the bud belongs to
@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
ref->jhead = cpu_to_le32(jhead);
if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
@@ -445,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
/* Switch to the next log LEB */
if (c->lhead_offs) {
- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
@@ -466,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
c->lhead_offs += len;
if (c->lhead_offs == c->leb_size) {
- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
@@ -553,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
}
mutex_lock(&c->log_mutex);
for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
- lnum = next_log_lnum(c, lnum)) {
+ lnum = ubifs_next_log_lnum(c, lnum)) {
dbg_log("unmap log LEB %d", lnum);
err = ubifs_leb_unmap(c, lnum);
if (err)
@@ -662,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
if (err)
return err;
- *lnum = next_log_lnum(c, *lnum);
+ *lnum = ubifs_next_log_lnum(c, *lnum);
*offs = 0;
}
memcpy(buf + *offs, node, len);
@@ -732,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
ubifs_scan_destroy(sleb);
if (lnum == c->lhead_lnum)
break;
- lnum = next_log_lnum(c, lnum);
+ lnum = ubifs_next_log_lnum(c, lnum);
}
if (offs) {
int sz = ALIGN(offs, c->min_io_size);
@@ -752,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
/* Unmap remaining LEBs */
lnum = write_lnum;
do {
- lnum = next_log_lnum(c, lnum);
+ lnum = ubifs_next_log_lnum(c, lnum);
err = ubifs_leb_unmap(c, lnum);
if (err)
return err;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 0ee0847f2421..667884f4a615 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1007,21 +1007,11 @@ out:
}
/**
- * struct scan_check_data - data provided to scan callback function.
- * @lst: LEB properties statistics
- * @err: error code
- */
-struct scan_check_data {
- struct ubifs_lp_stats lst;
- int err;
-};
-
-/**
* scan_check_cb - scan callback.
* @c: the UBIFS file-system description object
* @lp: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
- * @data: information passed to and from the caller of the scan
+ * @lst: lprops statistics to update
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1030,11 +1020,10 @@ struct scan_check_data {
*/
static int scan_check_cb(struct ubifs_info *c,
const struct ubifs_lprops *lp, int in_tree,
- struct scan_check_data *data)
+ struct ubifs_lp_stats *lst)
{
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
- struct ubifs_lp_stats *lst = &data->lst;
int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
void *buf = NULL;
@@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c,
if (cat != (lp->flags & LPROPS_CAT_MASK)) {
ubifs_err("bad LEB category %d expected %d",
(lp->flags & LPROPS_CAT_MASK), cat);
- goto out;
+ return -EINVAL;
}
}
@@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c,
}
if (!found) {
ubifs_err("bad LPT list (category %d)", cat);
- goto out;
+ return -EINVAL;
}
}
}
@@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c,
if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
lp != heap->arr[lp->hpos]) {
ubifs_err("bad LPT heap (category %d)", cat);
- goto out;
+ return -EINVAL;
}
}
buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
- if (!buf) {
- ubifs_err("cannot allocate memory to scan LEB %d", lnum);
- goto out;
+ if (!buf)
+ return -ENOMEM;
+
+ /*
+ * After an unclean unmount, empty and freeable LEBs
+ * may contain garbage - do not scan them.
+ */
+ if (lp->free == c->leb_size) {
+ lst->empty_lebs += 1;
+ lst->total_free += c->leb_size;
+ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
+ return LPT_SCAN_CONTINUE;
+ }
+ if (lp->free + lp->dirty == c->leb_size &&
+ !(lp->flags & LPROPS_INDEX)) {
+ lst->total_free += lp->free;
+ lst->total_dirty += lp->dirty;
+ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
+ return LPT_SCAN_CONTINUE;
}
sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) {
- /*
- * After an unclean unmount, empty and freeable LEBs
- * may contain garbage.
- */
- if (lp->free == c->leb_size) {
- ubifs_err("scan errors were in empty LEB "
- "- continuing checking");
- lst->empty_lebs += 1;
- lst->total_free += c->leb_size;
- lst->total_dark += ubifs_calc_dark(c, c->leb_size);
- ret = LPT_SCAN_CONTINUE;
- goto exit;
- }
-
- if (lp->free + lp->dirty == c->leb_size &&
- !(lp->flags & LPROPS_INDEX)) {
- ubifs_err("scan errors were in freeable LEB "
- "- continuing checking");
- lst->total_free += lp->free;
- lst->total_dirty += lp->dirty;
- lst->total_dark += ubifs_calc_dark(c, c->leb_size);
- ret = LPT_SCAN_CONTINUE;
- goto exit;
+ ret = PTR_ERR(sleb);
+ if (ret == -EUCLEAN) {
+ dbg_dump_lprops(c);
+ dbg_dump_budg(c, &c->bi);
}
- data->err = PTR_ERR(sleb);
- ret = LPT_SCAN_STOP;
- goto exit;
+ goto out;
}
is_idx = -1;
@@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c,
}
ubifs_scan_destroy(sleb);
- ret = LPT_SCAN_CONTINUE;
-exit:
vfree(buf);
- return ret;
+ return LPT_SCAN_CONTINUE;
out_print:
ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
@@ -1258,10 +1240,10 @@ out_print:
dbg_dump_leb(c, lnum);
out_destroy:
ubifs_scan_destroy(sleb);
+ ret = -EINVAL;
out:
vfree(buf);
- data->err = -EINVAL;
- return LPT_SCAN_STOP;
+ return ret;
}
/**
@@ -1278,8 +1260,7 @@ out:
int dbg_check_lprops(struct ubifs_info *c)
{
int i, err;
- struct scan_check_data data;
- struct ubifs_lp_stats *lst = &data.lst;
+ struct ubifs_lp_stats lst;
if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
return 0;
@@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c)
return err;
}
- memset(lst, 0, sizeof(struct ubifs_lp_stats));
-
- data.err = 0;
+ memset(&lst, 0, sizeof(struct ubifs_lp_stats));
err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
(ubifs_lpt_scan_callback)scan_check_cb,
- &data);
+ &lst);
if (err && err != -ENOSPC)
goto out;
- if (data.err) {
- err = data.err;
- goto out;
- }
- if (lst->empty_lebs != c->lst.empty_lebs ||
- lst->idx_lebs != c->lst.idx_lebs ||
- lst->total_free != c->lst.total_free ||
- lst->total_dirty != c->lst.total_dirty ||
- lst->total_used != c->lst.total_used) {
+ if (lst.empty_lebs != c->lst.empty_lebs ||
+ lst.idx_lebs != c->lst.idx_lebs ||
+ lst.total_free != c->lst.total_free ||
+ lst.total_dirty != c->lst.total_dirty ||
+ lst.total_used != c->lst.total_used) {
ubifs_err("bad overall accounting");
ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
"total_free %lld, total_dirty %lld, total_used %lld",
- lst->empty_lebs, lst->idx_lebs, lst->total_free,
- lst->total_dirty, lst->total_used);
+ lst.empty_lebs, lst.idx_lebs, lst.total_free,
+ lst.total_dirty, lst.total_used);
ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
"total_free %lld, total_dirty %lld, total_used %lld",
c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
@@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c)
goto out;
}
- if (lst->total_dead != c->lst.total_dead ||
- lst->total_dark != c->lst.total_dark) {
+ if (lst.total_dead != c->lst.total_dead ||
+ lst.total_dark != c->lst.total_dark) {
ubifs_err("bad dead/dark space accounting");
ubifs_err("calculated: total_dead %lld, total_dark %lld",
- lst->total_dead, lst->total_dark);
+ lst.total_dead, lst.total_dark);
ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
c->lst.total_dead, c->lst.total_dark);
err = -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 0c9c69bd983a..dfcb5748a7dc 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -29,6 +29,12 @@
#include <linux/slab.h>
#include "ubifs.h"
+#ifdef CONFIG_UBIFS_FS_DEBUG
+static int dbg_populate_lsave(struct ubifs_info *c);
+#else
+#define dbg_populate_lsave(c) 0
+#endif
+
/**
* first_dirty_cnode - find first dirty cnode.
* @c: UBIFS file-system description object
@@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
if (nnode->nbranch[iip].lnum)
break;
}
- } while (iip >= UBIFS_LPT_FANOUT);
+ } while (iip >= UBIFS_LPT_FANOUT);
/* Go right */
nnode = ubifs_get_nnode(c, nnode, iip);
@@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c)
c->lpt_drty_flgs |= LSAVE_DIRTY;
ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
}
+
+ if (dbg_populate_lsave(c))
+ return;
+
list_for_each_entry(lprops, &c->empty_list, list) {
c->lsave[cnt++] = lprops->lnum;
if (cnt >= c->lsave_cnt)
@@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c)
current->pid);
}
+/**
+ * dbg_populate_lsave - debugging version of 'populate_lsave()'
+ * @c: UBIFS file-system description object
+ *
+ * This is a debugging version for 'populate_lsave()' which populates lsave
+ * with random LEBs instead of useful LEBs, which is good for test coverage.
+ * Returns zero if lsave has not been populated (this debugging feature is
+ * disabled or randomly skipped) and non-zero if lsave has been populated.
+ */
+static int dbg_populate_lsave(struct ubifs_info *c)
+{
+ struct ubifs_lprops *lprops;
+ struct ubifs_lpt_heap *heap;
+ int i;
+
+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ return 0;
+ if (random32() & 3)
+ return 0;
+
+ for (i = 0; i < c->lsave_cnt; i++)
+ c->lsave[i] = c->main_first;
+
+ list_for_each_entry(lprops, &c->empty_list, list)
+ c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
+ list_for_each_entry(lprops, &c->freeable_list, list)
+ c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
+ list_for_each_entry(lprops, &c->frdi_idx_list, list)
+ c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
+
+ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
+ for (i = 0; i < heap->cnt; i++)
+ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
+ heap = &c->lpt_heap[LPROPS_DIRTY - 1];
+ for (i = 0; i < heap->cnt; i++)
+ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
+ heap = &c->lpt_heap[LPROPS_FREE - 1];
+ for (i = 0; i < heap->cnt; i++)
+ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
+
+ return 1;
+}
+
#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 21f47afdacff..278c2382e8c2 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
}
main_sz = (long long)c->main_lebs * c->leb_size;
- if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
+ if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
err = 9;
goto out;
}
@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
}
if (c->lst.total_dead + c->lst.total_dark +
- c->lst.total_used + c->old_idx_sz > main_sz) {
+ c->lst.total_used + c->bi.old_idx_sz > main_sz) {
err = 21;
goto out;
}
@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
- c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
+ c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
- c->calc_idx_sz = c->old_idx_sz;
+ c->calc_idx_sz = c->bi.old_idx_sz;
if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
c->no_orphs = 1;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index c3de04dc952a..0b5296a9a4c5 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c)
mutex_unlock(&c->lp_mutex);
}
+/**
+ * ubifs_next_log_lnum - get the number of the next log LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: current log LEB
+ *
+ * This helper function returns the log LEB number which goes next after LEB
+ * 'lnum', wrapping around to %UBIFS_LOG_LNUM once @c->log_last is passed.
+ */
+static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
+{
+ lnum += 1;
+ if (lnum > c->log_last)
+ lnum = UBIFS_LOG_LNUM;
+
+ return lnum;
+}
+
#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 09df318e368f..bd644bf587a8 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c)
sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb)) {
if (PTR_ERR(sleb) == -EUCLEAN)
- sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
+ sleb = ubifs_recover_leb(c, lnum, 0,
+ c->sbuf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3dbad6fbd1eb..731d9e2e7b50 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
}
/**
- * drop_incomplete_group - drop nodes from an incomplete group.
+ * drop_last_node - drop the last node or group of nodes.
* @sleb: scanned LEB information
* @offs: offset of dropped nodes is returned here
+ * @grouped: non-zero if whole group of nodes have to be dropped
*
- * This function returns %1 if nodes are dropped and %0 otherwise.
+ * This is a helper function for 'ubifs_recover_leb()' which drops the last
+ * node of the scanned LEB or the last group of nodes if @grouped is not zero.
+ * This function returns %1 if a node was dropped and %0 otherwise.
*/
-static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
+static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
{
int dropped = 0;
@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
kfree(snod);
sleb->nodes_cnt -= 1;
dropped = 1;
+ if (!grouped)
+ break;
}
return dropped;
}
@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf, int grouped)
{
- int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
- int empty_chkd = 0, start = offs;
+ int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
struct ubifs_scan_leb *sleb;
void *buf = sbuf + offs;
@@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
if (IS_ERR(sleb))
return sleb;
- if (sleb->ecc)
- need_clean = 1;
-
+ ubifs_assert(len >= 8);
while (len >= 8) {
- int ret;
-
dbg_scan("look at LEB %d:%d (%d bytes left)",
lnum, offs, len);
@@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
* Scan quietly until there is an error from which we cannot
* recover
*/
- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
-
+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
if (ret == SCANNED_A_NODE) {
/* A valid node, and not a padding node */
struct ubifs_ch *ch = buf;
@@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
offs += node_len;
buf += node_len;
len -= node_len;
- continue;
- }
-
- if (ret > 0) {
+ } else if (ret > 0) {
/* Padding bytes or a valid padding node */
offs += ret;
buf += ret;
len -= ret;
- continue;
- }
-
- if (ret == SCANNED_EMPTY_SPACE) {
- if (!is_empty(buf, len)) {
- if (!is_last_write(c, buf, offs))
- break;
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- }
- empty_chkd = 1;
+ } else if (ret == SCANNED_EMPTY_SPACE ||
+ ret == SCANNED_GARBAGE ||
+ ret == SCANNED_A_BAD_PAD_NODE ||
+ ret == SCANNED_A_CORRUPT_NODE) {
+ dbg_rcvry("found corruption - %d", ret);
break;
- }
-
- if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
- if (is_last_write(c, buf, offs)) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- empty_chkd = 1;
- break;
- }
-
- if (ret == SCANNED_A_CORRUPT_NODE)
- if (no_more_nodes(c, buf, len, lnum, offs)) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- empty_chkd = 1;
- break;
- }
-
- if (quiet) {
- /* Redo the last scan but noisily */
- quiet = 0;
- continue;
- }
-
- switch (ret) {
- case SCANNED_GARBAGE:
- dbg_err("garbage");
- goto corrupted;
- case SCANNED_A_CORRUPT_NODE:
- case SCANNED_A_BAD_PAD_NODE:
- dbg_err("bad node");
- goto corrupted;
- default:
- dbg_err("unknown");
+ } else {
+ dbg_err("unexpected return value %d", ret);
err = -EINVAL;
goto error;
}
}
- if (!empty_chkd && !is_empty(buf, len)) {
- if (is_last_write(c, buf, offs)) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- } else {
+ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
+ if (!is_last_write(c, buf, offs))
+ goto corrupted_rescan;
+ } else if (ret == SCANNED_A_CORRUPT_NODE) {
+ if (!no_more_nodes(c, buf, len, lnum, offs))
+ goto corrupted_rescan;
+ } else if (!is_empty(buf, len)) {
+ if (!is_last_write(c, buf, offs)) {
int corruption = first_non_ff(buf, len);
/*
@@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
}
}
- /* Drop nodes from incomplete group */
- if (grouped && drop_incomplete_group(sleb, &offs)) {
- buf = sbuf + offs;
- len = c->leb_size - offs;
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- }
+ min_io_unit = round_down(offs, c->min_io_size);
+ if (grouped)
+ /*
+ * If nodes are grouped, always drop the incomplete group at
+ * the end.
+ */
+ drop_last_node(sleb, &offs, 1);
- if (offs % c->min_io_size) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- }
+ /*
+ * While we are in the middle of the same min. I/O unit keep dropping
+ * nodes. So basically, what we want is to make sure that the last min.
+ * I/O unit where we saw the corruption is dropped completely with all
+ * the uncorrupted nodes which may possibly sit there.
+ *
+ * In other words, let's name the min. I/O unit where the corruption
+ * starts B, and the previous min. I/O unit A. The below code tries to
+ * deal with a situation when half of B contains valid nodes or the end
+ * of a valid node, and the second half of B contains corrupted data or
+ * garbage. This means that UBIFS had been writing to B just before the
+ * power cut happened. I do not know how realistic is this scenario
+ * that half of the min. I/O unit had been written successfully and the
+ * other half not, but this is possible in our 'failure mode emulation'
+ * infrastructure at least.
+ *
+ * So what is the problem, why we need to drop those nodes? Why can't
+ * we just clean-up the second half of B by putting a padding node
+ * there? We can, and this works fine with one exception which was
+ * reproduced with power cut emulation testing and happens extremely
+ * rarely. The description follows, but it is worth noting that that is
+ * only about the GC head, so we could do this trick only if the bud
+ * belongs to the GC head, but it does not seem to be worth an
+ * additional "if" statement.
+ *
+ * So, imagine the file-system is full, we run GC which is moving valid
+ * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head
+ * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X
+ * and will try to continue. Imagine that LEB X is currently the
+ * dirtiest LEB, and the amount of used space in LEB Y is exactly the
+ * same as amount of free space in LEB X.
+ *
+ * And a power cut happens when nodes are moved from LEB X to LEB Y. We
+ * are here trying to recover LEB Y which is the GC head LEB. We find
+ * the min. I/O unit B as described above. Then we clean-up LEB Y by
+ * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function
+ * fails, because it cannot find a dirty LEB which could be GC'd into
+ * LEB Y! Even LEB X does not match because the amount of valid nodes
+ * there does not fit the free space in LEB Y any more! And this is
+ * because of the padding node which we added to LEB Y. The
+ * user-visible effect of this which I once observed and analysed is
+ * that we cannot mount the file-system with -ENOSPC error.
+ *
+ * So obviously, to make sure that situation does not happen we should
+ * free min. I/O unit B in LEB Y completely and the last used min. I/O
+ * unit in LEB Y should be A. This is basically what the below code
+ * tries to do.
+ */
+ while (min_io_unit == round_down(offs, c->min_io_size) &&
+ min_io_unit != offs &&
+ drop_last_node(sleb, &offs, grouped));
+
+ buf = sbuf + offs;
+ len = c->leb_size - offs;
+ clean_buf(c, &buf, lnum, &offs, &len);
ubifs_end_scan(c, sleb, lnum, offs);
- if (need_clean) {
- err = fix_unclean_leb(c, sleb, start);
- if (err)
- goto error;
- }
+ err = fix_unclean_leb(c, sleb, start);
+ if (err)
+ goto error;
return sleb;
+corrupted_rescan:
+ /* Re-scan the corrupted data with verbose messages */
+ dbg_err("corruptio %d", ret);
+ ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
corrupted:
ubifs_scanned_corruption(c, lnum, offs, buf);
err = -EUCLEAN;
@@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
}
/**
+ * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
+ * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
+ * zero in case of success and a negative error code in case of failure.
+ */
+static int grab_empty_leb(struct ubifs_info *c)
+{
+ int lnum, err;
+
+ /*
+ * Note, it is very important to first search for an empty LEB and then
+ * run the commit, not vice-versa. The reason is that there might be
+ * only one empty LEB at the moment, the one which has been the
+ * @c->gc_lnum just before the power cut happened. During the regular
+ * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
+ * one but GC can grab it. But at this moment this single empty LEB is
+ * not marked as taken, so if we run commit - what happens? Right, the
+ * commit will grab it and write the index there. Remember that the
+ * index always expands as long as there is free space, and it only
+ * starts consolidating when we run out of space.
+ *
+ * IOW, if we run commit now, we might not be able to find a free LEB
+ * after this.
+ */
+ lnum = ubifs_find_free_leb_for_idx(c);
+ if (lnum < 0) {
+ dbg_err("could not find an empty LEB");
+ dbg_dump_lprops(c);
+ dbg_dump_budg(c, &c->bi);
+ return lnum;
+ }
+
+ /* Reset the index flag (LPROPS_INDEX) of the LEB we have just found */
+ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+ LPROPS_INDEX, 0);
+ if (err)
+ return err;
+
+ c->gc_lnum = lnum;
+ dbg_rcvry("found empty LEB %d, run commit", lnum);
+
+ return ubifs_run_commit(c);
+}
+
+/**
* ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
* @c: UBIFS file-system description object
*
@@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
{
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
struct ubifs_lprops lp;
- int lnum, err;
+ int err;
+
+ dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
c->gc_lnum = -1;
- if (wbuf->lnum == -1) {
- dbg_rcvry("no GC head LEB");
- goto find_free;
- }
- /*
- * See whether the used space in the dirtiest LEB fits in the GC head
- * LEB.
- */
- if (wbuf->offs == c->leb_size) {
- dbg_rcvry("no room in GC head LEB");
- goto find_free;
- }
+ if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
+ return grab_empty_leb(c);
+
err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
if (err) {
- /*
- * There are no dirty or empty LEBs subject to here being
- * enough for the index. Try to use
- * 'ubifs_find_free_leb_for_idx()', which will return any empty
- * LEBs (ignoring index requirements). If the index then
- * doesn't have enough LEBs the recovery commit will fail -
- * which is the same result anyway i.e. recovery fails. So
- * there is no problem ignoring index requirements and just
- * grabbing a free LEB since we have already established there
- * is not a dirty LEB we could have used instead.
- */
- if (err == -ENOSPC) {
- dbg_rcvry("could not find a dirty LEB");
- goto find_free;
- }
- return err;
- }
- ubifs_assert(!(lp.flags & LPROPS_INDEX));
- lnum = lp.lnum;
- if (lp.free + lp.dirty == c->leb_size) {
- /* An empty LEB was returned */
- if (lp.free != c->leb_size) {
- err = ubifs_change_one_lp(c, lnum, c->leb_size,
- 0, 0, 0, 0);
- if (err)
- return err;
- }
- err = ubifs_leb_unmap(c, lnum);
- if (err)
+ if (err != -ENOSPC)
return err;
- c->gc_lnum = lnum;
- dbg_rcvry("allocated LEB %d for GC", lnum);
- /* Run the commit */
- dbg_rcvry("committing");
- return ubifs_run_commit(c);
- }
- /*
- * There was no empty LEB so the used space in the dirtiest LEB must fit
- * in the GC head LEB.
- */
- if (lp.free + lp.dirty < wbuf->offs) {
- dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
- lnum, wbuf->lnum, wbuf->offs);
- err = ubifs_return_leb(c, lnum);
- if (err)
- return err;
- goto find_free;
+
+ dbg_rcvry("could not find a dirty LEB");
+ return grab_empty_leb(c);
}
+
+ ubifs_assert(!(lp.flags & LPROPS_INDEX));
+ ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
+
/*
* We run the commit before garbage collection otherwise subsequent
* mounts will see the GC and orphan deletion in a different order.
@@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
err = ubifs_run_commit(c);
if (err)
return err;
- /*
- * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
- * - use locking to keep 'ubifs_assert()' happy.
- */
- dbg_rcvry("GC'ing LEB %d", lnum);
+
+ dbg_rcvry("GC'ing LEB %d", lp.lnum);
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
err = ubifs_garbage_collect_leb(c, &lp);
if (err >= 0) {
@@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
err = -EINVAL;
return err;
}
- if (err != LEB_RETAINED) {
- dbg_err("GC returned %d", err);
+
+ ubifs_assert(err == LEB_RETAINED);
+ if (err != LEB_RETAINED)
return -EINVAL;
- }
+
err = ubifs_leb_unmap(c, c->gc_lnum);
if (err)
return err;
- dbg_rcvry("allocated LEB %d for GC", lnum);
- return 0;
-find_free:
- /*
- * There is no GC head LEB or the free space in the GC head LEB is too
- * small, or there are not dirty LEBs. Allocate gc_lnum by calling
- * 'ubifs_find_free_leb_for_idx()' so GC is not run.
- */
- lnum = ubifs_find_free_leb_for_idx(c);
- if (lnum < 0) {
- dbg_err("could not find an empty LEB");
- return lnum;
- }
- /* And reset the index flag */
- err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
- LPROPS_INDEX, 0);
- if (err)
- return err;
- c->gc_lnum = lnum;
- dbg_rcvry("allocated LEB %d for GC", lnum);
- /* Run the commit */
- dbg_rcvry("committing");
- return ubifs_run_commit(c);
+ dbg_rcvry("allocated LEB %d for GC", lp.lnum);
+ return 0;
}
/**
@@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
if (err)
goto out;
- dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
+ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
(unsigned long)e->inum, lnum, offs, i_size, e->d_size);
return 0;
@@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c)
e->i_size = le64_to_cpu(ino->size);
}
}
+
if (e->exists && e->i_size < e->d_size) {
- if (!e->inode && c->ro_mount) {
+ if (c->ro_mount) {
/* Fix the inode size and pin it in memory */
struct inode *inode;
+ struct ubifs_inode *ui;
+
+ ubifs_assert(!e->inode);
inode = ubifs_iget(c->vfs_sb, e->inum);
if (IS_ERR(inode))
return PTR_ERR(inode);
+
+ ui = ubifs_inode(inode);
if (inode->i_size < e->d_size) {
dbg_rcvry("ino %lu size %lld -> %lld",
(unsigned long)e->inum,
- e->d_size, inode->i_size);
+ inode->i_size, e->d_size);
inode->i_size = e->d_size;
- ubifs_inode(inode)->ui_size = e->d_size;
+ ui->ui_size = e->d_size;
+ ui->synced_i_size = e->d_size;
e->inode = inode;
this = rb_next(this);
continue;
@@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c)
iput(e->inode);
}
}
+
this = rb_next(this);
rb_erase(&e->rb, &c->size_tree);
kfree(e);
}
+
return 0;
}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eed0fcff8d73..6617280d1679 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -33,43 +33,32 @@
*/
#include "ubifs.h"
-
-/*
- * Replay flags.
- *
- * REPLAY_DELETION: node was deleted
- * REPLAY_REF: node is a reference node
- */
-enum {
- REPLAY_DELETION = 1,
- REPLAY_REF = 2,
-};
+#include <linux/list_sort.h>
/**
- * struct replay_entry - replay tree entry.
+ * struct replay_entry - replay list entry.
* @lnum: logical eraseblock number of the node
* @offs: node offset
* @len: node length
+ * @deletion: non-zero if this entry corresponds to a node deletion
* @sqnum: node sequence number
- * @flags: replay flags
- * @rb: links the replay tree
+ * @list: links the replay list
* @key: node key
* @nm: directory entry name
* @old_size: truncation old size
* @new_size: truncation new size
- * @free: amount of free space in a bud
- * @dirty: amount of dirty space in a bud from padding and deletion nodes
*
- * UBIFS journal replay must compare node sequence numbers, which means it must
- * build a tree of node information to insert into the TNC.
+ * The replay process first scans all buds and builds the replay list, then
+ * sorts the replay list in node sequence number order, and then inserts all
+ * the replay entries to the TNC.
*/
struct replay_entry {
int lnum;
int offs;
int len;
+ unsigned int deletion:1;
unsigned long long sqnum;
- int flags;
- struct rb_node rb;
+ struct list_head list;
union ubifs_key key;
union {
struct qstr nm;
@@ -77,10 +66,6 @@ struct replay_entry {
loff_t old_size;
loff_t new_size;
};
- struct {
- int free;
- int dirty;
- };
};
};
@@ -88,57 +73,64 @@ struct replay_entry {
* struct bud_entry - entry in the list of buds to replay.
* @list: next bud in the list
* @bud: bud description object
- * @free: free bytes in the bud
* @sqnum: reference node sequence number
+ * @free: free bytes in the bud
+ * @dirty: dirty bytes in the bud
*/
struct bud_entry {
struct list_head list;
struct ubifs_bud *bud;
- int free;
unsigned long long sqnum;
+ int free;
+ int dirty;
};
/**
* set_bud_lprops - set free and dirty space used by a bud.
* @c: UBIFS file-system description object
- * @r: replay entry of bud
+ * @b: bud entry which describes the bud
+ *
+ * This function makes sure the LEB properties of bud @b are set correctly
+ * after the replay. Returns zero in case of success and a negative error code
+ * in case of failure.
*/
-static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
+static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
{
const struct ubifs_lprops *lp;
int err = 0, dirty;
ubifs_get_lprops(c);
- lp = ubifs_lpt_lookup_dirty(c, r->lnum);
+ lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
dirty = lp->dirty;
- if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
+ if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
/*
* The LEB was added to the journal with a starting offset of
* zero which means the LEB must have been empty. The LEB
- * property values should be lp->free == c->leb_size and
- * lp->dirty == 0, but that is not the case. The reason is that
- * the LEB was garbage collected. The garbage collector resets
- * the free and dirty space without recording it anywhere except
- * lprops, so if there is not a commit then lprops does not have
- * that information next time the file system is mounted.
+ * property values should be @lp->free == @c->leb_size and
+ * @lp->dirty == 0, but that is not the case. The reason is that
+ * the LEB had been garbage collected before it became the bud,
+ * and there was no commit in between. The garbage collector
+ * resets the free and dirty space without recording it
+ * anywhere except lprops, so if there was no commit then
+ * lprops does not have that information.
*
* We do not need to adjust free space because the scan has told
* us the exact value which is recorded in the replay entry as
- * r->free.
+ * @b->free.
*
* However we do need to subtract from the dirty space the
* amount of space that the garbage collector reclaimed, which
* is the whole LEB minus the amount of space that was free.
*/
- dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
lp->free, lp->dirty);
- dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
lp->free, lp->dirty);
dirty -= c->leb_size - lp->free;
/*
@@ -150,21 +142,48 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
*/
if (dirty != 0)
dbg_msg("LEB %d lp: %d free %d dirty "
- "replay: %d free %d dirty", r->lnum, lp->free,
- lp->dirty, r->free, r->dirty);
+ "replay: %d free %d dirty", b->bud->lnum,
+ lp->free, lp->dirty, b->free, b->dirty);
}
- lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
+ lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
lp->flags | LPROPS_TAKEN, 0);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
+
+ /* Make sure the journal head points to the latest bud */
+ err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
+ b->bud->lnum, c->leb_size - b->free,
+ UBI_SHORTTERM);
+
out:
ubifs_release_lprops(c);
return err;
}
/**
+ * set_buds_lprops - set free and dirty space for all replayed buds.
+ * @c: UBIFS file-system description object
+ *
+ * This function sets LEB properties for all replayed buds. Returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+static int set_buds_lprops(struct ubifs_info *c)
+{
+ struct bud_entry *b;
+ int err;
+
+ list_for_each_entry(b, &c->replay_buds, list) {
+ err = set_bud_lprops(c, b);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
* trun_remove_range - apply a replay entry for a truncation to the TNC.
* @c: UBIFS file-system description object
* @r: replay entry of truncation
@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
*/
static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
{
- int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
+ int err;
- dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
- r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
+ dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
+ r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
/* Set c->replay_sqnum to help deal with dangling branches. */
c->replay_sqnum = r->sqnum;
- if (r->flags & REPLAY_REF)
- err = set_bud_lprops(c, r);
- else if (is_hash_key(c, &r->key)) {
- if (deletion)
+ if (is_hash_key(c, &r->key)) {
+ if (r->deletion)
err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
else
err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
r->len, &r->nm);
} else {
- if (deletion)
+ if (r->deletion)
switch (key_type(c, &r->key)) {
case UBIFS_INO_KEY:
{
@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
return err;
if (c->need_recovery)
- err = ubifs_recover_size_accum(c, &r->key, deletion,
+ err = ubifs_recover_size_accum(c, &r->key, r->deletion,
r->new_size);
}
@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
}
/**
- * destroy_replay_tree - destroy the replay.
- * @c: UBIFS file-system description object
+ * replay_entries_cmp - compare 2 replay entries.
+ * @priv: UBIFS file-system description object
+ * @a: first replay entry
+ * @b: second replay entry
*
- * Destroy the replay tree.
+ * This is a comparison function for 'list_sort()' which compares 2 replay
+ * entries @a and @b by sequence number. Returns %0 if @a is @b, %1 if @a has
+ * the greater sequence number, and %-1 otherwise.
*/
-static void destroy_replay_tree(struct ubifs_info *c)
+static int replay_entries_cmp(void *priv, struct list_head *a,
+ struct list_head *b)
{
- struct rb_node *this = c->replay_tree.rb_node;
- struct replay_entry *r;
-
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- r = rb_entry(this, struct replay_entry, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &r->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
- if (is_hash_key(c, &r->key))
- kfree(r->nm.name);
- kfree(r);
- }
- c->replay_tree = RB_ROOT;
+ struct replay_entry *ra, *rb;
+
+ cond_resched();
+ if (a == b)
+ return 0;
+
+ ra = list_entry(a, struct replay_entry, list);
+ rb = list_entry(b, struct replay_entry, list);
+ ubifs_assert(ra->sqnum != rb->sqnum);
+ if (ra->sqnum > rb->sqnum)
+ return 1;
+ return -1;
}
/**
- * apply_replay_tree - apply the replay tree to the TNC.
+ * apply_replay_list - apply the replay list to the TNC.
* @c: UBIFS file-system description object
*
- * Apply the replay tree.
- * Returns zero in case of success and a negative error code in case of
- * failure.
+ * Apply all entries in the replay list to the TNC. Returns zero in case of
+ * success and a negative error code in case of failure.
*/
-static int apply_replay_tree(struct ubifs_info *c)
+static int apply_replay_list(struct ubifs_info *c)
{
- struct rb_node *this = rb_first(&c->replay_tree);
+ struct replay_entry *r;
+ int err;
- while (this) {
- struct replay_entry *r;
- int err;
+ list_sort(c, &c->replay_list, &replay_entries_cmp);
+ list_for_each_entry(r, &c->replay_list, list) {
cond_resched();
- r = rb_entry(this, struct replay_entry, rb);
err = apply_replay_entry(c, r);
if (err)
return err;
- this = rb_next(this);
}
+
return 0;
}
/**
- * insert_node - insert a node to the replay tree.
+ * destroy_replay_list - destroy the replay list.
+ * @c: UBIFS file-system description object
+ *
+ * Destroy the replay list.
+ */
+static void destroy_replay_list(struct ubifs_info *c)
+{
+ struct replay_entry *r, *tmp;
+
+ list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
+ if (is_hash_key(c, &r->key))
+ kfree(r->nm.name);
+ list_del(&r->list);
+ kfree(r);
+ }
+}
+
+/**
+ * insert_node - insert a node to the replay list
* @c: UBIFS file-system description object
* @lnum: node logical eraseblock number
* @offs: node offset
@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
* @old_size: truncation old size
* @new_size: truncation new size
*
- * This function inserts a scanned non-direntry node to the replay tree. The
- * replay tree is an RB-tree containing @struct replay_entry elements which are
- * indexed by the sequence number. The replay tree is applied at the very end
- * of the replay process. Since the tree is sorted in sequence number order,
- * the older modifications are applied first. This function returns zero in
- * case of success and a negative error code in case of failure.
+ * This function inserts a scanned non-direntry node to the replay list. The
+ * replay list contains @struct replay_entry elements, and we sort this list in
+ * sequence number order before applying it. The replay list is applied at the
+ * very end of the replay process. Since the list is sorted in sequence number
+ * order, the older modifications are applied first. This function returns zero
+ * in case of success and a negative error code in case of failure.
*/
static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, unsigned long long sqnum,
int deletion, int *used, loff_t old_size,
loff_t new_size)
{
- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r;
+ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+
if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key);
- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
- while (*p) {
- parent = *p;
- r = rb_entry(parent, struct replay_entry, rb);
- if (sqnum < r->sqnum) {
- p = &(*p)->rb_left;
- continue;
- } else if (sqnum > r->sqnum) {
- p = &(*p)->rb_right;
- continue;
- }
- ubifs_err("duplicate sqnum in replay");
- return -EINVAL;
- }
-
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r)
return -ENOMEM;
@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
r->lnum = lnum;
r->offs = offs;
r->len = len;
+ r->deletion = !!deletion;
r->sqnum = sqnum;
- r->flags = (deletion ? REPLAY_DELETION : 0);
+ key_copy(c, key, &r->key);
r->old_size = old_size;
r->new_size = new_size;
- key_copy(c, key, &r->key);
- rb_link_node(&r->rb, parent, p);
- rb_insert_color(&r->rb, &c->replay_tree);
+ list_add_tail(&r->list, &c->replay_list);
return 0;
}
/**
- * insert_dent - insert a directory entry node into the replay tree.
+ * insert_dent - insert a directory entry node into the replay list.
* @c: UBIFS file-system description object
* @lnum: node logical eraseblock number
* @offs: node offset
@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
* @deletion: non-zero if this is a deletion
* @used: number of bytes in use in a LEB
*
- * This function inserts a scanned directory entry node to the replay tree.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- *
- * This function is also used for extended attribute entries because they are
- * implemented as directory entry nodes.
+ * This function inserts a scanned directory entry node or an extended
+ * attribute entry to the replay list. Returns zero in case of success and a
+ * negative error code in case of failure.
*/
static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, const char *name, int nlen,
unsigned long long sqnum, int deletion, int *used)
{
- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r;
char *nbuf;
+ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key);
- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
- while (*p) {
- parent = *p;
- r = rb_entry(parent, struct replay_entry, rb);
- if (sqnum < r->sqnum) {
- p = &(*p)->rb_left;
- continue;
- }
- if (sqnum > r->sqnum) {
- p = &(*p)->rb_right;
- continue;
- }
- ubifs_err("duplicate sqnum in replay");
- return -EINVAL;
- }
-
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r)
return -ENOMEM;
+
nbuf = kmalloc(nlen + 1, GFP_KERNEL);
if (!nbuf) {
kfree(r);
@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
r->lnum = lnum;
r->offs = offs;
r->len = len;
+ r->deletion = !!deletion;
r->sqnum = sqnum;
+ key_copy(c, key, &r->key);
r->nm.len = nlen;
memcpy(nbuf, name, nlen);
nbuf[nlen] = '\0';
r->nm.name = nbuf;
- r->flags = (deletion ? REPLAY_DELETION : 0);
- key_copy(c, key, &r->key);
- ubifs_assert(!*p);
- rb_link_node(&r->rb, parent, p);
- rb_insert_color(&r->rb, &c->replay_tree);
+ list_add_tail(&r->list, &c->replay_list);
return 0;
}
@@ -482,29 +473,92 @@ int ubifs_validate_entry(struct ubifs_info *c,
}
/**
+ * is_last_bud - check if the bud is the last in the journal head.
+ * @c: UBIFS file-system description object
+ * @bud: bud description object
+ *
+ * This function checks if bud @bud is the last bud in its journal head. This
+ * information is then used by 'replay_bud()' to decide whether the bud can
+ * have corruptions or not. Indeed, only last buds can be corrupted by power
+ * cuts. Returns %1 if this is the last bud, and %0 if not.
+ */
+static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
+{
+ struct ubifs_jhead *jh = &c->jheads[bud->jhead];
+ struct ubifs_bud *next;
+ uint32_t data;
+ int err;
+
+ if (list_is_last(&bud->list, &jh->buds_list))
+ return 1;
+
+ /*
+ * The following is a quirk to make sure we work correctly with UBIFS
+ * images used with older UBIFS.
+ *
+ * Normally, the last bud will be the last in the journal head's list
+ * of buds. However, there is one exception if the UBIFS image belongs
+ * to older UBIFS. This is fairly unlikely: one would need to use old
+ * UBIFS, then have a power cut exactly at the right point, and then
+ * try to mount this image with new UBIFS.
+ *
+ * The exception is: it is possible to have 2 buds A and B, A goes
+ * before B, and B is the last, bud B contains no data, and bud A is
+ * corrupted at the end. The reason is that in older versions when the
+ * journal code switched to the next bud (from A to B), it first added a
+ * log reference node for the new bud (B), and only after this it
+ * synchronized the write-buffer of current bud (A). But later this was
+ * changed and UBIFS started to always synchronize the write-buffer of
+ * the bud (A) before writing the log reference for the new bud (B).
+ *
+ * But because older UBIFS always synchronized A's write-buffer before
+ * writing to B, we can recognize this exceptional situation by
+ * checking the contents of bud B - if it is empty, then A can be
+ * treated as the last and we can recover it.
+ *
+ * TODO: remove this piece of code in a couple of years (today it is
+ * 16.05.2011).
+ */
+ next = list_entry(bud->list.next, struct ubifs_bud, list);
+ if (!list_is_last(&next->list, &jh->buds_list))
+ return 0;
+
+ err = ubi_read(c->ubi, next->lnum, (char *)&data,
+ next->start, 4);
+ if (err)
+ return 0;
+
+ return data == 0xFFFFFFFF;
+}
+
+/**
* replay_bud - replay a bud logical eraseblock.
* @c: UBIFS file-system description object
- * @lnum: bud logical eraseblock number to replay
- * @offs: bud start offset
- * @jhead: journal head to which this bud belongs
- * @free: amount of free space in the bud is returned here
- * @dirty: amount of dirty space from padding and deletion nodes is returned
- * here
+ * @b: bud entry which describes the bud
*
- * This function returns zero in case of success and a negative error code in
- * case of failure.
+ * This function replays bud @b, recovers it if needed, and adds all nodes
+ * from this bud to the replay list. Returns zero in case of success and a
+ * negative error code in case of failure.
*/
-static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
- int *free, int *dirty)
+static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
{
- int err = 0, used = 0;
+ int is_last = is_last_bud(c, b->bud);
+ int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
- struct ubifs_bud *bud;
- dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
- if (c->need_recovery)
- sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
+ dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
+ lnum, b->bud->jhead, offs, is_last);
+
+ if (c->need_recovery && is_last)
+ /*
+ * Recover only last LEBs in the journal heads, because power
+ * cuts may cause corruptions only in these LEBs, because only
+ * these LEBs could possibly be written to at the power cut
+ * time.
+ */
+ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
+ b->bud->jhead != GCHD);
else
sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
if (IS_ERR(sleb))
@@ -620,19 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
goto out;
}
- bud = ubifs_search_bud(c, lnum);
- if (!bud)
- BUG();
-
+ ubifs_assert(ubifs_search_bud(c, lnum));
ubifs_assert(sleb->endpt - offs >= used);
ubifs_assert(sleb->endpt % c->min_io_size == 0);
- if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount)
- err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
- sleb->endpt, UBI_SHORTTERM);
-
- *dirty = sleb->endpt - offs - used;
- *free = c->leb_size - sleb->endpt;
+ b->dirty = sleb->endpt - offs - used;
+ b->free = c->leb_size - sleb->endpt;
+ dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
out:
ubifs_scan_destroy(sleb);
@@ -646,55 +694,6 @@ out_dump:
}
/**
- * insert_ref_node - insert a reference node to the replay tree.
- * @c: UBIFS file-system description object
- * @lnum: node logical eraseblock number
- * @offs: node offset
- * @sqnum: sequence number
- * @free: amount of free space in bud
- * @dirty: amount of dirty space from padding and deletion nodes
- *
- * This function inserts a reference node to the replay tree and returns zero
- * in case of success or a negative error code in case of failure.
- */
-static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
- unsigned long long sqnum, int free, int dirty)
-{
- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
- struct replay_entry *r;
-
- dbg_mnt("add ref LEB %d:%d", lnum, offs);
- while (*p) {
- parent = *p;
- r = rb_entry(parent, struct replay_entry, rb);
- if (sqnum < r->sqnum) {
- p = &(*p)->rb_left;
- continue;
- } else if (sqnum > r->sqnum) {
- p = &(*p)->rb_right;
- continue;
- }
- ubifs_err("duplicate sqnum in replay tree");
- return -EINVAL;
- }
-
- r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
- if (!r)
- return -ENOMEM;
-
- r->lnum = lnum;
- r->offs = offs;
- r->sqnum = sqnum;
- r->flags = REPLAY_REF;
- r->free = free;
- r->dirty = dirty;
-
- rb_link_node(&r->rb, parent, p);
- rb_insert_color(&r->rb, &c->replay_tree);
- return 0;
-}
-
-/**
* replay_buds - replay all buds.
* @c: UBIFS file-system description object
*
@@ -704,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
static int replay_buds(struct ubifs_info *c)
{
struct bud_entry *b;
- int err, uninitialized_var(free), uninitialized_var(dirty);
+ int err;
+ unsigned long long prev_sqnum = 0;
list_for_each_entry(b, &c->replay_buds, list) {
- err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
- &free, &dirty);
- if (err)
- return err;
- err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
- free, dirty);
+ err = replay_bud(c, b);
if (err)
return err;
+
+ ubifs_assert(b->sqnum > prev_sqnum);
+ prev_sqnum = b->sqnum;
}
return 0;
@@ -1054,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c)
if (err)
goto out;
- err = apply_replay_tree(c);
+ err = apply_replay_list(c);
+ if (err)
+ goto out;
+
+ err = set_buds_lprops(c);
if (err)
goto out;
/*
- * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
- * to roughly estimate index growth. Things like @c->min_idx_lebs
+ * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
+ * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
* depend on it. This means we have to initialize it to make sure
* budgeting works properly.
*/
- c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
- c->budg_uncommitted_idx *= c->max_idx_node_sz;
+ c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
+ c->bi.uncommitted_idx *= c->max_idx_node_sz;
ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
(unsigned long)c->highest_inum);
out:
- destroy_replay_tree(c);
+ destroy_replay_list(c);
destroy_bud_list(c);
c->replaying = 0;
return err;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index bf31b4729e51..c606f010e8df 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -475,7 +475,8 @@ failed:
* @c: UBIFS file-system description object
*
* This function returns a pointer to the superblock node or a negative error
- * code.
+ * code. Note, the user of this function is responsible for kfree()'ing the
+ * returned superblock buffer.
*/
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
{
@@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
memcpy(&c->uuid, &sup->uuid, 16);
c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
+ c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
/* Automatically increase file system size to the maximum size */
c->old_leb_cnt = c->leb_cnt;
@@ -650,3 +652,152 @@ out:
kfree(sup);
return err;
}
+
+/**
+ * fixup_leb - fixup/unmap an LEB containing free space.
+ * @c: UBIFS file-system description object
+ * @lnum: the LEB number to fix up
+ * @len: number of used bytes in LEB (starting at offset 0)
+ *
+ * This function reads the contents of the given LEB number @lnum, then fixes
+ * it up, so that empty min. I/O units at the end of LEB are actually erased on
+ * flash (rather than being just all-0xff real data). If the LEB is completely
+ * empty, it is simply unmapped.
+ */
+static int fixup_leb(struct ubifs_info *c, int lnum, int len)
+{
+ int err;
+
+ ubifs_assert(len >= 0);
+ ubifs_assert(len % c->min_io_size == 0);
+ ubifs_assert(len < c->leb_size);
+
+ if (len == 0) {
+ dbg_mnt("unmap empty LEB %d", lnum);
+ return ubi_leb_unmap(c->ubi, lnum);
+ }
+
+ dbg_mnt("fixup LEB %d, data len %d", lnum, len);
+ err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
+ if (err)
+ return err;
+
+ return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
+}
+
+/**
+ * fixup_free_space - find & remap all LEBs containing free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function walks through all LEBs in the filesystem and fixes up those
+ * containing free/empty space.
+ */
+static int fixup_free_space(struct ubifs_info *c)
+{
+ int lnum, err = 0;
+ struct ubifs_lprops *lprops;
+
+ ubifs_get_lprops(c);
+
+ /* Fixup LEBs in the master area */
+ for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
+ err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
+ if (err)
+ goto out;
+ }
+
+ /* Unmap unused log LEBs */
+ lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+ while (lnum != c->ltail_lnum) {
+ err = fixup_leb(c, lnum, 0);
+ if (err)
+ goto out;
+ lnum = ubifs_next_log_lnum(c, lnum);
+ }
+
+ /* Fixup the current log head */
+ err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+ if (err)
+ goto out;
+
+ /* Fixup LEBs in the LPT area */
+ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
+ int free = c->ltab[lnum - c->lpt_first].free;
+
+ if (free > 0) {
+ err = fixup_leb(c, lnum, c->leb_size - free);
+ if (err)
+ goto out;
+ }
+ }
+
+ /* Unmap LEBs in the orphans area */
+ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
+ err = fixup_leb(c, lnum, 0);
+ if (err)
+ goto out;
+ }
+
+ /* Fixup LEBs in the main area */
+ for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
+ lprops = ubifs_lpt_lookup(c, lnum);
+ if (IS_ERR(lprops)) {
+ err = PTR_ERR(lprops);
+ goto out;
+ }
+
+ if (lprops->free > 0) {
+ err = fixup_leb(c, lnum, c->leb_size - lprops->free);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ ubifs_release_lprops(c);
+ return err;
+}
+
+/**
+ * ubifs_fixup_free_space - find & fix all LEBs with free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function fixes up LEBs containing free space on first mount, if the
+ * appropriate flag was set when the FS was created. Each LEB with one or more
+ * empty min. I/O units (i.e. free-space-count > 0) is re-written, to make sure
+ * the free space is actually erased. E.g., this is necessary for some NAND
+ * chips, since the free space may have been programmed like real "0xff" data
+ * (generating a non-0xff ECC), causing future writes to the not-really-erased
+ * NAND pages to behave badly. After the space is fixed up, the superblock flag
+ * is cleared, so that this is skipped for all future mounts.
+ */
+int ubifs_fixup_free_space(struct ubifs_info *c)
+{
+ int err;
+ struct ubifs_sb_node *sup;
+
+ ubifs_assert(c->space_fixup);
+ ubifs_assert(!c->ro_mount);
+
+ ubifs_msg("start fixing up free space");
+
+ err = fixup_free_space(c);
+ if (err)
+ return err;
+
+ sup = ubifs_read_sb_node(c);
+ if (IS_ERR(sup))
+ return PTR_ERR(sup);
+
+ /* Free-space fixup is no longer required */
+ c->space_fixup = 0;
+ sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
+
+ err = ubifs_write_sb_node(c, sup);
+ kfree(sup);
+ if (err)
+ return err;
+
+ ubifs_msg("free space fixup complete");
+ return err;
+}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index be6c7b008f38..6db0bdaa9f74 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -375,7 +375,7 @@ out:
ubifs_release_dirty_inode_budget(c, ui);
else {
/* We've deleted something - clean the "no space" flags */
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
done:
@@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c)
* be compressed and direntries are of the maximum size.
*
* Note, data, which may be stored in inodes is budgeted separately, so
- * it is not included into 'c->inode_budget'.
+ * it is not included into 'c->bi.inode_budget'.
*/
- c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
- c->inode_budget = UBIFS_INO_NODE_SZ;
- c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
+ c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
+ c->bi.inode_budget = UBIFS_INO_NODE_SZ;
+ c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
/*
* When the amount of flash space used by buds becomes
@@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c)
{
long long tmp64;
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
c->report_rp_size = ubifs_reported_space(c, c->rp_size);
/*
@@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c)
{
ubifs_assert(c->dark_wm > 0);
if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
- ubifs_err("insufficient free space to mount in read/write mode");
- dbg_dump_budg(c);
+ ubifs_err("insufficient free space to mount in R/W mode");
+ dbg_dump_budg(c, &c->bi);
dbg_dump_lprops(c);
return -ENOSPC;
}
@@ -1257,12 +1257,12 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_free;
}
+ err = alloc_wbufs(c);
+ if (err)
+ goto out_cbuf;
+
sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
if (!c->ro_mount) {
- err = alloc_wbufs(c);
- if (err)
- goto out_cbuf;
-
/* Create background thread */
c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) {
@@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_lpt;
- err = dbg_check_idx_size(c, c->old_idx_sz);
+ err = dbg_check_idx_size(c, c->bi.old_idx_sz);
if (err)
goto out_lpt;
@@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_journal;
/* Calculate 'min_idx_lebs' after journal replay */
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
if (err)
@@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c)
} else
ubifs_assert(c->lst.taken_empty_lebs > 0);
+ if (!c->ro_mount && c->space_fixup) {
+ err = ubifs_fixup_free_space(c);
+ if (err)
+ goto out_infos;
+ }
+
err = dbg_check_filesystem(c);
if (err)
goto out_infos;
@@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c)
c->main_lebs, c->main_first, c->leb_cnt - 1);
dbg_msg("index LEBs: %d", c->lst.idx_lebs);
dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
- c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
+ c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
+ c->bi.old_idx_sz >> 20);
dbg_msg("key hash type: %d", c->key_hash_type);
dbg_msg("tree fanout: %d", c->fanout);
dbg_msg("reserved GC LEB: %d", c->gc_lnum);
@@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c)
dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
- UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
+ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
dbg_msg("dead watermark: %d", c->dead_wm);
dbg_msg("dark watermark: %d", c->dark_wm);
@@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
}
sup->leb_cnt = cpu_to_le32(c->leb_cnt);
err = ubifs_write_sb_node(c, sup);
+ kfree(sup);
if (err)
goto out;
}
@@ -1631,12 +1639,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
if (err)
goto out;
- err = alloc_wbufs(c);
- if (err)
- goto out;
-
- ubifs_create_buds_lists(c);
-
/* Create background thread */
c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) {
@@ -1690,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c)
*/
err = dbg_check_space_info(c);
}
+
+ if (c->space_fixup) {
+ err = ubifs_fixup_free_space(c);
+ if (err)
+ goto out;
+ }
+
mutex_unlock(&c->umount_mutex);
return err;
@@ -1744,7 +1753,6 @@ static void ubifs_remount_ro(struct ubifs_info *c)
if (err)
ubifs_ro_mode(c, err);
- free_wbufs(c);
vfree(c->orph_buf);
c->orph_buf = NULL;
kfree(c->write_reserve_buf);
@@ -1773,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb)
* to write them back because of I/O errors.
*/
if (!c->ro_error) {
- ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
- ubifs_assert(c->budg_idx_growth == 0);
- ubifs_assert(c->budg_dd_growth == 0);
- ubifs_assert(c->budg_data_growth == 0);
+ ubifs_assert(c->bi.idx_growth == 0);
+ ubifs_assert(c->bi.dd_growth == 0);
+ ubifs_assert(c->bi.data_growth == 0);
}
/*
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index de485979ca39..8119b1fd8d94 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
if (err) {
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c, znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
err = tnc_delete(c, znode, n);
}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 53288e5d604e..41920f357bbf 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
c->gap_lebs = NULL;
return err;
}
- if (!dbg_force_in_the_gaps_enabled) {
+ if (!dbg_force_in_the_gaps_enabled()) {
/*
* Do not print scary warnings if the debugging
* option which forces in-the-gaps is enabled.
*/
- ubifs_err("out of space");
- spin_lock(&c->space_lock);
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
+ ubifs_warn("out of space");
+ dbg_dump_budg(c, &c->bi);
dbg_dump_lprops(c);
}
/* Try to commit anyway */
@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
spin_lock(&c->space_lock);
/*
* Although we have not finished committing yet, update size of the
- * committed index ('c->old_idx_sz') and zero out the index growth
+ * committed index ('c->bi.old_idx_sz') and zero out the index growth
* budget. It is OK to do this now, because we've reserved all the
* space which is needed to commit the index, and it is save for the
* budgeting subsystem to assume the index is already committed,
* even though it is not.
*/
- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
- c->old_idx_sz = c->calc_idx_sz;
- c->budg_uncommitted_idx = 0;
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+ c->bi.old_idx_sz = c->calc_idx_sz;
+ c->bi.uncommitted_idx = 0;
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
spin_unlock(&c->space_lock);
mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 191ca7863fe7..e24380cf46ed 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -408,9 +408,11 @@ enum {
* Superblock flags.
*
* UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
+ * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
*/
enum {
UBIFS_FLG_BIGLPT = 0x02,
+ UBIFS_FLG_SPACE_FIXUP = 0x04,
};
/**
@@ -434,7 +436,7 @@ struct ubifs_ch {
__u8 node_type;
__u8 group_type;
__u8 padding[2];
-} __attribute__ ((packed));
+} __packed;
/**
* union ubifs_dev_desc - device node descriptor.
@@ -448,7 +450,7 @@ struct ubifs_ch {
union ubifs_dev_desc {
__le32 new;
__le64 huge;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_ino_node - inode node.
@@ -509,7 +511,7 @@ struct ubifs_ino_node {
__le16 compr_type;
__u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
__u8 data[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_dent_node - directory entry node.
@@ -534,7 +536,7 @@ struct ubifs_dent_node {
__le16 nlen;
__u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
__u8 name[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_data_node - data node.
@@ -555,7 +557,7 @@ struct ubifs_data_node {
__le16 compr_type;
__u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
__u8 data[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_trun_node - truncation node.
@@ -575,7 +577,7 @@ struct ubifs_trun_node {
__u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
__le64 old_size;
__le64 new_size;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_pad_node - padding node.
@@ -586,7 +588,7 @@ struct ubifs_trun_node {
struct ubifs_pad_node {
struct ubifs_ch ch;
__le32 pad_len;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_sb_node - superblock node.
@@ -644,7 +646,7 @@ struct ubifs_sb_node {
__u8 uuid[16];
__le32 ro_compat_version;
__u8 padding2[3968];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_mst_node - master node.
@@ -711,7 +713,7 @@ struct ubifs_mst_node {
__le32 idx_lebs;
__le32 leb_cnt;
__u8 padding[344];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_ref_node - logical eraseblock reference node.
@@ -727,7 +729,7 @@ struct ubifs_ref_node {
__le32 offs;
__le32 jhead;
__u8 padding[28];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_branch - key/reference/length branch
@@ -741,7 +743,7 @@ struct ubifs_branch {
__le32 offs;
__le32 len;
__u8 key[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_idx_node - indexing node.
@@ -755,7 +757,7 @@ struct ubifs_idx_node {
__le16 child_cnt;
__le16 level;
__u8 branches[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_cs_node - commit start node.
@@ -765,7 +767,7 @@ struct ubifs_idx_node {
struct ubifs_cs_node {
struct ubifs_ch ch;
__le64 cmt_no;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_orph_node - orphan node.
@@ -777,6 +779,6 @@ struct ubifs_orph_node {
struct ubifs_ch ch;
__le64 cmt_no;
__le64 inos[];
-} __attribute__ ((packed));
+} __packed;
#endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 8c40ad3c6721..93d1412a06f0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb {
* The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
* @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
* make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
- * with 'ubifs_writepage()' (see file.c). All the other inode fields are
- * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
+ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
+ * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
+ * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
* could consider to rework locking and base it on "shadow" fields.
*/
struct ubifs_inode {
@@ -937,6 +937,40 @@ struct ubifs_mount_opts {
unsigned int compr_type:2;
};
+/**
+ * struct ubifs_budg_info - UBIFS budgeting information.
+ * @idx_growth: amount of bytes budgeted for index growth
+ * @data_growth: amount of bytes budgeted for cached data
+ * @dd_growth: amount of bytes budgeted for cached data that will make
+ * other data dirty
+ * @uncommitted_idx: amount of bytes which were budgeted for growth of the
+ * index, but which still have to be taken into account because
+ * the index has not been committed so far
+ * @old_idx_sz: size of index on flash
+ * @min_idx_lebs: minimum number of LEBs required for the index
+ * @nospace: non-zero if the file-system does not have flash space (used as
+ * optimization)
+ * @nospace_rp: the same as @nospace, but additionally means that even reserved
+ * pool is full
+ * @page_budget: budget for a page (constant, never changed after mount)
+ * @inode_budget: budget for an inode (constant, never changed after mount)
+ * @dent_budget: budget for a directory entry (constant, never changed after
+ * mount)
+ */
+struct ubifs_budg_info {
+ long long idx_growth;
+ long long data_growth;
+ long long dd_growth;
+ long long uncommitted_idx;
+ unsigned long long old_idx_sz;
+ int min_idx_lebs;
+ unsigned int nospace:1;
+ unsigned int nospace_rp:1;
+ int page_budget;
+ int inode_budget;
+ int dent_budget;
+};
+
struct ubifs_debug_info;
/**
@@ -980,6 +1014,7 @@ struct ubifs_debug_info;
* @cmt_wq: wait queue to sleep on if the log is full and a commit is running
*
* @big_lpt: flag that LPT is too big to write whole during commit
+ * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
* @no_chk_data_crc: do not check CRCs when reading data nodes (except during
* recovery)
* @bulk_read: enable bulk-reads
@@ -1057,32 +1092,14 @@ struct ubifs_debug_info;
* @dirty_zn_cnt: number of dirty znodes
* @clean_zn_cnt: number of clean znodes
*
- * @budg_idx_growth: amount of bytes budgeted for index growth
- * @budg_data_growth: amount of bytes budgeted for cached data
- * @budg_dd_growth: amount of bytes budgeted for cached data that will make
- * other data dirty
- * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
- * but which still have to be taken into account because
- * the index has not been committed so far
- * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
- * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
- * @nospace, and @nospace_rp;
- * @min_idx_lebs: minimum number of LEBs required for the index
- * @old_idx_sz: size of index on flash
+ * @space_lock: protects @bi and @lst
+ * @lst: lprops statistics
+ * @bi: budgeting information
* @calc_idx_sz: temporary variable which is used to calculate new index size
* (contains accurate new index size at end of TNC commit start)
- * @lst: lprops statistics
- * @nospace: non-zero if the file-system does not have flash space (used as
- * optimization)
- * @nospace_rp: the same as @nospace, but additionally means that even reserved
- * pool is full
- *
- * @page_budget: budget for a page
- * @inode_budget: budget for an inode
- * @dent_budget: budget for a directory entry
*
* @ref_node_alsz: size of the LEB reference node aligned to the min. flash
- * I/O unit
+ * I/O unit
* @mst_node_alsz: master node aligned size
* @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
* @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
@@ -1189,7 +1206,6 @@ struct ubifs_debug_info;
* @replaying: %1 during journal replay
* @mounting: %1 while mounting
* @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
- * @replay_tree: temporary tree used during journal replay
* @replay_list: temporary list used during journal replay
* @replay_buds: list of buds to replay
* @cs_sqnum: sequence number of first node in the log (commit start node)
@@ -1238,6 +1254,7 @@ struct ubifs_info {
wait_queue_head_t cmt_wq;
unsigned int big_lpt:1;
+ unsigned int space_fixup:1;
unsigned int no_chk_data_crc:1;
unsigned int bulk_read:1;
unsigned int default_compr:2;
@@ -1308,21 +1325,10 @@ struct ubifs_info {
atomic_long_t dirty_zn_cnt;
atomic_long_t clean_zn_cnt;
- long long budg_idx_growth;
- long long budg_data_growth;
- long long budg_dd_growth;
- long long budg_uncommitted_idx;
spinlock_t space_lock;
- int min_idx_lebs;
- unsigned long long old_idx_sz;
- unsigned long long calc_idx_sz;
struct ubifs_lp_stats lst;
- unsigned int nospace:1;
- unsigned int nospace_rp:1;
-
- int page_budget;
- int inode_budget;
- int dent_budget;
+ struct ubifs_budg_info bi;
+ unsigned long long calc_idx_sz;
int ref_node_alsz;
int mst_node_alsz;
@@ -1430,7 +1436,6 @@ struct ubifs_info {
unsigned int replaying:1;
unsigned int mounting:1;
unsigned int remounting_rw:1;
- struct rb_root replay_tree;
struct list_head replay_list;
struct list_head replay_buds;
unsigned long long cs_sqnum;
@@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c);
int ubifs_read_superblock(struct ubifs_info *c);
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
+int ubifs_fixup_free_space(struct ubifs_info *c);
/* replay.c */
int ubifs_validate_entry(struct ubifs_info *c,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 3299f469e712..16f19f55e63f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -80,8 +80,8 @@ enum {
SECURITY_XATTR,
};
-static const struct inode_operations none_inode_operations;
-static const struct file_operations none_file_operations;
+static const struct inode_operations empty_iops;
+static const struct file_operations empty_fops;
/**
* create_xattr - create an extended attribute.
@@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
/* Re-define all operations to be "nothing" */
inode->i_mapping->a_ops = &empty_aops;
- inode->i_op = &none_inode_operations;
- inode->i_fop = &none_file_operations;
+ inode->i_op = &empty_iops;
+ inode->i_fop = &empty_fops;
inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
ui = ubifs_inode(inode);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e765743cf9f3..b4d791a83207 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -409,7 +409,7 @@ out:
}
/**
- * ufs_getfrag_bloc() - `get_block_t' function, interface between UFS and
+ * ufs_getfrag_block() - `get_block_t' function, interface between UFS and
* readpage, writepage and so on
*/
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9ef9ed2cfe2e..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
-#include <linux/list_sort.h>
#include "xfs_sb.h"
#include "xfs_inum.h"
@@ -709,6 +708,27 @@ xfs_buf_get_empty(
return bp;
}
+/*
+ * Return a buffer allocated as an empty buffer and associated to external
+ * memory via xfs_buf_associate_memory() back to its empty state.
+ */
+void
+xfs_buf_set_empty(
+ struct xfs_buf *bp,
+ size_t len)
+{
+ if (bp->b_pages)
+ _xfs_buf_free_pages(bp);
+
+ bp->b_pages = NULL;
+ bp->b_page_count = 0;
+ bp->b_addr = NULL;
+ bp->b_file_offset = 0;
+ bp->b_buffer_length = bp->b_count_desired = len;
+ bp->b_bn = XFS_BUF_DADDR_NULL;
+ bp->b_flags &= ~XBF_MAPPED;
+}
+
static inline struct page *
mem_to_page(
void *addr)
@@ -1402,12 +1422,12 @@ restart:
int
xfs_buftarg_shrink(
struct shrinker *shrink,
- int nr_to_scan,
- gfp_t mask)
+ struct shrink_control *sc)
{
struct xfs_buftarg *btp = container_of(shrink,
struct xfs_buftarg, bt_shrinker);
struct xfs_buf *bp;
+ int nr_to_scan = sc->nr_to_scan;
LIST_HEAD(dispose);
if (!nr_to_scan)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a9a1c4512645..50a7d5fb3b73 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
extern void xfs_buf_hold(xfs_buf_t *);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b3486dfa5520..54e623bfbb85 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
case XFS_IOC_RESVSP_32:
case XFS_IOC_UNRESVSP_32:
case XFS_IOC_RESVSP64_32:
- case XFS_IOC_UNRESVSP64_32: {
+ case XFS_IOC_UNRESVSP64_32:
+ case XFS_IOC_ZERO_RANGE_32: {
struct xfs_flock64 bf;
if (xfs_compat_flock64_copyin(&bf, arg))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 08b605792a99..80f4060e8970 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
+#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
typedef struct compat_xfs_fsop_geom_v1 {
__u32 blocksize; /* filesystem (data) block size */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 244be9cbfe78..8633521b3b2e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,7 @@
#include <linux/ctype.h>
#include <linux/writeback.h>
#include <linux/capability.h>
+#include <linux/list_sort.h>
#include <asm/page.h>
#include <asm/div64.h>
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 9f76cceb678d..bd672def95ac 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -41,23 +41,6 @@ __xfs_printk(
printk("%sXFS: %pV\n", level, vaf);
}
-void xfs_printk(
- const char *level,
- const struct xfs_mount *mp,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- __xfs_printk(level, mp, &vaf);
- va_end(args);
-}
-
#define define_xfs_printk_level(func, kern_level) \
void func(const struct xfs_mount *mp, const char *fmt, ...) \
{ \
@@ -95,8 +78,7 @@ xfs_alert_tag(
int do_panic = 0;
if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
- xfs_printk(KERN_ALERT, mp,
- "XFS: Transforming an alert into a BUG.");
+ xfs_alert(mp, "Transforming an alert into a BUG.");
do_panic = 1;
}
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index f1b3fc1b6c4e..7fb7ea007672 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,9 +3,6 @@
struct xfs_mount;
-extern void xfs_printk(const char *level, const struct xfs_mount *mp,
- const char *fmt, ...)
- __attribute__ ((format (printf, 3, 4)));
extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
#else
-static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+static inline void
+__attribute__ ((format (printf, 2, 3)))
+xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
{
}
#endif
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b38e58d02299..b0aa59e51fd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1787,10 +1787,6 @@ init_xfs_fs(void)
if (error)
goto out_cleanup_procfs;
- error = xfs_init_workqueues();
- if (error)
- goto out_sysctl_unregister;
-
vfs_initquota();
error = register_filesystem(&xfs_fs_type);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e4f9c1b0836c..8ecad5ff9f9b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
error = xfs_iflush(ip, flags);
+ /*
+ * We don't want to try again on non-blocking flushes that can't run
+ * again immediately. If an inode really must be written, then that's
+ * what the SYNC_WAIT flag is for.
+ */
+ if (error == EAGAIN) {
+ ASSERT(!(flags & SYNC_WAIT));
+ error = 0;
+ }
+
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return error;
@@ -926,6 +936,7 @@ restart:
XFS_LOOKUP_BATCH,
XFS_ICI_RECLAIM_TAG);
if (!nr_found) {
+ done = 1;
rcu_read_unlock();
break;
}
@@ -1021,13 +1032,14 @@ xfs_reclaim_inodes(
static int
xfs_reclaim_inode_shrink(
struct shrinker *shrink,
- int nr_to_scan,
- gfp_t gfp_mask)
+ struct shrink_control *sc)
{
struct xfs_mount *mp;
struct xfs_perag *pag;
xfs_agnumber_t ag;
int reclaimable;
+ int nr_to_scan = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
if (nr_to_scan) {
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2d0bcb479075..d48b7a579ae1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
);
-#define XFS_BUSY_SYNC \
- { 0, "async" }, \
- { 1, "sync" }
-
-TRACE_EVENT(xfs_alloc_busy,
- TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len, int sync),
- TP_ARGS(trans, agno, agbno, len, sync),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(struct xfs_trans *, tp)
- __field(int, tid)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
- __field(xfs_extlen_t, len)
- __field(int, sync)
- ),
- TP_fast_assign(
- __entry->dev = trans->t_mountp->m_super->s_dev;
- __entry->tp = trans;
- __entry->tid = trans->t_ticket->t_tid;
- __entry->agno = agno;
- __entry->agbno = agbno;
- __entry->len = len;
- __entry->sync = sync;
- ),
- TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->tp,
- __entry->tid,
- __entry->agno,
- __entry->agbno,
- __entry->len,
- __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
-
-);
-
-TRACE_EVENT(xfs_alloc_unbusy,
+DECLARE_EVENT_CLASS(xfs_busy_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len),
TP_ARGS(mp, agno, agbno, len),
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
__entry->agbno,
__entry->len)
);
+#define DEFINE_BUSY_EVENT(name) \
+DEFINE_EVENT(xfs_busy_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+ xfs_agblock_t agbno, xfs_extlen_t len), \
+ TP_ARGS(mp, agno, agbno, len))
+DEFINE_BUSY_EVENT(xfs_alloc_busy);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
-#define XFS_BUSY_STATES \
- { 0, "missing" }, \
- { 1, "found" }
-
-TRACE_EVENT(xfs_alloc_busysearch,
+TRACE_EVENT(xfs_alloc_busy_trim,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len, int found),
- TP_ARGS(mp, agno, agbno, len, found),
+ xfs_agblock_t agbno, xfs_extlen_t len,
+ xfs_agblock_t tbno, xfs_extlen_t tlen),
+ TP_ARGS(mp, agno, agbno, len, tbno, tlen),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
- __field(int, found)
+ __field(xfs_agblock_t, tbno)
+ __field(xfs_extlen_t, tlen)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
- __entry->found = found;
+ __entry->tbno = tbno;
+ __entry->tlen = tlen;
),
- TP_printk("dev %d:%d agno %u agbno %u len %u %s",
+ TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
- __print_symbolic(__entry->found, XFS_BUSY_STATES))
+ __entry->tbno,
+ __entry->tlen)
);
TRACE_EVENT(xfs_trans_commit_lsn,
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__entry->wasfromfl,
__entry->isfl,
__entry->userdata,
- __entry->firstblock)
+ (unsigned long long)__entry->firstblock)
)
#define DEFINE_ALLOC_EVENT(name) \
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 69228aa8605a..b94dace4e785 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
+STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
static struct shrinker xfs_qm_shaker = {
.shrink = xfs_qm_shake,
@@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist(
STATIC int
xfs_qm_shake(
struct shrinker *shrink,
- int nr_to_scan,
- gfp_t gfp_mask)
+ struct shrink_control *sc)
{
int ndqused, nfree, n;
+ gfp_t gfp_mask = sc->gfp_mask;
if (!kmem_shake_allow(gfp_mask))
return 0;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 58632cc17f2d..da0a561ffba2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
xfs_agnumber_t agno;
xfs_agblock_t bno;
xfs_extlen_t length;
- xlog_tid_t tid; /* transaction that created this */
};
/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 27d64d752eab..acdced86413c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
-/*
- * Prototypes for per-ag allocation routines
- */
-
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
- xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-
-/*
- * Internal functions.
- */
+ xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
+ xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
/*
* Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
xfs_extlen_t *reslen) /* result length */
{
xfs_agblock_t bno;
- xfs_extlen_t diff;
xfs_extlen_t len;
- if (args->alignment > 1 && foundlen >= args->minlen) {
- bno = roundup(foundbno, args->alignment);
- diff = bno - foundbno;
- len = diff >= foundlen ? 0 : foundlen - diff;
+ /* Trim busy sections out of found extent */
+ xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
+
+ if (args->alignment > 1 && len >= args->minlen) {
+ xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
+ xfs_extlen_t diff = aligned_bno - bno;
+
+ *resbno = aligned_bno;
+ *reslen = diff >= len ? 0 : len - diff;
} else {
- bno = foundbno;
- len = foundlen;
+ *resbno = bno;
+ *reslen = len;
}
- *resbno = bno;
- *reslen = len;
}
/*
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
return 1;
agf = XFS_BUF_TO_AGF(args->agbp);
diff = be32_to_cpu(agf->agf_freeblks)
- + be32_to_cpu(agf->agf_flcount)
- args->len - args->minleft;
if (diff >= 0)
return 1;
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
if (error)
return error;
- /*
- * Search the busylist for these blocks and mark the
- * transaction as synchronous if blocks are found. This
- * avoids the need to block due to a synchronous log
- * force to ensure correct ordering as the synchronous
- * transaction will guarantee that for us.
- */
- if (xfs_alloc_busy_search(args->mp, args->agno,
- args->agbno, args->len))
- xfs_trans_set_sync(args->tp);
+ ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
+ args->agbno, args->len));
}
if (!args->isfl) {
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
{
xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
- xfs_agblock_t end; /* end of allocated extent */
int error;
xfs_agblock_t fbno; /* start block of found extent */
- xfs_agblock_t fend; /* end block of found extent */
xfs_extlen_t flen; /* length of found extent */
+ xfs_agblock_t tbno; /* start block of trimmed extent */
+ xfs_extlen_t tlen; /* length of trimmed extent */
+ xfs_agblock_t tend; /* end block of trimmed extent */
+ xfs_agblock_t end; /* end of allocated extent */
int i; /* success/failure of operation */
- xfs_agblock_t maxend; /* end of maximal extent */
- xfs_agblock_t minend; /* end of minimal extent */
xfs_extlen_t rlen; /* length of returned extent */
ASSERT(args->alignment == 1);
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
ASSERT(fbno <= args->agbno);
- minend = args->agbno + args->minlen;
- maxend = args->agbno + args->maxlen;
- fend = fbno + flen;
/*
- * Give up if the freespace isn't long enough for the minimum request.
+ * Check for overlapping busy extents.
+ */
+ xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
+
+ /*
+ * Give up if the start of the extent is busy, or the freespace isn't
+ * long enough for the minimum request.
*/
- if (fend < minend)
+ if (tbno > args->agbno)
+ goto not_found;
+ if (tlen < args->minlen)
+ goto not_found;
+ tend = tbno + tlen;
+ if (tend < args->agbno + args->minlen)
goto not_found;
/*
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
*
* Fix the length according to mod and prod if given.
*/
- end = XFS_AGBLOCK_MIN(fend, maxend);
+ end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
args->len = end - args->agbno;
xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args))
goto not_found;
rlen = args->len;
- ASSERT(args->agbno + rlen <= fend);
+ ASSERT(args->agbno + rlen <= tend);
end = args->agbno + rlen;
/*
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
struct xfs_btree_cur **scur, /* searching cursor */
xfs_agblock_t gdiff, /* difference for search comparison */
xfs_agblock_t *sbno, /* extent found by search */
- xfs_extlen_t *slen,
- xfs_extlen_t *slena, /* aligned length */
+ xfs_extlen_t *slen, /* extent length */
+ xfs_agblock_t *sbnoa, /* aligned extent found by search */
+ xfs_extlen_t *slena, /* aligned extent length */
int dir) /* 0 = search right, 1 = search left */
{
- xfs_agblock_t bno;
xfs_agblock_t new;
xfs_agblock_t sdiff;
int error;
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
+ xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
/*
* The good extent is closer than this one.
*/
if (!dir) {
- if (bno >= args->agbno + gdiff)
+ if (*sbnoa >= args->agbno + gdiff)
goto out_use_good;
} else {
- if (bno <= args->agbno - gdiff)
+ if (*sbnoa <= args->agbno - gdiff)
goto out_use_good;
}
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
xfs_alloc_fix_len(args);
sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, *sbno,
- *slen, &new);
+ args->alignment, *sbnoa,
+ *slena, &new);
/*
* Choose closer size and invalidate other cursor.
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t gtbnoa; /* aligned ... */
xfs_extlen_t gtdiff; /* difference to right side entry */
xfs_extlen_t gtlen; /* length of right side entry */
- xfs_extlen_t gtlena = 0; /* aligned ... */
+ xfs_extlen_t gtlena; /* aligned ... */
xfs_agblock_t gtnew; /* useful start bno of right side */
int error; /* error code */
int i; /* result code, temporary */
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t ltbnoa; /* aligned ... */
xfs_extlen_t ltdiff; /* difference to left side entry */
xfs_extlen_t ltlen; /* length of left side entry */
- xfs_extlen_t ltlena = 0; /* aligned ... */
+ xfs_extlen_t ltlena; /* aligned ... */
xfs_agblock_t ltnew; /* useful start bno of left side */
xfs_extlen_t rlen; /* length of returned extent */
+ int forced = 0;
#if defined(DEBUG) && defined(__KERNEL__)
/*
* Randomly don't execute the first algorithm.
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
dofirst = random32() & 1;
#endif
+
+restart:
+ bno_cur_lt = NULL;
+ bno_cur_gt = NULL;
+ ltlen = 0;
+ gtlena = 0;
+ ltlena = 0;
+
/*
* Get a cursor for the by-size btree.
*/
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
args->agno, XFS_BTNUM_CNT);
- ltlen = 0;
- bno_cur_lt = bno_cur_gt = NULL;
+
/*
* See if there are any free extents as big as maxlen.
*/
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
goto error0;
if (i == 0 || ltlen == 0) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ trace_xfs_alloc_near_noentry(args);
return 0;
}
ASSERT(i == 1);
}
args->wasfromfl = 0;
+
/*
* First algorithm.
* If the requested extent is large wrt the freespaces available
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
if (args->len < blen)
continue;
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, ltbno, ltlen, &ltnew);
+ args->alignment, ltbnoa, ltlena, &ltnew);
if (ltnew != NULLAGBLOCK &&
(args->len > blen || ltdiff < bdiff)) {
bdiff = ltdiff;
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, ltbno, ltlen, &ltnew);
+ args->alignment, ltbnoa, ltlena, &ltnew);
error = xfs_alloc_find_best_extent(args,
&bno_cur_lt, &bno_cur_gt,
- ltdiff, &gtbno, &gtlen, &gtlena,
+ ltdiff, &gtbno, &gtlen,
+ &gtbnoa, &gtlena,
0 /* search right */);
} else {
ASSERT(gtlena >= args->minlen);
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
xfs_alloc_fix_len(args);
gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, gtbno, gtlen, &gtnew);
+ args->alignment, gtbnoa, gtlena, &gtnew);
error = xfs_alloc_find_best_extent(args,
&bno_cur_gt, &bno_cur_lt,
- gtdiff, &ltbno, &ltlen, &ltlena,
+ gtdiff, &ltbno, &ltlen,
+ &ltbnoa, &ltlena,
1 /* search left */);
}
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
* If we couldn't get anything, give up.
*/
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+ if (!forced++) {
+ trace_xfs_alloc_near_busy(args);
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
+ goto restart;
+ }
+
trace_xfs_alloc_size_neither(args);
args->agbno = NULLAGBLOCK;
return 0;
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
return 0;
}
rlen = args->len;
- (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
- ltlen, &ltnew);
+ (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
+ ltbnoa, ltlena, &ltnew);
ASSERT(ltnew >= ltbno);
- ASSERT(ltnew + rlen <= ltbno + ltlen);
+ ASSERT(ltnew + rlen <= ltbnoa + ltlena);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
args->agbno = ltnew;
+
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
ltnew, rlen, XFSA_FIXUP_BNO_OK)))
goto error0;
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
int i; /* temp status variable */
xfs_agblock_t rbno; /* returned block number */
xfs_extlen_t rlen; /* length of returned extent */
+ int forced = 0;
+restart:
/*
* Allocate and initialize a cursor for the by-size btree.
*/
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
args->agno, XFS_BTNUM_CNT);
bno_cur = NULL;
+
/*
* Look for an entry >= maxlen+alignment-1 blocks.
*/
if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
args->maxlen + args->alignment - 1, &i)))
goto error0;
+
/*
- * If none, then pick up the last entry in the tree unless the
- * tree is empty.
+ * If none or we have busy extents that we cannot allocate from, then
+ * we have to settle for a smaller extent. In the case that there are
+ * no large extents, this will return the last entry in the tree unless
+ * the tree is empty. In the case that there are only busy large
+ * extents, this will return the largest small extent unless there
+ * are no smaller extents available.
*/
- if (!i) {
- if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
- &flen, &i)))
+ if (!i || forced > 1) {
+ error = xfs_alloc_ag_vextent_small(args, cnt_cur,
+ &fbno, &flen, &i);
+ if (error)
goto error0;
if (i == 0 || flen == 0) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
return 0;
}
ASSERT(i == 1);
+ xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+ } else {
+ /*
+ * Search for a non-busy extent that is large enough.
+ * If we are at low space, don't check, or if we fall off
+ * the end of the btree, turn off the busy check and
+ * restart.
+ */
+ for (;;) {
+ error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen);
+
+ if (rlen >= args->maxlen)
+ break;
+
+ error = xfs_btree_increment(cnt_cur, 0, &i);
+ if (error)
+ goto error0;
+ if (i == 0) {
+ /*
+ * Our only valid extents must have been busy.
+ * Make them unbusy by forcing the log out and
+ * retrying. If we've been here before, forcing
+ * the log isn't making the extents available,
+ * which means they have probably been freed in
+ * this transaction. In that case, we have to
+ * give up on them and we'll attempt a minlen
+ * allocation the next time around.
+ */
+ xfs_btree_del_cursor(cnt_cur,
+ XFS_BTREE_NOERROR);
+ trace_xfs_alloc_size_busy(args);
+ if (!forced++)
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
+ goto restart;
+ }
+ }
}
- /*
- * There's a freespace as big as maxlen+alignment-1, get it.
- */
- else {
- if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- }
+
/*
* In the first case above, we got the last entry in the
* by-size btree. Now we check to see if the space hits maxlen
* once aligned; if not, we search left for something better.
* This can't happen in the second case above.
*/
- xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
(rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
* Fix up the length.
*/
args->len = rlen;
- xfs_alloc_fix_len(args);
- if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- trace_xfs_alloc_size_nominleft(args);
- args->agbno = NULLAGBLOCK;
- return 0;
+ if (rlen < args->minlen) {
+ if (!forced++) {
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ trace_xfs_alloc_size_busy(args);
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
+ goto restart;
+ }
+ goto out_nominleft;
}
+ xfs_alloc_fix_len(args);
+
+ if (!xfs_alloc_fix_minleft(args))
+ goto out_nominleft;
rlen = args->len;
XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
/*
@@ -1287,6 +1350,12 @@ error0:
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
return error;
+
+out_nominleft:
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ trace_xfs_alloc_size_nominleft(args);
+ args->agbno = NULLAGBLOCK;
+ return 0;
}
/*
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
if (error)
goto error0;
if (fbno != NULLAGBLOCK) {
+ xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
+ args->userdata);
+
if (args->userdata) {
xfs_buf_t *bp;
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
- /*
- * Since blocks move to the free list without the coordination
- * used in xfs_bmap_finish, we can't allow block to be available
- * for reallocation and non-transaction writing (user data)
- * until we know that the transaction that moved it to the free
- * list is permanently on disk. We track the blocks by declaring
- * these blocks as "busy"; the busy list is maintained on a per-ag
- * basis and each transaction records which entries should be removed
- * when the iclog commits to disk. If a busy block is allocated,
- * the iclog is pushed up to the LSN that freed the block.
- */
- xfs_alloc_busy_insert(tp, agno, bno, len);
return 0;
error0:
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
xfs_alloc_log_agf(tp, agbp, logflags);
*bnop = bno;
- /*
- * As blocks are freed, they are added to the per-ag busy list and
- * remain there until the freeing transaction is committed to disk.
- * Now that we have allocated blocks, this list must be searched to see
- * if a block is being reused. If one is, then the freeing transaction
- * must be pushed to disk before this transaction.
- *
- * We do this by setting the current transaction to a sync transaction
- * which guarantees that the freeing transaction is on disk before this
- * transaction. This is done instead of a synchronous log force here so
- * that we don't sit and wait with the AGF locked in the transaction
- * during the log force.
- */
- if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
- xfs_trans_set_sync(tp);
return 0;
}
@@ -2423,105 +2468,13 @@ xfs_free_extent(
}
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
+ if (!error)
+ xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
error0:
xfs_perag_put(args.pag);
return error;
}
-
-/*
- * AG Busy list management
- * The busy list contains block ranges that have been freed but whose
- * transactions have not yet hit disk. If any block listed in a busy
- * list is reused, the transaction that freed it must be forced to disk
- * before continuing to use the block.
- *
- * xfs_alloc_busy_insert - add to the per-ag busy list
- * xfs_alloc_busy_clear - remove an item from the per-ag busy list
- * xfs_alloc_busy_search - search for a busy extent
- */
-
-/*
- * Insert a new extent into the busy tree.
- *
- * The busy extent tree is indexed by the start block of the busy extent.
- * there can be multiple overlapping ranges in the busy extent tree but only
- * ever one entry at a given start block. The reason for this is that
- * multi-block extents can be freed, then smaller chunks of that extent
- * allocated and freed again before the first transaction commit is on disk.
- * If the exact same start block is freed a second time, we have to wait for
- * that busy extent to pass out of the tree before the new extent is inserted.
- * There are two main cases we have to handle here.
- *
- * The first case is a transaction that triggers a "free - allocate - free"
- * cycle. This can occur during btree manipulations as a btree block is freed
- * to the freelist, then allocated from the free list, then freed again. In
- * this case, the second extxpnet free is what triggers the duplicate and as
- * such the transaction IDs should match. Because the extent was allocated in
- * this transaction, the transaction must be marked as synchronous. This is
- * true for all cases where the free/alloc/free occurs in the one transaction,
- * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
- * This serves to catch violations of the second case quite effectively.
- *
- * The second case is where the free/alloc/free occur in different
- * transactions. In this case, the thread freeing the extent the second time
- * can't mark the extent busy immediately because it is already tracked in a
- * transaction that may be committing. When the log commit for the existing
- * busy extent completes, the busy extent will be removed from the tree. If we
- * allow the second busy insert to continue using that busy extent structure,
- * it can be freed before this transaction is safely in the log. Hence our
- * only option in this case is to force the log to remove the existing busy
- * extent from the list before we insert the new one with the current
- * transaction ID.
- *
- * The problem we are trying to avoid in the free-alloc-free in separate
- * transactions is most easily described with a timeline:
- *
- * Thread 1 Thread 2 Thread 3 xfslogd
- * xact alloc
- * free X
- * mark busy
- * commit xact
- * free xact
- * xact alloc
- * alloc X
- * busy search
- * mark xact sync
- * commit xact
- * free xact
- * force log
- * checkpoint starts
- * ....
- * xact alloc
- * free X
- * mark busy
- * finds match
- * *** KABOOM! ***
- * ....
- * log IO completes
- * unbusy X
- * checkpoint completes
- *
- * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
- * the checkpoint completes, and the busy extent it matched will have been
- * removed from the tree when it is woken. Hence it can then continue safely.
- *
- * However, to ensure this matching process is robust, we need to use the
- * transaction ID for identifying transaction, as delayed logging results in
- * the busy extent and transaction lifecycles being different. i.e. the busy
- * extent is active for a lot longer than the transaction. Hence the
- * transaction structure can be freed and reallocated, then mark the same
- * extent busy again in the new transaction. In this case the new transaction
- * will have a different tid but can have the same address, and hence we need
- * to check against the tid.
- *
- * Future: for delayed logging, we could avoid the log force if the extent was
- * first freed in the current checkpoint sequence. This, however, requires the
- * ability to pin the current checkpoint in memory until this transaction
- * commits to ensure that both the original free and the current one combine
- * logically into the one checkpoint. If the checkpoint sequences are
- * different, however, we still need to wait on a log force.
- */
void
xfs_alloc_busy_insert(
struct xfs_trans *tp,
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
struct xfs_busy_extent *busyp;
struct xfs_perag *pag;
struct rb_node **rbp;
- struct rb_node *parent;
- int match;
-
+ struct rb_node *parent = NULL;
new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
if (!new) {
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
* block, make this a synchronous transaction to insure that
* the block is not reused before this transaction commits.
*/
- trace_xfs_alloc_busy(tp, agno, bno, len, 1);
+ trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
xfs_trans_set_sync(tp);
return;
}
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
new->agno = agno;
new->bno = bno;
new->length = len;
- new->tid = xfs_log_get_trans_ident(tp);
-
INIT_LIST_HEAD(&new->list);
/* trace before insert to be able to see failed inserts */
- trace_xfs_alloc_busy(tp, agno, bno, len, 0);
+ trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
pag = xfs_perag_get(tp->t_mountp, new->agno);
-restart:
spin_lock(&pag->pagb_lock);
rbp = &pag->pagb_tree.rb_node;
- parent = NULL;
- busyp = NULL;
- match = 0;
- while (*rbp && match >= 0) {
+ while (*rbp) {
parent = *rbp;
busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
if (new->bno < busyp->bno) {
- /* may overlap, but exact start block is lower */
rbp = &(*rbp)->rb_left;
- if (new->bno + new->length > busyp->bno)
- match = busyp->tid == new->tid ? 1 : -1;
+ ASSERT(new->bno + new->length <= busyp->bno);
} else if (new->bno > busyp->bno) {
- /* may overlap, but exact start block is higher */
rbp = &(*rbp)->rb_right;
- if (bno < busyp->bno + busyp->length)
- match = busyp->tid == new->tid ? 1 : -1;
+ ASSERT(bno >= busyp->bno + busyp->length);
} else {
- match = busyp->tid == new->tid ? 1 : -1;
- break;
+ ASSERT(0);
}
}
- if (match < 0) {
- /* overlap marked busy in different transaction */
- spin_unlock(&pag->pagb_lock);
- xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
- goto restart;
- }
- if (match > 0) {
- /*
- * overlap marked busy in same transaction. Update if exact
- * start block match, otherwise combine the busy extents into
- * a single range.
- */
- if (busyp->bno == new->bno) {
- busyp->length = max(busyp->length, new->length);
- spin_unlock(&pag->pagb_lock);
- ASSERT(tp->t_flags & XFS_TRANS_SYNC);
- xfs_perag_put(pag);
- kmem_free(new);
- return;
- }
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
- new->length = max(busyp->bno + busyp->length,
- new->bno + new->length) -
- min(busyp->bno, new->bno);
- new->bno = min(busyp->bno, new->bno);
- } else
- busyp = NULL;
rb_link_node(&new->rb_node, parent, rbp);
rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2619,7 +2532,6 @@ restart:
list_add(&new->list, &tp->t_busy);
spin_unlock(&pag->pagb_lock);
xfs_perag_put(pag);
- kmem_free(busyp);
}
/*
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
}
}
spin_unlock(&pag->pagb_lock);
- trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
xfs_perag_put(pag);
return match;
}
+/*
+ * The found free extent [fbno, fend] overlaps part or all of the given busy
+ * extent. If the overlap covers the beginning, the end, or all of the busy
+ * extent, the overlapping portion can be made unbusy and used for the
+ * allocation. We can't split a busy extent because we can't modify a
+ * transaction/CIL context busy list, but we can update an entry's block
+ * number or length.
+ *
+ * Returns true if the extent can safely be reused, or false if the search
+ * needs to be restarted.
+ */
+STATIC bool
+xfs_alloc_busy_update_extent(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ struct xfs_busy_extent *busyp,
+ xfs_agblock_t fbno,
+ xfs_extlen_t flen,
+ bool userdata)
+{
+ xfs_agblock_t fend = fbno + flen;
+ xfs_agblock_t bbno = busyp->bno;
+ xfs_agblock_t bend = bbno + busyp->length;
+
+ /*
+ * If there is a busy extent overlapping a user allocation, we have
+ * no choice but to force the log and retry the search.
+ *
+ * Fortunately this does not happen during normal operation, but
+ * only if the filesystem is very low on space and has to dip into
+ * the AGFL for normal allocations.
+ */
+ if (userdata)
+ goto out_force_log;
+
+ if (bbno < fbno && bend > fend) {
+ /*
+ * Case 1:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +---------+
+ * fbno fend
+ */
+
+ /*
+ * We would have to split the busy extent to be able to track
+ * it correctly, which we cannot do because we would have to
+ * modify the list of busy extents attached to the transaction
+ * or CIL context, which is immutable.
+ *
+ * Force out the log to clear the busy extent and retry the
+ * search.
+ */
+ goto out_force_log;
+ } else if (bbno >= fbno && bend <= fend) {
+ /*
+ * Case 2:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +-----------------+
+ * fbno fend
+ *
+ * Case 3:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +--------------------------+
+ * fbno fend
+ *
+ * Case 4:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +--------------------------+
+ * fbno fend
+ *
+ * Case 5:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +-----------------------------------+
+ * fbno fend
+ *
+ */
+
+ /*
+ * The busy extent is fully covered by the extent we are
+ * allocating, and can simply be removed from the rbtree.
+ * However we cannot remove it from the immutable list
+ * tracking busy extents in the transaction or CIL context,
+ * so set the length to zero to mark it invalid.
+ *
+ * We also need to restart the busy extent search from the
+ * tree root, because erasing the node can rearrange the
+ * tree topology.
+ */
+ rb_erase(&busyp->rb_node, &pag->pagb_tree);
+ busyp->length = 0;
+ return false;
+ } else if (fend < bend) {
+ /*
+ * Case 6:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +---------+
+ * fbno fend
+ *
+ * Case 7:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +------------------+
+ * fbno fend
+ *
+ */
+ busyp->bno = fend;
+ } else if (bbno < fbno) {
+ /*
+ * Case 8:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +-------------+
+ * fbno fend
+ *
+ * Case 9:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +----------------------+
+ * fbno fend
+ */
+ busyp->length = fbno - busyp->bno;
+ } else {
+ ASSERT(0);
+ }
+
+ trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
+ return true;
+
+out_force_log:
+ spin_unlock(&pag->pagb_lock);
+ xfs_log_force(mp, XFS_LOG_SYNC);
+ trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
+ spin_lock(&pag->pagb_lock);
+ return false;
+}
+
+
+/*
+ * For a given extent [fbno, flen], make sure we can reuse it safely.
+ */
void
-xfs_alloc_busy_clear(
+xfs_alloc_busy_reuse(
struct xfs_mount *mp,
- struct xfs_busy_extent *busyp)
+ xfs_agnumber_t agno,
+ xfs_agblock_t fbno,
+ xfs_extlen_t flen,
+ bool userdata)
{
struct xfs_perag *pag;
+ struct rb_node *rbp;
- trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
- busyp->length);
+ ASSERT(flen > 0);
- ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
- busyp->length) == 1);
+ pag = xfs_perag_get(mp, agno);
+ spin_lock(&pag->pagb_lock);
+restart:
+ rbp = pag->pagb_tree.rb_node;
+ while (rbp) {
+ struct xfs_busy_extent *busyp =
+ rb_entry(rbp, struct xfs_busy_extent, rb_node);
+ xfs_agblock_t bbno = busyp->bno;
+ xfs_agblock_t bend = bbno + busyp->length;
- list_del_init(&busyp->list);
+ if (fbno + flen <= bbno) {
+ rbp = rbp->rb_left;
+ continue;
+ } else if (fbno >= bend) {
+ rbp = rbp->rb_right;
+ continue;
+ }
- pag = xfs_perag_get(mp, busyp->agno);
- spin_lock(&pag->pagb_lock);
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
+ if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
+ userdata))
+ goto restart;
+ }
spin_unlock(&pag->pagb_lock);
xfs_perag_put(pag);
+}
+
+/*
+ * For a given extent [fbno, flen], search the busy extent list to find a
+ * subset of the extent that is not busy. If *rlen is smaller than
+ * args->minlen no suitable extent could be found, and the higher level
+ * code needs to force out the log and retry the allocation.
+ */
+STATIC void
+xfs_alloc_busy_trim(
+ struct xfs_alloc_arg *args,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ xfs_agblock_t *rbno,
+ xfs_extlen_t *rlen)
+{
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ struct rb_node *rbp;
+
+ ASSERT(len > 0);
+ spin_lock(&args->pag->pagb_lock);
+restart:
+ fbno = bno;
+ flen = len;
+ rbp = args->pag->pagb_tree.rb_node;
+ while (rbp && flen >= args->minlen) {
+ struct xfs_busy_extent *busyp =
+ rb_entry(rbp, struct xfs_busy_extent, rb_node);
+ xfs_agblock_t fend = fbno + flen;
+ xfs_agblock_t bbno = busyp->bno;
+ xfs_agblock_t bend = bbno + busyp->length;
+
+ if (fend <= bbno) {
+ rbp = rbp->rb_left;
+ continue;
+ } else if (fbno >= bend) {
+ rbp = rbp->rb_right;
+ continue;
+ }
+
+ /*
+ * If this is a metadata allocation, try to reuse the busy
+ * extent instead of trimming the allocation.
+ */
+ if (!args->userdata) {
+ if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
+ busyp, fbno, flen,
+ false))
+ goto restart;
+ continue;
+ }
+
+ if (bbno <= fbno) {
+ /* start overlap */
+
+ /*
+ * Case 1:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +---------+
+ * fbno fend
+ *
+ * Case 2:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +-------------+
+ * fbno fend
+ *
+ * Case 3:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +-------------+
+ * fbno fend
+ *
+ * Case 4:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +-----------------+
+ * fbno fend
+ *
+ * No unbusy region in extent, return failure.
+ */
+ if (fend <= bend)
+ goto fail;
+
+ /*
+ * Case 5:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +----------------------+
+ * fbno fend
+ *
+ * Case 6:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +--------------------------+
+ * fbno fend
+ *
+ * Needs to be trimmed to:
+ * +-------+
+ * fbno fend
+ */
+ fbno = bend;
+ } else if (bend >= fend) {
+ /* end overlap */
+
+ /*
+ * Case 7:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +------------------+
+ * fbno fend
+ *
+ * Case 8:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +--------------------------+
+ * fbno fend
+ *
+ * Needs to be trimmed to:
+ * +-------+
+ * fbno fend
+ */
+ fend = bbno;
+ } else {
+ /* middle overlap */
+
+ /*
+ * Case 9:
+ * bbno bend
+ * +BBBBBBBBBBBBBBBBB+
+ * +-----------------------------------+
+ * fbno fend
+ *
+ * Can be trimmed to:
+ * +-------+ OR +-------+
+ * fbno fend fbno fend
+ *
+ * Backward allocation leads to significant
+ * fragmentation of directories, which degrades
+ * directory performance, therefore we always want to
+ * choose the option that produces forward allocation
+ * patterns.
+ * Preferring the lower bno extent will make the next
+ * request use "fend" as the start of the next
+ * allocation; if the segment is no longer busy at
+ * that point, we'll get a contiguous allocation, but
+ * even if it is still busy, we will get a forward
+ * allocation.
+ * We try to avoid choosing the segment at "bend",
+ * because that can lead to the next allocation
+ * taking the segment at "fbno", which would be a
+ * backward allocation. We only use the segment at
+ * "fbno" if it is much larger than the current
+ * requested size, because in that case there's a
+ * good chance subsequent allocations will be
+ * contiguous.
+ */
+ if (bbno - fbno >= args->maxlen) {
+ /* left candidate fits perfectly */
+ fend = bbno;
+ } else if (fend - bend >= args->maxlen * 4) {
+ /* right candidate has enough free space */
+ fbno = bend;
+ } else if (bbno - fbno >= args->minlen) {
+ /* left candidate fits minimum requirement */
+ fend = bbno;
+ } else {
+ goto fail;
+ }
+ }
+
+ flen = fend - fbno;
+ }
+ spin_unlock(&args->pag->pagb_lock);
+
+ if (fbno != bno || flen != len) {
+ trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
+ fbno, flen);
+ }
+ *rbno = fbno;
+ *rlen = flen;
+ return;
+fail:
+ /*
+ * Return a zero extent length as a failure indication. All callers
+ * re-check if the trimmed extent satisfies the minlen requirement.
+ */
+ spin_unlock(&args->pag->pagb_lock);
+ trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
+ *rbno = fbno;
+ *rlen = 0;
+}
+
+static void
+xfs_alloc_busy_clear_one(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ struct xfs_busy_extent *busyp)
+{
+ if (busyp->length) {
+ trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
+ busyp->length);
+ rb_erase(&busyp->rb_node, &pag->pagb_tree);
+ }
+
+ list_del_init(&busyp->list);
kmem_free(busyp);
}
+
+void
+xfs_alloc_busy_clear(
+ struct xfs_mount *mp,
+ struct list_head *list)
+{
+ struct xfs_busy_extent *busyp, *n;
+ struct xfs_perag *pag = NULL;
+ xfs_agnumber_t agno = NULLAGNUMBER;
+
+ list_for_each_entry_safe(busyp, n, list, list) {
+ if (busyp->agno != agno) {
+ if (pag) {
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
+ }
+ pag = xfs_perag_get(mp, busyp->agno);
+ spin_lock(&pag->pagb_lock);
+ agno = busyp->agno;
+ }
+
+ xfs_alloc_busy_clear_one(mp, pag, busyp);
+ }
+
+ if (pag) {
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
+ }
+}
+
+/*
+ * Callback for list_sort to sort busy extents by the AG they reside in.
+ */
+int
+xfs_busy_extent_ag_cmp(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
+{
+ return container_of(a, struct xfs_busy_extent, list)->agno -
+ container_of(b, struct xfs_busy_extent, list)->agno;
+}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index d0b3bc72005b..240ad288f2f9 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len);
void
-xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
+xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
int
xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len);
+
+void
+xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
+
+int
+xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
+
+static inline void xfs_alloc_busy_sort(struct list_head *list)
+{
+ list_sort(NULL, list, xfs_busy_extent_ag_cmp);
+}
+
#endif /* __KERNEL__ */
/*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3916925e2584..8b469d53599f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
return 0;
}
+ xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+
xfs_trans_agbtree_delta(cur->bc_tp, 1);
new->s = cpu_to_be32(bno);
@@ -118,17 +120,6 @@ xfs_allocbt_free_block(
if (error)
return error;
- /*
- * Since blocks move to the free list without the coordination used in
- * xfs_bmap_finish, we can't allow block to be available for
- * reallocation and non-transaction writing (user data) until we know
- * that the transaction that moved it to the free list is permanently
- * on disk. We track the blocks by declaring these blocks as "busy";
- * the busy list is maintained on a per-ag basis and each transaction
- * records which entries should be removed when the iclog commits to
- * disk. If a busy block is allocated, the iclog is pushed up to the
- * LSN that freed the block.
- */
xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
return 0;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index be628677c288..9a84a85c03b1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -202,7 +202,7 @@ xfs_swap_extents(
xfs_inode_t *tip, /* tmp inode */
xfs_swapext_t *sxp)
{
- xfs_mount_t *mp;
+ xfs_mount_t *mp = ip->i_mount;
xfs_trans_t *tp;
xfs_bstat_t *sbp = &sxp->sx_stat;
xfs_ifork_t *tempifp, *ifp, *tifp;
@@ -212,16 +212,12 @@ xfs_swap_extents(
int taforkblks = 0;
__uint64_t tmp;
- mp = ip->i_mount;
-
tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
if (!tempifp) {
error = XFS_ERROR(ENOMEM);
goto out;
}
- sbp = &sxp->sx_stat;
-
/*
* we have to do two separate lock calls here to keep lockdep
* happy. If we try to get all the locks in one call, lock will
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a37480a6e023..c8e3349c287c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1354,7 +1354,7 @@ xfs_itruncate_start(
return 0;
}
last_byte = xfs_file_last_byte(ip);
- trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
+ trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
if (last_byte > toss_start) {
if (flags & XFS_ITRUNC_DEFINITE) {
xfs_tosspages(ip, toss_start,
@@ -1470,7 +1470,7 @@ xfs_itruncate_finish(
* file but the log buffers containing the free and reallocation
* don't, then we'd end up with garbage in the blocks being freed.
* As long as we make the new_size permanent before actually
- * freeing any blocks it doesn't matter if they get writtten to.
+ * freeing any blocks it doesn't matter if they get written to.
*
* The callers must signal into us whether or not the size
* setting here must be synchronous. There are a few cases
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 576fdfe81d60..09983a3344a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -970,7 +970,6 @@ xfs_iflush_abort(
{
xfs_inode_log_item_t *iip = ip->i_itemp;
- iip = ip->i_itemp;
if (iip) {
struct xfs_ail *ailp = iip->ili_item.li_ailp;
if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b612ce4520ae..211930246f20 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
xlog_cil_destroy(log);
+ /*
+ * Always ensure that the extra buffer does not point to memory
+ * owned by another log buffer before we free it.
+ */
+ xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
+ xfs_buf_free(log->l_xbuf);
+
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
xfs_buf_free(iclog->ic_bp);
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
}
spinlock_destroy(&log->l_icloglock);
- xfs_buf_free(log->l_xbuf);
log->l_mp->m_log = NULL;
kmem_free(log);
} /* xlog_dealloc_log */
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
return ticket;
}
-xlog_tid_t
-xfs_log_get_trans_ident(
- struct xfs_trans *tp)
-{
- return tp->t_ticket->t_tid;
-}
-
/*
* Allocate and initialise a new log ticket.
*/
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bd3291ef8d2..78c9039994af 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
-xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
-
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_log_vec *log_vector,
xfs_lsn_t *commit_lsn, int flags);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9ca59be08977..7d56e88a3f0e 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -361,13 +361,12 @@ xlog_cil_committed(
int abort)
{
struct xfs_cil_ctx *ctx = args;
- struct xfs_busy_extent *busyp, *n;
xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
ctx->start_lsn, abort);
- list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
- xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
+ xfs_alloc_busy_sort(&ctx->busy_extents);
+ xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
spin_lock(&ctx->cil->xc_cil_lock);
list_del(&ctx->committing);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5864850e9e34..2d3b6a498d63 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
shutdown */
#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
+typedef __uint32_t xlog_tid_t;
+
#ifdef __KERNEL__
/*
* Below are states for covering allocation transactions.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5cc464a17c93..04142caedb2b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -205,6 +205,35 @@ xlog_bread(
}
/*
+ * Read at an offset into the buffer. Returns with the buffer in its original
+ * state regardless of the result of the read.
+ */
+STATIC int
+xlog_bread_offset(
+ xlog_t *log,
+ xfs_daddr_t blk_no, /* block to read from */
+ int nbblks, /* blocks to read */
+ xfs_buf_t *bp, /* buffer to read into */
+ xfs_caddr_t offset) /* address the read data is placed at */
+{
+ xfs_caddr_t orig_offset = XFS_BUF_PTR(bp);
+ int orig_len = bp->b_buffer_length;
+ int error, error2;
+
+ error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
+ if (error)
+ return error;
+
+ error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+
+ /* must reset buffer pointer even on error; report the read error first */
+ error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
+ if (error)
+ return error;
+ return error2;
+}
+
+/*
* Write out the buffer at the given block for the given number of blocks.
* The buffer is kept locked across the write and is returned locked.
* This can only be used for synchronous log writes.
@@ -1229,20 +1258,12 @@ xlog_write_log_records(
*/
ealign = round_down(end_block, sectbb);
if (j == 0 && (start_block + endcount > ealign)) {
- offset = XFS_BUF_PTR(bp);
- balign = BBTOB(ealign - start_block);
- error = XFS_BUF_SET_PTR(bp, offset + balign,
- BBTOB(sectbb));
+ offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
+ error = xlog_bread_offset(log, ealign, sectbb,
+ bp, offset);
if (error)
break;
- error = xlog_bread_noalign(log, ealign, sectbb, bp);
- if (error)
- break;
-
- error = XFS_BUF_SET_PTR(bp, offset, bufblks);
- if (error)
- break;
}
offset = xlog_align(log, start_block, endcount, bp);
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
* - order is important.
*/
wrapped_hblks = hblks - split_hblks;
- error = XFS_BUF_SET_PTR(hbp,
- offset + BBTOB(split_hblks),
- BBTOB(hblks - split_hblks));
- if (error)
- goto bread_err2;
-
- error = xlog_bread_noalign(log, 0,
- wrapped_hblks, hbp);
- if (error)
- goto bread_err2;
-
- error = XFS_BUF_SET_PTR(hbp, offset,
- BBTOB(hblks));
+ error = xlog_bread_offset(log, 0,
+ wrapped_hblks, hbp,
+ offset + BBTOB(split_hblks));
if (error)
goto bread_err2;
}
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
* _first_, then the log start (LR header end)
* - order is important.
*/
- error = XFS_BUF_SET_PTR(dbp,
- offset + BBTOB(split_bblks),
- BBTOB(bblks - split_bblks));
- if (error)
- goto bread_err2;
-
- error = xlog_bread_noalign(log, wrapped_hblks,
- bblks - split_bblks,
- dbp);
- if (error)
- goto bread_err2;
-
- error = XFS_BUF_SET_PTR(dbp, offset, h_size);
+ error = xlog_bread_offset(log, wrapped_hblks,
+ bblks - split_bblks, dbp,
+ offset + BBTOB(split_bblks));
if (error)
goto bread_err2;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bb3f9a7b24ed..b49b82363d20 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
uint nmsb,
int rsvd)
{
- xfs_mod_sb_t *msbp = &msb[0];
+ xfs_mod_sb_t *msbp;
int error = 0;
/*
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
* changes will be atomic.
*/
spin_lock(&mp->m_sb_lock);
- for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
+ for (msbp = msb; msbp < (msb + nmsb); msbp++) {
ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
msbp->msb_field > XFS_SBS_FDBLOCKS);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 76922793f64f..d1f24858ccc4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,10 +608,8 @@ STATIC void
xfs_trans_free(
struct xfs_trans *tp)
{
- struct xfs_busy_extent *busyp, *n;
-
- list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
- xfs_alloc_busy_clear(tp->t_mountp, busyp);
+ xfs_alloc_busy_sort(&tp->t_busy);
+ xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
atomic_dec(&tp->t_mountp->m_active_trans);
xfs_trans_free_dqinfo(tp);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index acdb92f14d51..5fc2380092c8 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -346,20 +346,23 @@ xfs_ail_delete(
*/
STATIC void
xfs_ail_worker(
- struct work_struct *work)
+ struct work_struct *work)
{
- struct xfs_ail *ailp = container_of(to_delayed_work(work),
+ struct xfs_ail *ailp = container_of(to_delayed_work(work),
struct xfs_ail, xa_work);
- long tout;
- xfs_lsn_t target = ailp->xa_target;
- xfs_lsn_t lsn;
- xfs_log_item_t *lip;
- int flush_log, count, stuck;
- xfs_mount_t *mp = ailp->xa_mount;
+ xfs_mount_t *mp = ailp->xa_mount;
struct xfs_ail_cursor *cur = &ailp->xa_cursors;
- int push_xfsbufd = 0;
+ xfs_log_item_t *lip;
+ xfs_lsn_t lsn;
+ xfs_lsn_t target;
+ long tout = 10;
+ int flush_log = 0;
+ int stuck = 0;
+ int count = 0;
+ int push_xfsbufd = 0;
spin_lock(&ailp->xa_lock);
+ target = ailp->xa_target;
xfs_trans_ail_cursor_init(ailp, cur);
lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
@@ -368,8 +371,7 @@ xfs_ail_worker(
*/
xfs_trans_ail_cursor_done(ailp, cur);
spin_unlock(&ailp->xa_lock);
- ailp->xa_last_pushed_lsn = 0;
- return;
+ goto out_done;
}
XFS_STATS_INC(xs_push_ail);
@@ -386,8 +388,7 @@ xfs_ail_worker(
* lots of contention on the AIL lists.
*/
lsn = lip->li_lsn;
- flush_log = stuck = count = 0;
- while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
+ while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
int lock_result;
/*
* If we can lock the item without sleeping, unlock the AIL
@@ -480,21 +481,25 @@ xfs_ail_worker(
}
/* assume we have more work to do in a short while */
- tout = 10;
+out_done:
if (!count) {
/* We're past our target or empty, so idle */
ailp->xa_last_pushed_lsn = 0;
/*
- * Check for an updated push target before clearing the
- * XFS_AIL_PUSHING_BIT. If the target changed, we've got more
- * work to do. Wait a bit longer before starting that work.
+ * We clear the XFS_AIL_PUSHING_BIT first before checking
+ * whether the target has changed. If the target has changed,
+ * this pushes the requeue race directly onto the result of the
+ * atomic test/set bit, so we are guaranteed that either the
+ * pusher that changed the target or ourselves will requeue
+ * the work (but not both).
*/
+ clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
smp_rmb();
- if (ailp->xa_target == target) {
- clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
+ if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
+ test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
return;
- }
+
tout = 50;
} else if (XFS_LSN_CMP(lsn, target) >= 0) {
/*
@@ -553,7 +558,7 @@ xfs_ail_push(
* the XFS_AIL_PUSHING_BIT.
*/
smp_wmb();
- ailp->xa_target = threshold_lsn;
+ xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
}
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 26d1867d8156..65584b55607d 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
-typedef __uint32_t xlog_tid_t; /* transaction ID type */
-
/*
* These types are 64 bits on disk but are either 32 or 64 bits in memory.
* Disk based types: