summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile3
-rw-r--r--fs/aio.c2
-rw-r--r--fs/binfmt_elf.c1
-rw-r--r--fs/bio.c2
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/direct-io.c31
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/fat/dir.c23
-rw-r--r--fs/fat/fat.h40
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/fat/inode.c74
-rw-r--r--fs/fat/nfs.c221
-rw-r--r--fs/fscache/stats.c2
-rw-r--r--fs/hfsplus/extents.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/jbd/commit.c25
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/nfsd/nfs4state.c7
-rw-r--r--fs/notify/inotify/inotify_user.c8
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c6
-rw-r--r--fs/ocfs2/ioctl.c22
-rw-r--r--fs/ocfs2/move_extents.c53
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/array.c1
-rw-r--r--fs/proc/base.c100
-rw-r--r--fs/proc/internal.h18
-rw-r--r--fs/proc/kcore.c9
-rw-r--r--fs/proc/meminfo.c1
-rw-r--r--fs/proc/mmu.c60
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/sysfs/dir.c41
33 files changed, 536 insertions, 247 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 9d53192236fc..3b2c76759ec9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o super.o \
ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
- pnode.o drop_caches.o splice.o sync.o utimes.o \
+ pnode.o splice.o sync.o utimes.o \
stack.o fs_struct.o statfs.o
ifeq ($(CONFIG_BLOCK),y)
@@ -49,6 +49,7 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_COREDUMP) += coredump.o
+obj-$(CONFIG_SYSCTL) += drop_caches.o
obj-$(CONFIG_FHANDLE) += fhandle.o
diff --git a/fs/aio.c b/fs/aio.c
index 3f941f2a3059..1dc8786f4588 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1029,9 +1029,9 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
spin_unlock(&info->ring_lock);
out:
- kunmap_atomic(ring);
dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
(unsigned long)ring->head, (unsigned long)ring->tail);
+ kunmap_atomic(ring);
return ret;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3939829f6c5c..86af964c2425 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1137,6 +1137,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
goto whole;
if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
goto whole;
+ return 0;
}
/* Do not dump I/O mapped devices or special mappings */
diff --git a/fs/bio.c b/fs/bio.c
index bb5768f59b32..b96fc6ce4855 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1428,8 +1428,6 @@ void bio_endio(struct bio *bio, int error)
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
error = -EIO;
- trace_block_bio_complete(bio, error);
-
if (bio->bi_end_io)
bio->bi_end_io(bio, error);
}
diff --git a/fs/buffer.c b/fs/buffer.c
index b4dcb34c9635..10ef81e10b20 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -865,8 +865,6 @@ try_again:
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
-
- init_buffer(bh, NULL, NULL);
}
return head;
/*
@@ -2949,7 +2947,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
}
}
-int submit_bh(int rw, struct buffer_head * bh)
+int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
struct bio *bio;
int ret = 0;
@@ -2984,6 +2982,7 @@ int submit_bh(int rw, struct buffer_head * bh)
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
+ bio->bi_flags |= bio_flags;
/* Take care of bh's that straddle the end of the device */
guard_bh_eod(rw, bio, bh);
@@ -2997,6 +2996,12 @@ int submit_bh(int rw, struct buffer_head * bh)
bio_put(bio);
return ret;
}
+EXPORT_SYMBOL_GPL(_submit_bh);
+
+int submit_bh(int rw, struct buffer_head *bh)
+{
+ return _submit_bh(rw, bh, 0);
+}
EXPORT_SYMBOL(submit_bh);
/**
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f853263cf74f..cfb816dc6d9f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -672,12 +672,6 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
if (sdio->final_block_in_bio != sdio->cur_page_block ||
cur_offset != bio_next_offset)
dio_bio_submit(dio, sdio);
- /*
- * Submit now if the underlying fs is about to perform a
- * metadata read
- */
- else if (sdio->boundary)
- dio_bio_submit(dio, sdio);
}
if (sdio->bio == NULL) {
@@ -737,16 +731,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
sdio->cur_page_block +
(sdio->cur_page_len >> sdio->blkbits) == blocknr) {
sdio->cur_page_len += len;
-
- /*
- * If sdio->boundary then we want to schedule the IO now to
- * avoid metadata seeks.
- */
- if (sdio->boundary) {
- ret = dio_send_cur_page(dio, sdio, map_bh);
- page_cache_release(sdio->cur_page);
- sdio->cur_page = NULL;
- }
goto out;
}
@@ -758,7 +742,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
page_cache_release(sdio->cur_page);
sdio->cur_page = NULL;
if (ret)
- goto out;
+ return ret;
}
page_cache_get(page); /* It is in dio */
@@ -768,6 +752,16 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
sdio->cur_page_block = blocknr;
sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits;
out:
+ /*
+ * If sdio->boundary then we want to schedule the IO now to
+ * avoid metadata seeks.
+ */
+ if (sdio->boundary) {
+ ret = dio_send_cur_page(dio, sdio, map_bh);
+ dio_bio_submit(dio, sdio);
+ page_cache_release(sdio->cur_page);
+ sdio->cur_page = NULL;
+ }
return ret;
}
@@ -969,7 +963,8 @@ do_holes:
this_chunk_bytes = this_chunk_blocks << blkbits;
BUG_ON(this_chunk_bytes == 0);
- sdio->boundary = buffer_boundary(map_bh);
+ if (this_chunk_blocks == sdio->blocks_available)
+ sdio->boundary = buffer_boundary(map_bh);
ret = submit_page_section(dio, sdio, page,
offset_in_page,
this_chunk_bytes,
diff --git a/fs/exec.c b/fs/exec.c
index a96a4885bbbf..87e731f020fb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -613,7 +613,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
@@ -622,7 +622,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb, new_end, old_end);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index fb5120a5505c..3dc48cc8b6eb 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2067,7 +2067,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
"writeback");
- sb->s_flags |= MS_SNAP_STABLE;
return 0;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 165012ef363a..7a6f02caf286 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -964,6 +964,29 @@ int fat_scan(struct inode *dir, const unsigned char *name,
}
EXPORT_SYMBOL_GPL(fat_scan);
+/*
+ * Scans a directory for a given logstart.
+ * Returns an error code or zero.
+ */
+int fat_scan_logstart(struct inode *dir, int i_logstart,
+ struct fat_slot_info *sinfo)
+{
+ struct super_block *sb = dir->i_sb;
+
+ sinfo->slot_off = 0;
+ sinfo->bh = NULL;
+ while (fat_get_short_entry(dir, &sinfo->slot_off, &sinfo->bh,
+ &sinfo->de) >= 0) {
+ if (fat_get_start(MSDOS_SB(sb), sinfo->de) == i_logstart) {
+ sinfo->slot_off -= sizeof(*sinfo->de);
+ sinfo->nr_slots = 1;
+ sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
{
struct super_block *sb = dir->i_sb;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e9cc3f0d58e2..21664fcf3616 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -23,6 +23,9 @@
#define FAT_ERRORS_PANIC 2 /* panic on error */
#define FAT_ERRORS_RO 3 /* remount r/o on error */
+#define FAT_NFS_STALE_RW 1 /* NFS RW support, can cause ESTALE */
+#define FAT_NFS_NOSTALE_RO 2 /* NFS RO support, no ESTALE issue */
+
struct fat_mount_options {
kuid_t fs_uid;
kgid_t fs_gid;
@@ -34,6 +37,7 @@ struct fat_mount_options {
unsigned short shortname; /* flags for shortname display/create rule */
unsigned char name_check; /* r = relaxed, n = normal, s = strict */
unsigned char errors; /* On error: continue, panic, remount-ro */
+ unsigned char nfs; /* NFS support: nostale_ro, stale_rw */
unsigned short allow_utime;/* permission for setting the [am]time */
unsigned quiet:1, /* set = fake successful chmods and chowns */
showexec:1, /* set = only set x bit for com/exe/bat */
@@ -48,8 +52,7 @@ struct fat_mount_options {
usefree:1, /* Use free_clusters for FAT32 */
tz_set:1, /* Filesystem timestamps' offset set */
rodir:1, /* allow ATTR_RO for directory */
- discard:1, /* Issue discard requests on deletions */
- nfs:1; /* Do extra work needed for NFS export */
+ discard:1; /* Issue discard requests on deletions */
};
#define FAT_HASH_BITS 8
@@ -72,6 +75,7 @@ struct msdos_sb_info {
unsigned long root_cluster; /* first cluster of the root directory */
unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */
struct mutex fat_lock;
+ struct mutex nfs_build_inode_lock;
struct mutex s_lock;
unsigned int prev_free; /* previously allocated cluster number */
unsigned int free_clusters; /* -1 if undefined */
@@ -215,6 +219,27 @@ static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus)
+ sbi->data_start;
}
+static inline void fat_get_blknr_offset(struct msdos_sb_info *sbi,
+ loff_t i_pos, sector_t *blknr, int *offset)
+{
+ *blknr = i_pos >> sbi->dir_per_block_bits;
+ *offset = i_pos & (sbi->dir_per_block - 1);
+}
+
+static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
+ struct inode *inode)
+{
+ loff_t i_pos;
+#if BITS_PER_LONG == 32
+ spin_lock(&sbi->inode_hash_lock);
+#endif
+ i_pos = MSDOS_I(inode)->i_pos;
+#if BITS_PER_LONG == 32
+ spin_unlock(&sbi->inode_hash_lock);
+#endif
+ return i_pos;
+}
+
static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len)
{
#ifdef __BIG_ENDIAN
@@ -271,6 +296,8 @@ extern int fat_dir_empty(struct inode *dir);
extern int fat_subdirs(struct inode *dir);
extern int fat_scan(struct inode *dir, const unsigned char *name,
struct fat_slot_info *sinfo);
+extern int fat_scan_logstart(struct inode *dir, int i_logstart,
+ struct fat_slot_info *sinfo);
extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
struct msdos_dir_entry **de);
extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
@@ -348,6 +375,7 @@ extern struct inode *fat_build_inode(struct super_block *sb,
extern int fat_sync_inode(struct inode *inode);
extern int fat_fill_super(struct super_block *sb, void *data, int silent,
int isvfat, void (*setup)(struct super_block *));
+extern int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de);
extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
@@ -382,12 +410,8 @@ int fat_cache_init(void);
void fat_cache_destroy(void);
/* fat/nfs.c */
-struct fid;
-extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type);
-extern struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type);
-extern struct dentry *fat_get_parent(struct dentry *child_dir);
+extern const struct export_operations fat_export_ops;
+extern const struct export_operations fat_export_ops_nostale;
/* helper for printk */
typedef unsigned long long llu;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 3978f8ca1823..b0b632e50ddb 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -306,6 +306,11 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
struct inode *inode = dentry->d_inode;
generic_fillattr(inode, stat);
stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
+
+ if (MSDOS_SB(inode->i_sb)->options.nfs == FAT_NFS_NOSTALE_RO) {
+ /* Use i_pos for ino. This is used as fileid of nfs. */
+ stat->ino = fat_i_pos_read(MSDOS_SB(inode->i_sb), inode);
+ }
return 0;
}
EXPORT_SYMBOL_GPL(fat_getattr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index acf6e479b443..4ff901632b26 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -18,7 +18,6 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/buffer_head.h>
-#include <linux/exportfs.h>
#include <linux/mount.h>
#include <linux/vfs.h>
#include <linux/parser.h>
@@ -385,7 +384,7 @@ static int fat_calc_dir_size(struct inode *inode)
}
/* doesn't deal with root inode */
-static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
+int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
@@ -444,12 +443,25 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
return 0;
}
+static inline void fat_lock_build_inode(struct msdos_sb_info *sbi)
+{
+ if (sbi->options.nfs == FAT_NFS_NOSTALE_RO)
+ mutex_lock(&sbi->nfs_build_inode_lock);
+}
+
+static inline void fat_unlock_build_inode(struct msdos_sb_info *sbi)
+{
+ if (sbi->options.nfs == FAT_NFS_NOSTALE_RO)
+ mutex_unlock(&sbi->nfs_build_inode_lock);
+}
+
struct inode *fat_build_inode(struct super_block *sb,
struct msdos_dir_entry *de, loff_t i_pos)
{
struct inode *inode;
int err;
+ fat_lock_build_inode(MSDOS_SB(sb));
inode = fat_iget(sb, i_pos);
if (inode)
goto out;
@@ -469,6 +481,7 @@ struct inode *fat_build_inode(struct super_block *sb,
fat_attach(inode, i_pos);
insert_inode_hash(inode);
out:
+ fat_unlock_build_inode(MSDOS_SB(sb));
return inode;
}
@@ -655,20 +668,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
- struct inode *inode)
-{
- loff_t i_pos;
-#if BITS_PER_LONG == 32
- spin_lock(&sbi->inode_hash_lock);
-#endif
- i_pos = MSDOS_I(inode)->i_pos;
-#if BITS_PER_LONG == 32
- spin_unlock(&sbi->inode_hash_lock);
-#endif
- return i_pos;
-}
-
static int __fat_write_inode(struct inode *inode, int wait)
{
struct super_block *sb = inode->i_sb;
@@ -676,7 +675,8 @@ static int __fat_write_inode(struct inode *inode, int wait)
struct buffer_head *bh;
struct msdos_dir_entry *raw_entry;
loff_t i_pos;
- int err;
+ sector_t blocknr;
+ int err, offset;
if (inode->i_ino == MSDOS_ROOT_INO)
return 0;
@@ -686,7 +686,8 @@ retry:
if (!i_pos)
return 0;
- bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
+ fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset);
+ bh = sb_bread(sb, blocknr);
if (!bh) {
fat_msg(sb, KERN_ERR, "unable to read inode block "
"for updating (i_pos %lld)", i_pos);
@@ -699,8 +700,7 @@ retry:
goto retry;
}
- raw_entry = &((struct msdos_dir_entry *) (bh->b_data))
- [i_pos & (sbi->dir_per_block - 1)];
+ raw_entry = &((struct msdos_dir_entry *) (bh->b_data))[offset];
if (S_ISDIR(inode->i_mode))
raw_entry->size = 0;
else
@@ -761,12 +761,6 @@ static const struct super_operations fat_sops = {
.show_options = fat_show_options,
};
-static const struct export_operations fat_export_ops = {
- .fh_to_dentry = fat_fh_to_dentry,
- .fh_to_parent = fat_fh_to_parent,
- .get_parent = fat_get_parent,
-};
-
static int fat_show_options(struct seq_file *m, struct dentry *root)
{
struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
@@ -814,8 +808,6 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",usefree");
if (opts->quiet)
seq_puts(m, ",quiet");
- if (opts->nfs)
- seq_puts(m, ",nfs");
if (opts->showexec)
seq_puts(m, ",showexec");
if (opts->sys_immutable)
@@ -849,6 +841,10 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",errors=panic");
else
seq_puts(m, ",errors=remount-ro");
+ if (opts->nfs == FAT_NFS_NOSTALE_RO)
+ seq_puts(m, ",nfs=nostale_ro");
+ else if (opts->nfs)
+ seq_puts(m, ",nfs=stale_rw");
if (opts->discard)
seq_puts(m, ",discard");
@@ -865,7 +861,7 @@ enum {
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
- Opt_err,
+ Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err,
};
static const match_table_t fat_tokens = {
@@ -895,7 +891,9 @@ static const match_table_t fat_tokens = {
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
{Opt_discard, "discard"},
- {Opt_nfs, "nfs"},
+ {Opt_nfs_stale_rw, "nfs"},
+ {Opt_nfs_stale_rw, "nfs=stale_rw"},
+ {Opt_nfs_nostale_ro, "nfs=nostale_ro"},
{Opt_obsolete, "conv=binary"},
{Opt_obsolete, "conv=text"},
{Opt_obsolete, "conv=auto"},
@@ -1092,6 +1090,12 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
case Opt_err_ro:
opts->errors = FAT_ERRORS_RO;
break;
+ case Opt_nfs_stale_rw:
+ opts->nfs = FAT_NFS_STALE_RW;
+ break;
+ case Opt_nfs_nostale_ro:
+ opts->nfs = FAT_NFS_NOSTALE_RO;
+ break;
/* msdos specific */
case Opt_dots:
@@ -1150,9 +1154,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
case Opt_discard:
opts->discard = 1;
break;
- case Opt_nfs:
- opts->nfs = 1;
- break;
/* obsolete mount options */
case Opt_obsolete:
@@ -1183,6 +1184,10 @@ out:
opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
if (opts->unicode_xlate)
opts->utf8 = 0;
+ if (opts->nfs == FAT_NFS_NOSTALE_RO) {
+ sb->s_flags |= MS_RDONLY;
+ sb->s_export_op = &fat_export_ops_nostale;
+ }
return 0;
}
@@ -1193,7 +1198,7 @@ static int fat_read_root(struct inode *inode)
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int error;
- MSDOS_I(inode)->i_pos = 0;
+ MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO;
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode->i_version++;
@@ -1256,6 +1261,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
sb->s_magic = MSDOS_SUPER_MAGIC;
sb->s_op = &fat_sops;
sb->s_export_op = &fat_export_ops;
+ mutex_init(&sbi->nfs_build_inode_lock);
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index 499c10438ca2..93e14933dcb6 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -14,6 +14,18 @@
#include <linux/exportfs.h>
#include "fat.h"
+struct fat_fid {
+ u32 i_gen;
+ u32 i_pos_low;
+ u16 i_pos_hi;
+ u16 parent_i_pos_hi;
+ u32 parent_i_pos_low;
+ u32 parent_i_gen;
+};
+
+#define FAT_FID_SIZE_WITHOUT_PARENT 3
+#define FAT_FID_SIZE_WITH_PARENT (sizeof(struct fat_fid)/sizeof(u32))
+
/**
* Look up a directory inode given its starting cluster.
*/
@@ -38,63 +50,252 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart)
return inode;
}
-static struct inode *fat_nfs_get_inode(struct super_block *sb,
- u64 ino, u32 generation)
+static struct inode *fat_ilookup(struct super_block *sb, u64 ino, loff_t i_pos)
{
- struct inode *inode;
+ if (MSDOS_SB(sb)->options.nfs == FAT_NFS_NOSTALE_RO)
+ return fat_iget(sb, i_pos);
- if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO))
- return NULL;
+ else {
+ if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO))
+ return NULL;
+ return ilookup(sb, ino);
+ }
+}
+
+static struct inode *__fat_nfs_get_inode(struct super_block *sb,
+ u64 ino, u32 generation, loff_t i_pos)
+{
+ struct inode *inode = fat_ilookup(sb, ino, i_pos);
- inode = ilookup(sb, ino);
if (inode && generation && (inode->i_generation != generation)) {
iput(inode);
inode = NULL;
}
+ if (inode == NULL && MSDOS_SB(sb)->options.nfs == FAT_NFS_NOSTALE_RO) {
+ struct buffer_head *bh = NULL;
+ struct msdos_dir_entry *de ;
+ sector_t blocknr;
+ int offset;
+ fat_get_blknr_offset(MSDOS_SB(sb), i_pos, &blocknr, &offset);
+ bh = sb_bread(sb, blocknr);
+ if (!bh) {
+ fat_msg(sb, KERN_ERR,
+ "unable to read block(%llu) for building NFS inode",
+ (llu)blocknr);
+ return inode;
+ }
+ de = (struct msdos_dir_entry *)bh->b_data;
+ /* If a file is deleted on server and client is not updated
+ * yet, we must not build the inode upon a lookup call.
+ */
+ if (IS_FREE(de[offset].name))
+ inode = NULL;
+ else
+ inode = fat_build_inode(sb, &de[offset], i_pos);
+ brelse(bh);
+ }
return inode;
}
+static struct inode *fat_nfs_get_inode(struct super_block *sb,
+ u64 ino, u32 generation)
+{
+
+ return __fat_nfs_get_inode(sb, ino, generation, 0);
+}
+
+static int
+fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp,
+ struct inode *parent)
+{
+ int len = *lenp;
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+ struct fat_fid *fid = (struct fat_fid *) fh;
+ loff_t i_pos;
+ int type = FILEID_FAT_WITHOUT_PARENT;
+
+ if (parent) {
+ if (len < FAT_FID_SIZE_WITH_PARENT) {
+ *lenp = FAT_FID_SIZE_WITH_PARENT;
+ return FILEID_INVALID;
+ }
+ } else {
+ if (len < FAT_FID_SIZE_WITHOUT_PARENT) {
+ *lenp = FAT_FID_SIZE_WITHOUT_PARENT;
+ return FILEID_INVALID;
+ }
+ }
+
+ i_pos = fat_i_pos_read(sbi, inode);
+ *lenp = FAT_FID_SIZE_WITHOUT_PARENT;
+ fid->i_gen = inode->i_generation;
+ fid->i_pos_low = i_pos & 0xFFFFFFFF;
+ fid->i_pos_hi = (i_pos >> 32) & 0xFFFF;
+ if (parent) {
+ i_pos = fat_i_pos_read(sbi, parent);
+ fid->parent_i_pos_hi = (i_pos >> 32) & 0xFFFF;
+ fid->parent_i_pos_low = i_pos & 0xFFFFFFFF;
+ fid->parent_i_gen = parent->i_generation;
+ type = FILEID_FAT_WITH_PARENT;
+ *lenp = FAT_FID_SIZE_WITH_PARENT;
+ }
+
+ return type;
+}
+
/**
* Map a NFS file handle to a corresponding dentry.
* The dentry may or may not be connected to the filesystem root.
*/
-struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
+static struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
fat_nfs_get_inode);
}
+static struct dentry *fat_fh_to_dentry_nostale(struct super_block *sb,
+ struct fid *fh, int fh_len,
+ int fh_type)
+{
+ struct inode *inode = NULL;
+ struct fat_fid *fid = (struct fat_fid *)fh;
+ loff_t i_pos;
+
+ switch (fh_type) {
+ case FILEID_FAT_WITHOUT_PARENT:
+ if (fh_len < FAT_FID_SIZE_WITHOUT_PARENT)
+ return NULL;
+ break;
+ case FILEID_FAT_WITH_PARENT:
+ if (fh_len < FAT_FID_SIZE_WITH_PARENT)
+ return NULL;
+ break;
+ default:
+ return NULL;
+ }
+ i_pos = fid->i_pos_hi;
+ i_pos = (i_pos << 32) | (fid->i_pos_low);
+ inode = __fat_nfs_get_inode(sb, 0, fid->i_gen, i_pos);
+
+ return d_obtain_alias(inode);
+}
+
/*
* Find the parent for a file specified by NFS handle.
* This requires that the handle contain the i_ino of the parent.
*/
-struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
+static struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
return generic_fh_to_parent(sb, fid, fh_len, fh_type,
fat_nfs_get_inode);
}
+static struct dentry *fat_fh_to_parent_nostale(struct super_block *sb,
+ struct fid *fh, int fh_len,
+ int fh_type)
+{
+ struct inode *inode = NULL;
+ struct fat_fid *fid = (struct fat_fid *)fh;
+ loff_t i_pos;
+
+ if (fh_len < FAT_FID_SIZE_WITH_PARENT)
+ return NULL;
+
+ switch (fh_type) {
+ case FILEID_FAT_WITH_PARENT:
+ i_pos = fid->parent_i_pos_hi;
+ i_pos = (i_pos << 32) | (fid->parent_i_pos_low);
+ inode = __fat_nfs_get_inode(sb, 0, fid->parent_i_gen, i_pos);
+ break;
+ }
+
+ return d_obtain_alias(inode);
+}
+
+/*
+ * Rebuild the parent for a directory that is not connected
+ * to the filesystem root
+ */
+static
+struct inode *fat_rebuild_parent(struct super_block *sb, int parent_logstart)
+{
+ int search_clus, clus_to_match;
+ struct msdos_dir_entry *de;
+ struct inode *parent = NULL;
+ struct inode *dummy_grand_parent = NULL;
+ struct fat_slot_info sinfo;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ sector_t blknr = fat_clus_to_blknr(sbi, parent_logstart);
+ struct buffer_head *parent_bh = sb_bread(sb, blknr);
+ if (!parent_bh) {
+ fat_msg(sb, KERN_ERR,
+ "unable to read cluster of parent directory");
+ return NULL;
+ }
+
+ de = (struct msdos_dir_entry *) parent_bh->b_data;
+ clus_to_match = fat_get_start(sbi, &de[0]);
+ search_clus = fat_get_start(sbi, &de[1]);
+
+ dummy_grand_parent = fat_dget(sb, search_clus);
+ if (!dummy_grand_parent) {
+ dummy_grand_parent = new_inode(sb);
+ if (!dummy_grand_parent) {
+ brelse(parent_bh);
+ return parent;
+ }
+
+ dummy_grand_parent->i_ino = iunique(sb, MSDOS_ROOT_INO);
+ fat_fill_inode(dummy_grand_parent, &de[1]);
+ MSDOS_I(dummy_grand_parent)->i_pos = -1;
+ }
+
+ if (!fat_scan_logstart(dummy_grand_parent, clus_to_match, &sinfo))
+ parent = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
+
+ brelse(parent_bh);
+ iput(dummy_grand_parent);
+
+ return parent;
+}
+
/*
* Find the parent for a directory that is not currently connected to
* the filesystem root.
*
* On entry, the caller holds child_dir->d_inode->i_mutex.
*/
-struct dentry *fat_get_parent(struct dentry *child_dir)
+static struct dentry *fat_get_parent(struct dentry *child_dir)
{
struct super_block *sb = child_dir->d_sb;
struct buffer_head *bh = NULL;
struct msdos_dir_entry *de;
struct inode *parent_inode = NULL;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) {
- int parent_logstart = fat_get_start(MSDOS_SB(sb), de);
+ int parent_logstart = fat_get_start(sbi, de);
parent_inode = fat_dget(sb, parent_logstart);
+ if (!parent_inode && sbi->options.nfs == FAT_NFS_NOSTALE_RO)
+ parent_inode = fat_rebuild_parent(sb, parent_logstart);
}
brelse(bh);
return d_obtain_alias(parent_inode);
}
+
+const struct export_operations fat_export_ops = {
+ .fh_to_dentry = fat_fh_to_dentry,
+ .fh_to_parent = fat_fh_to_parent,
+ .get_parent = fat_get_parent,
+};
+
+const struct export_operations fat_export_ops_nostale = {
+ .encode_fh = fat_encode_fh_nostale,
+ .fh_to_dentry = fat_fh_to_dentry_nostale,
+ .fh_to_parent = fat_fh_to_parent_nostale,
+ .get_parent = fat_get_parent,
+};
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 8179e8bc4a3d..40d13c70ef51 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -287,5 +287,5 @@ const struct file_operations fscache_stats_fops = {
.open = fscache_stats_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = single_release,
};
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index a94f0f779d5e..fe0a76213d9e 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -533,7 +533,7 @@ void hfsplus_file_truncate(struct inode *inode)
struct address_space *mapping = inode->i_mapping;
struct page *page;
void *fsdata;
- u32 size = inode->i_size;
+ loff_t size = inode->i_size;
res = pagecache_write_begin(NULL, mapping, size, 0,
AOP_FLAG_UNINTERRUPTIBLE,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 84e3d856e91d..523464e62849 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* way when do_mmap_pgoff unwinds (may be important on powerpc
* and ia64).
*/
- vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 86b39b167c23..11bb11f48b3a 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -162,8 +162,17 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
for (i = 0; i < bufs; i++) {
wbuf[i]->b_end_io = end_buffer_write_sync;
- /* We use-up our safety reference in submit_bh() */
- submit_bh(write_op, wbuf[i]);
+ /*
+ * Here we write back pagecache data that may be mmaped. Since
+ * we cannot afford to clean the page and set PageWriteback
+ * here due to lock ordering (page lock ranks above transaction
+ * start), the data can change while IO is in flight. Tell the
+ * block layer it should bounce the bio pages if stable data
+ * during write is required.
+ *
+ * We use up our safety reference in submit_bh().
+ */
+ _submit_bh(write_op, wbuf[i], 1 << BIO_SNAP_STABLE);
}
}
@@ -667,7 +676,17 @@ start_journal_io:
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
- submit_bh(write_op, bh);
+ /*
+ * In data=journal mode, here we can end up
+ * writing pagecache data that might be
+ * mmapped. Since we can't afford to clean the
+ * page and set PageWriteback (see the comment
+ * near the other use of _submit_bh()), the
+ * data can change while the write is in
+ * flight. Tell the block layer to bounce the
+ * bio pages if stable pages are required.
+ */
+ _submit_bh(write_op, bh, 1 << BIO_SNAP_STABLE);
}
cond_resched();
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 81cc7eaff863..865c4308acb6 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -310,8 +310,6 @@ int journal_write_metadata_buffer(transaction_t *transaction,
new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
/* keep subsequent assertions sane */
- new_bh->b_state = 0;
- init_buffer(new_bh, NULL, NULL);
atomic_set(&new_bh->b_count, 1);
new_jh = journal_add_journal_head(new_bh); /* This sleeps */
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ed10991ab006..8b220f1ab54f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -367,8 +367,6 @@ retry_alloc:
}
/* keep subsequent assertions sane */
- new_bh->b_state = 0;
- init_buffer(new_bh, NULL, NULL);
atomic_set(&new_bh->b_count, 1);
new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e27430b9070..417c84877742 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -234,7 +234,6 @@ static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
kmem_cache *slab)
{
struct idr *stateids = &cl->cl_stateids;
- static int min_stateid = 0;
struct nfs4_stid *stid;
int new_id;
@@ -242,7 +241,7 @@ kmem_cache *slab)
if (!stid)
return NULL;
- new_id = idr_alloc(stateids, stid, min_stateid, 0, GFP_KERNEL);
+ new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL);
if (new_id < 0)
goto out_free;
stid->sc_client = cl;
@@ -261,10 +260,6 @@ kmem_cache *slab)
* amount of time until an id is reused, by ensuring they always
* "increase" (mod INT_MAX):
*/
-
- min_stateid = new_id+1;
- if (min_stateid == INT_MAX)
- min_stateid = 0;
return stid;
out_free:
kfree(stid);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e0f7c1241a6a..8562bd3af947 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -359,7 +359,6 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
}
static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
- int *last_wd,
struct inotify_inode_mark *i_mark)
{
int ret;
@@ -367,11 +366,10 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
idr_preload(GFP_KERNEL);
spin_lock(idr_lock);
- ret = idr_alloc(idr, i_mark, *last_wd + 1, 0, GFP_NOWAIT);
+ ret = idr_alloc_cyclic(idr, i_mark, 1, 0, GFP_NOWAIT);
if (ret >= 0) {
/* we added the mark to the idr, take a reference */
i_mark->wd = ret;
- *last_wd = i_mark->wd;
fsnotify_get_mark(&i_mark->fsn_mark);
}
@@ -638,8 +636,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
goto out_err;
- ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
- tmp_i_mark);
+ ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
if (ret)
goto out_err;
@@ -697,7 +694,6 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
- group->inotify_data.last_wd = 0;
group->inotify_data.user = get_current_user();
if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index eeac97bb3bfa..b3fdd1a323d6 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1498,10 +1498,8 @@ leave:
dlm_put(dlm);
if (ret < 0) {
- if (buf)
- kfree(buf);
- if (item)
- kfree(item);
+ kfree(buf);
+ kfree(item);
mlog_errno(ret);
}
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 752f0b26221d..0c60ef2d8056 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -101,13 +101,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
if (!S_ISDIR(inode->i_mode))
flags &= ~OCFS2_DIRSYNC_FL;
- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto bail_unlock;
- }
-
oldflags = ocfs2_inode->ip_attr;
flags = flags & mask;
flags |= oldflags & ~mask;
@@ -120,7 +113,14 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
(OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
if (!capable(CAP_LINUX_IMMUTABLE))
- goto bail_commit;
+ goto bail_unlock;
+ }
+
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail_unlock;
}
ocfs2_inode->ip_attr = flags;
@@ -130,8 +130,8 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
if (status < 0)
mlog_errno(status);
-bail_commit:
ocfs2_commit_trans(osb, handle);
+
bail_unlock:
ocfs2_inode_unlock(inode, 1);
bail:
@@ -706,8 +706,10 @@ int ocfs2_info_handle_freefrag(struct inode *inode,
o2info_set_request_filled(&oiff->iff_req);
- if (o2info_to_user(*oiff, req))
+ if (o2info_to_user(*oiff, req)) {
+ status = -EFAULT;
goto bail;
+ }
status = 0;
bail:
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 9f8dcadd9a50..f1fc172175b6 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -471,7 +471,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
int ret, goal_bit = 0;
struct buffer_head *gd_bh = NULL;
- struct ocfs2_group_desc *bg = NULL;
+ struct ocfs2_group_desc *bg;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int c_to_b = 1 << (osb->s_clustersize_bits -
inode->i_sb->s_blocksize_bits);
@@ -482,13 +482,6 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb,
range->me_goal);
/*
- * moving goal is not allowd to start with a group desc blok(#0 blk)
- * let's compromise to the latter cluster.
- */
- if (range->me_goal == le64_to_cpu(bg->bg_blkno))
- range->me_goal += c_to_b;
-
- /*
* validate goal sits within global_bitmap, and return the victim
* group desc
*/
@@ -502,6 +495,13 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
bg = (struct ocfs2_group_desc *)gd_bh->b_data;
/*
+ * moving goal is not allowd to start with a group desc blok(#0 blk)
+ * let's compromise to the latter cluster.
+ */
+ if (range->me_goal == le64_to_cpu(bg->bg_blkno))
+ range->me_goal += c_to_b;
+
+ /*
* movement is not gonna cross two groups.
*/
if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize <
@@ -1057,42 +1057,40 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
struct inode *inode = file_inode(filp);
struct ocfs2_move_extents range;
- struct ocfs2_move_extents_context *context = NULL;
+ struct ocfs2_move_extents_context *context;
+
+ if (!argp)
+ return -EINVAL;
status = mnt_want_write_file(filp);
if (status)
return status;
if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE))
- goto out;
+ goto out_drop;
if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
status = -EPERM;
- goto out;
+ goto out_drop;
}
context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS);
if (!context) {
status = -ENOMEM;
mlog_errno(status);
- goto out;
+ goto out_drop;
}
context->inode = inode;
context->file = filp;
- if (argp) {
- if (copy_from_user(&range, argp, sizeof(range))) {
- status = -EFAULT;
- goto out;
- }
- } else {
- status = -EINVAL;
- goto out;
+ if (copy_from_user(&range, argp, sizeof(range))) {
+ status = -EFAULT;
+ goto out_free;
}
if (range.me_start > i_size_read(inode))
- goto out;
+ goto out_free;
if (range.me_start + range.me_len > i_size_read(inode))
range.me_len = i_size_read(inode) - range.me_start;
@@ -1124,25 +1122,24 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
status = ocfs2_validate_and_adjust_move_goal(inode, &range);
if (status)
- goto out;
+ goto out_copy;
}
status = ocfs2_move_extents(context);
if (status)
mlog_errno(status);
-out:
+out_copy:
/*
* movement/defragmentation may end up being partially completed,
* that's the reason why we need to return userspace the finished
* length and new_offset even if failure happens somewhere.
*/
- if (argp) {
- if (copy_to_user(argp, &range, sizeof(range)))
- status = -EFAULT;
- }
+ if (copy_to_user(argp, &range, sizeof(range)))
+ status = -EFAULT;
+out_free:
kfree(context);
-
+out_drop:
mnt_drop_write_file(filp);
return status;
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 712f24db9600..ab30716584f5 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -5,7 +5,7 @@
obj-y += proc.o
proc-y := nommu.o task_nommu.o
-proc-$(CONFIG_MMU) := mmu.o task_mmu.o
+proc-$(CONFIG_MMU) := task_mmu.o
proc-y += inode.o root.o base.o generic.o array.o \
fd.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f7ed9ee46eb9..cbd0f1b324b9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -143,6 +143,7 @@ static const char * const task_state_array[] = {
"x (dead)", /* 64 */
"K (wakekill)", /* 128 */
"W (waking)", /* 256 */
+ "P (parked)", /* 512 */
};
static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69078c7cef1f..a19308604145 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -86,6 +86,7 @@
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/flex_array.h>
+#include <linux/posix-timers.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
@@ -2013,6 +2014,102 @@ static const struct file_operations proc_map_files_operations = {
.llseek = default_llseek,
};
+struct timers_private {
+ struct pid *pid;
+ struct task_struct *task;
+ struct sighand_struct *sighand;
+ struct pid_namespace *ns;
+ unsigned long flags;
+};
+
+static void *timers_start(struct seq_file *m, loff_t *pos)
+{
+ struct timers_private *tp = m->private;
+
+ tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
+ if (!tp->task)
+ return ERR_PTR(-ESRCH);
+
+ tp->sighand = lock_task_sighand(tp->task, &tp->flags);
+ if (!tp->sighand)
+ return ERR_PTR(-ESRCH);
+
+ return seq_list_start(&tp->task->signal->posix_timers, *pos);
+}
+
+static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct timers_private *tp = m->private;
+ return seq_list_next(v, &tp->task->signal->posix_timers, pos);
+}
+
+static void timers_stop(struct seq_file *m, void *v)
+{
+ struct timers_private *tp = m->private;
+
+ if (tp->sighand) {
+ unlock_task_sighand(tp->task, &tp->flags);
+ tp->sighand = NULL;
+ }
+
+ if (tp->task) {
+ put_task_struct(tp->task);
+ tp->task = NULL;
+ }
+}
+
+static int show_timer(struct seq_file *m, void *v)
+{
+ struct k_itimer *timer;
+ struct timers_private *tp = m->private;
+ int notify;
+ static char *nstr[] = {
+ [SIGEV_SIGNAL] = "signal",
+ [SIGEV_NONE] = "none",
+ [SIGEV_THREAD] = "thread",
+ };
+
+ timer = list_entry((struct list_head *)v, struct k_itimer, list);
+ notify = timer->it_sigev_notify;
+
+ seq_printf(m, "ID: %d\n", timer->it_id);
+ seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo,
+ timer->sigq->info.si_value.sival_ptr);
+ seq_printf(m, "notify: %s/%s.%d\n",
+ nstr[notify & ~SIGEV_THREAD_ID],
+ (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
+ pid_nr_ns(timer->it_pid, tp->ns));
+
+ return 0;
+}
+
+static const struct seq_operations proc_timers_seq_ops = {
+ .start = timers_start,
+ .next = timers_next,
+ .stop = timers_stop,
+ .show = show_timer,
+};
+
+static int proc_timers_open(struct inode *inode, struct file *file)
+{
+ struct timers_private *tp;
+
+ tp = __seq_open_private(file, &proc_timers_seq_ops,
+ sizeof(struct timers_private));
+ if (!tp)
+ return -ENOMEM;
+
+ tp->pid = proc_pid(inode);
+ tp->ns = inode->i_sb->s_fs_info;
+ return 0;
+}
+
+static const struct file_operations proc_timers_operations = {
+ .open = proc_timers_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
#endif /* CONFIG_CHECKPOINT_RESTORE */
static struct dentry *proc_pident_instantiate(struct inode *dir,
@@ -2583,6 +2680,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
#endif
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ REG("timers", S_IRUGO, proc_timers_operations),
+#endif
};
static int proc_tgid_base_readdir(struct file * filp,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 85ff3a4598b3..75710357a517 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -30,24 +30,6 @@ extern int proc_net_init(void);
static inline int proc_net_init(void) { return 0; }
#endif
-struct vmalloc_info {
- unsigned long used;
- unsigned long largest_chunk;
-};
-
-#ifdef CONFIG_MMU
-#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
-extern void get_vmalloc_info(struct vmalloc_info *vmi);
-#else
-
-#define VMALLOC_TOTAL 0UL
-#define get_vmalloc_info(vmi) \
-do { \
- (vmi)->used = 0; \
- (vmi)->largest_chunk = 0; \
-} while(0)
-#endif
-
extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index eda6f017f272..f6a13f489e30 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -15,6 +15,7 @@
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
+#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
@@ -564,7 +565,6 @@ static const struct file_operations proc_kcore_operations = {
.llseek = default_llseek,
};
-#ifdef CONFIG_MEMORY_HOTPLUG
/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
unsigned long action, void *arg)
@@ -578,8 +578,11 @@ static int __meminit kcore_callback(struct notifier_block *self,
}
return NOTIFY_OK;
}
-#endif
+static struct notifier_block kcore_callback_nb __meminitdata = {
+ .notifier_call = kcore_callback,
+ .priority = 0,
+};
static struct kcore_list kcore_vmalloc;
@@ -631,7 +634,7 @@ static int __init proc_kcore_init(void)
add_modules_range();
/* Store direct-map area from physical memory map */
kcore_update_ram();
- hotplug_memory_notifier(kcore_callback, 0);
+ register_hotmemory_notifier(&kcore_callback_nb);
return 0;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 1efaaa19c4f3..5aa847a603c0 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -11,6 +11,7 @@
#include <linux/swap.h>
#include <linux/vmstat.h>
#include <linux/atomic.h>
+#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include "internal.h"
diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c
deleted file mode 100644
index 8ae221dfd010..000000000000
--- a/fs/proc/mmu.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* mmu.c: mmu memory info files
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/spinlock.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
-#include <asm/pgtable.h>
-#include "internal.h"
-
-void get_vmalloc_info(struct vmalloc_info *vmi)
-{
- struct vm_struct *vma;
- unsigned long free_area_size;
- unsigned long prev_end;
-
- vmi->used = 0;
-
- if (!vmlist) {
- vmi->largest_chunk = VMALLOC_TOTAL;
- }
- else {
- vmi->largest_chunk = 0;
-
- prev_end = VMALLOC_START;
-
- read_lock(&vmlist_lock);
-
- for (vma = vmlist; vma; vma = vma->next) {
- unsigned long addr = (unsigned long) vma->addr;
-
- /*
- * Some archs keep another range for modules in vmlist
- */
- if (addr < VMALLOC_START)
- continue;
- if (addr >= VMALLOC_END)
- break;
-
- vmi->used += vma->size;
-
- free_area_size = addr - prev_end;
- if (vmi->largest_chunk < free_area_size)
- vmi->largest_chunk = free_area_size;
-
- prev_end = vma->size + addr;
- }
-
- if (VMALLOC_END - prev_end > vmi->largest_chunk)
- vmi->largest_chunk = VMALLOC_END - prev_end;
-
- read_unlock(&vmlist_lock);
- }
-}
diff --git a/fs/read_write.c b/fs/read_write.c
index e6ddc8dceb96..7a648911246b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(generic_file_llseek_size);
*
* This is a generic implemenation of ->llseek useable for all normal local
* filesystems. It just updates the file offset to the value specified by
- * @offset and @whence under i_mutex.
+ * @offset and @whence.
*/
loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
{
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e14512678c9b..e8e0e71b29d5 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -165,21 +165,8 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
if (unlikely(!sd))
return NULL;
- while (1) {
- int v, t;
-
- v = atomic_read(&sd->s_active);
- if (unlikely(v < 0))
- return NULL;
-
- t = atomic_cmpxchg(&sd->s_active, v, v + 1);
- if (likely(t == v))
- break;
- if (t < 0)
- return NULL;
-
- cpu_relax();
- }
+ if (!atomic_inc_unless_negative(&sd->s_active))
+ return NULL;
if (likely(!ignore_lockdep(sd)))
rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
@@ -281,6 +268,10 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
*/
parent_sd = sd->s_parent;
+ WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
+ "sysfs: free using entry: %s/%s\n",
+ parent_sd ? parent_sd->s_name : "", sd->s_name);
+
if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
sysfs_put(sd->s_symlink.target_sd);
if (sysfs_type(sd) & SYSFS_COPY_NAME)
@@ -399,7 +390,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
sd->s_name = name;
sd->s_mode = mode;
- sd->s_flags = type;
+ sd->s_flags = type | SYSFS_FLAG_REMOVED;
return sd;
@@ -479,6 +470,9 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
}
+ /* Mark the entry added into directory tree */
+ sd->s_flags &= ~SYSFS_FLAG_REMOVED;
+
return 0;
}
@@ -1012,6 +1006,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
enum kobj_ns_type type;
const void *ns;
ino_t ino;
+ loff_t off;
type = sysfs_ns_type(parent_sd);
ns = sysfs_info(dentry->d_sb)->ns[type];
@@ -1034,6 +1029,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
return 0;
}
mutex_lock(&sysfs_mutex);
+ off = filp->f_pos;
for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
pos;
pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) {
@@ -1045,19 +1041,24 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
len = strlen(name);
ino = pos->s_ino;
type = dt_type(pos);
- filp->f_pos = pos->s_hash;
+ off = filp->f_pos = pos->s_hash;
filp->private_data = sysfs_get(pos);
mutex_unlock(&sysfs_mutex);
- ret = filldir(dirent, name, len, filp->f_pos, ino, type);
+ ret = filldir(dirent, name, len, off, ino, type);
mutex_lock(&sysfs_mutex);
if (ret < 0)
break;
}
mutex_unlock(&sysfs_mutex);
- if ((filp->f_pos > 1) && !pos) { /* EOF */
- filp->f_pos = INT_MAX;
+
+ /* don't reference last entry if its refcount is dropped */
+ if (!pos) {
filp->private_data = NULL;
+
+ /* EOF and not changed as 0 or 1 in read/write path */
+ if (off == filp->f_pos && off > 1)
+ filp->f_pos = INT_MAX;
}
return 0;
}