summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c257
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/ext3/fsync.c3
-rw-r--r--fs/ext4/fsync.c6
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/fs-writeback.c98
-rw-r--r--fs/gfs2/rgrp.c5
-rw-r--r--fs/jbd2/checkpoint.c3
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/nilfs2/the_nilfs.c4
-rw-r--r--fs/partitions/acorn.c68
-rw-r--r--fs/partitions/acorn.h10
-rw-r--r--fs/partitions/amiga.c13
-rw-r--r--fs/partitions/amiga.h2
-rw-r--r--fs/partitions/atari.c8
-rw-r--r--fs/partitions/atari.h2
-rw-r--r--fs/partitions/check.c84
-rw-r--r--fs/partitions/check.h12
-rw-r--r--fs/partitions/efi.c91
-rw-r--r--fs/partitions/efi.h2
-rw-r--r--fs/partitions/ibm.c21
-rw-r--r--fs/partitions/ibm.h2
-rw-r--r--fs/partitions/karma.c4
-rw-r--r--fs/partitions/karma.h2
-rw-r--r--fs/partitions/ldm.c89
-rw-r--r--fs/partitions/ldm.h2
-rw-r--r--fs/partitions/mac.c11
-rw-r--r--fs/partitions/mac.h2
-rw-r--r--fs/partitions/msdos.c85
-rw-r--r--fs/partitions/msdos.h2
-rw-r--r--fs/partitions/osf.c4
-rw-r--r--fs/partitions/osf.h2
-rw-r--r--fs/partitions/sgi.c6
-rw-r--r--fs/partitions/sgi.h2
-rw-r--r--fs/partitions/sun.c6
-rw-r--r--fs/partitions/sun.h2
-rw-r--r--fs/partitions/sysv68.c6
-rw-r--r--fs/partitions/sysv68.h2
-rw-r--r--fs/partitions/ultrix.c4
-rw-r--r--fs/partitions/ultrix.h2
-rw-r--r--fs/pipe.c122
-rw-r--r--fs/reiserfs/file.c3
-rw-r--r--fs/splice.c151
-rw-r--r--fs/sync.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
46 files changed, 826 insertions, 393 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6dcee88c2e5d..55dcb7884f4d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -417,7 +417,7 @@ int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
*/
mutex_unlock(&bd_inode->i_mutex);
- error = blkdev_issue_flush(bdev, NULL);
+ error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
if (error == -EOPNOTSUPP)
error = 0;
@@ -668,41 +668,209 @@ void bd_forget(struct inode *inode)
iput(bdev->bd_inode);
}
-int bd_claim(struct block_device *bdev, void *holder)
+/**
+ * bd_may_claim - test whether a block device can be claimed
+ * @bdev: block device of interest
+ * @whole: whole block device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Test whther @bdev can be claimed by @holder.
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).
+ *
+ * RETURNS:
+ * %true if @bdev can be claimed, %false otherwise.
+ */
+static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder)
{
- int res;
- spin_lock(&bdev_lock);
-
- /* first decide result */
if (bdev->bd_holder == holder)
- res = 0; /* already a holder */
+ return true; /* already a holder */
else if (bdev->bd_holder != NULL)
- res = -EBUSY; /* held by someone else */
+ return false; /* held by someone else */
else if (bdev->bd_contains == bdev)
- res = 0; /* is a whole device which isn't held */
+ return true; /* is a whole device which isn't held */
- else if (bdev->bd_contains->bd_holder == bd_claim)
- res = 0; /* is a partition of a device that is being partitioned */
- else if (bdev->bd_contains->bd_holder != NULL)
- res = -EBUSY; /* is a partition of a held device */
+ else if (whole->bd_holder == bd_claim)
+ return true; /* is a partition of a device that is being partitioned */
+ else if (whole->bd_holder != NULL)
+ return false; /* is a partition of a held device */
else
- res = 0; /* is a partition of an un-held device */
+ return true; /* is a partition of an un-held device */
+}
+
+/**
+ * bd_prepare_to_claim - prepare to claim a block device
+ * @bdev: block device of interest
+ * @whole: the whole device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Prepare to claim @bdev. This function fails if @bdev is already
+ * claimed by another holder and waits if another claiming is in
+ * progress. This function doesn't actually claim. On successful
+ * return, the caller has ownership of bd_claiming and bd_holder[s].
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab
+ * it multiple times.
+ *
+ * RETURNS:
+ * 0 if @bdev can be claimed, -EBUSY otherwise.
+ */
+static int bd_prepare_to_claim(struct block_device *bdev,
+ struct block_device *whole, void *holder)
+{
+retry:
+ /* if someone else claimed, fail */
+ if (!bd_may_claim(bdev, whole, holder))
+ return -EBUSY;
+
+ /* if someone else is claiming, wait for it to finish */
+ if (whole->bd_claiming && whole->bd_claiming != holder) {
+ wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&bdev_lock);
+ schedule();
+ finish_wait(wq, &wait);
+ spin_lock(&bdev_lock);
+ goto retry;
+ }
+
+ /* yay, all mine */
+ return 0;
+}
+
+/**
+ * bd_start_claiming - start claiming a block device
+ * @bdev: block device of interest
+ * @holder: holder trying to claim @bdev
+ *
+ * @bdev is about to be opened exclusively. Check @bdev can be opened
+ * exclusively and mark that an exclusive open is in progress. Each
+ * successful call to this function must be matched with a call to
+ * either bd_claim() or bd_abort_claiming(). If this function
+ * succeeds, the matching bd_claim() is guaranteed to succeed.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to the block device containing @bdev on success, ERR_PTR()
+ * value on failure.
+ */
+static struct block_device *bd_start_claiming(struct block_device *bdev,
+ void *holder)
+{
+ struct gendisk *disk;
+ struct block_device *whole;
+ int partno, err;
+
+ might_sleep();
+
+ /*
+ * @bdev might not have been initialized properly yet, look up
+ * and grab the outer block device the hard way.
+ */
+ disk = get_gendisk(bdev->bd_dev, &partno);
+ if (!disk)
+ return ERR_PTR(-ENXIO);
+
+ whole = bdget_disk(disk, 0);
+ put_disk(disk);
+ if (!whole)
+ return ERR_PTR(-ENOMEM);
+
+ /* prepare to claim, if successful, mark claiming in progress */
+ spin_lock(&bdev_lock);
+
+ err = bd_prepare_to_claim(bdev, whole, holder);
+ if (err == 0) {
+ whole->bd_claiming = holder;
+ spin_unlock(&bdev_lock);
+ return whole;
+ } else {
+ spin_unlock(&bdev_lock);
+ bdput(whole);
+ return ERR_PTR(err);
+ }
+}
- /* now impose change */
- if (res==0) {
+/* releases bdev_lock */
+static void __bd_abort_claiming(struct block_device *whole, void *holder)
+{
+ BUG_ON(whole->bd_claiming != holder);
+ whole->bd_claiming = NULL;
+ wake_up_bit(&whole->bd_claiming, 0);
+
+ spin_unlock(&bdev_lock);
+ bdput(whole);
+}
+
+/**
+ * bd_abort_claiming - abort claiming a block device
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Abort a claiming block started by bd_start_claiming(). Note that
+ * @whole is not the block device to be claimed but the whole device
+ * returned by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_abort_claiming(struct block_device *whole, void *holder)
+{
+ spin_lock(&bdev_lock);
+ __bd_abort_claiming(whole, holder); /* releases bdev_lock */
+}
+
+/**
+ * bd_claim - claim a block device
+ * @bdev: block device to claim
+ * @holder: holder trying to claim @bdev
+ *
+ * Try to claim @bdev which must have been opened successfully. This
+ * function may be called with or without preceding
+ * blk_start_claiming(). In the former case, this function is always
+ * successful and terminates the claiming block.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 if successful, -EBUSY if @bdev is already claimed.
+ */
+int bd_claim(struct block_device *bdev, void *holder)
+{
+ struct block_device *whole = bdev->bd_contains;
+ int res;
+
+ might_sleep();
+
+ spin_lock(&bdev_lock);
+
+ res = bd_prepare_to_claim(bdev, whole, holder);
+ if (res == 0) {
/* note that for a whole device bd_holders
* will be incremented twice, and bd_holder will
* be set to bd_claim before being set to holder
*/
- bdev->bd_contains->bd_holders ++;
- bdev->bd_contains->bd_holder = bd_claim;
+ whole->bd_holders++;
+ whole->bd_holder = bd_claim;
bdev->bd_holders++;
bdev->bd_holder = holder;
}
- spin_unlock(&bdev_lock);
+
+ if (whole->bd_claiming)
+ __bd_abort_claiming(whole, holder); /* releases bdev_lock */
+ else
+ spin_unlock(&bdev_lock);
+
return res;
}
-
EXPORT_SYMBOL(bd_claim);
void bd_release(struct block_device *bdev)
@@ -1316,6 +1484,7 @@ EXPORT_SYMBOL(blkdev_get);
static int blkdev_open(struct inode * inode, struct file * filp)
{
+ struct block_device *whole = NULL;
struct block_device *bdev;
int res;
@@ -1338,22 +1507,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (bdev == NULL)
return -ENOMEM;
+ if (filp->f_mode & FMODE_EXCL) {
+ whole = bd_start_claiming(bdev, filp);
+ if (IS_ERR(whole)) {
+ bdput(bdev);
+ return PTR_ERR(whole);
+ }
+ }
+
filp->f_mapping = bdev->bd_inode->i_mapping;
res = blkdev_get(bdev, filp->f_mode);
- if (res)
- return res;
- if (filp->f_mode & FMODE_EXCL) {
- res = bd_claim(bdev, filp);
- if (res)
- goto out_blkdev_put;
+ if (whole) {
+ if (res == 0)
+ BUG_ON(bd_claim(bdev, filp) != 0);
+ else
+ bd_abort_claiming(whole, filp);
}
- return 0;
-
- out_blkdev_put:
- blkdev_put(bdev, filp->f_mode);
return res;
}
@@ -1564,27 +1736,34 @@ EXPORT_SYMBOL(lookup_bdev);
*/
struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
{
- struct block_device *bdev;
- int error = 0;
+ struct block_device *bdev, *whole;
+ int error;
bdev = lookup_bdev(path);
if (IS_ERR(bdev))
return bdev;
+ whole = bd_start_claiming(bdev, holder);
+ if (IS_ERR(whole)) {
+ bdput(bdev);
+ return whole;
+ }
+
error = blkdev_get(bdev, mode);
if (error)
- return ERR_PTR(error);
+ goto out_abort_claiming;
+
error = -EACCES;
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
- goto blkdev_put;
- error = bd_claim(bdev, holder);
- if (error)
- goto blkdev_put;
+ goto out_blkdev_put;
+ BUG_ON(bd_claim(bdev, holder) != 0);
return bdev;
-
-blkdev_put:
+
+out_blkdev_put:
blkdev_put(bdev, mode);
+out_abort_claiming:
+ bd_abort_claiming(whole, holder);
return ERR_PTR(error);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b34d32fdaaec..c6a4f459ad76 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1589,7 +1589,7 @@ static void btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
}
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/buffer.c b/fs/buffer.c
index c9c266db0624..08e422d56996 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -275,6 +275,7 @@ void invalidate_bdev(struct block_device *bdev)
return;
invalidate_bh_lrus();
+ lru_add_drain_all(); /* make sure all lru add caches are flushed */
invalidate_mapping_pages(mapping, 0, -1);
}
EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 26289e8f4163..fcf7487734b6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -90,6 +90,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
* storage
*/
if (needs_barrier)
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
return ret;
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0d0c3239c1cd..ef3d980e67cb 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (ext4_should_writeback_data(inode) &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+ NULL, BLKDEV_IFL_WAIT);
jbd2_log_wait_commit(journal, commit_tid);
} else if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
return ret;
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 0a140741b39e..f74d270ba155 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -14,6 +14,7 @@
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
@@ -412,6 +413,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_NOTIFY:
err = fcntl_dirnotify(fd, filp, arg);
break;
+ case F_SETPIPE_SZ:
+ case F_GETPIPE_SZ:
+ err = pipe_fcntl(filp, cmd, arg);
+ break;
default:
break;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4b37f7cea4dd..437a7431b4ea 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,6 +45,7 @@ struct wb_writeback_args {
int for_kupdate:1;
int range_cyclic:1;
int for_background:1;
+ int sb_pinned:1;
};
/*
@@ -192,7 +193,8 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
}
static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args)
+ struct wb_writeback_args *args,
+ int wait)
{
struct bdi_work *work;
@@ -204,6 +206,8 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
if (work) {
bdi_work_init(work, args);
bdi_queue_work(bdi, work);
+ if (wait)
+ bdi_wait_on_work_clear(work);
} else {
struct bdi_writeback *wb = &bdi->wb;
@@ -230,6 +234,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
.sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX,
.range_cyclic = 0,
+ /*
+ * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
+ * lets make it explicitly clear.
+ */
+ .sb_pinned = 1,
};
struct bdi_work work;
@@ -245,21 +254,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
* @bdi: the backing device to write from
* @sb: write inodes from this super_block
* @nr_pages: the number of pages to write
+ * @sb_locked: caller already holds sb umount sem.
*
* Description:
* This does WB_SYNC_NONE opportunistic writeback. The IO is only
* started when this function returns, we make no guarentees on
- * completion. Caller need not hold sb s_umount semaphore.
+ * completion. Caller specifies whether sb umount sem is held already or not.
*
*/
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages)
+ long nr_pages, int sb_locked)
{
struct wb_writeback_args args = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
.nr_pages = nr_pages,
.range_cyclic = 1,
+ .sb_pinned = sb_locked,
};
/*
@@ -271,7 +282,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
args.for_background = 1;
}
- bdi_alloc_queue_work(bdi, &args);
+ bdi_alloc_queue_work(bdi, &args, sb_locked);
}
/*
@@ -452,11 +463,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
BUG_ON(inode->i_state & I_SYNC);
- /* Set I_SYNC, reset I_DIRTY */
- dirty = inode->i_state & I_DIRTY;
+ /* Set I_SYNC, reset I_DIRTY_PAGES */
inode->i_state |= I_SYNC;
- inode->i_state &= ~I_DIRTY;
-
+ inode->i_state &= ~I_DIRTY_PAGES;
spin_unlock(&inode_lock);
ret = do_writepages(mapping, wbc);
@@ -472,6 +481,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
ret = err;
}
+ /*
+ * Some filesystems may redirty the inode during the writeback
+ * due to delalloc, clear dirty metadata flags right before
+ * write_inode()
+ */
+ spin_lock(&inode_lock);
+ dirty = inode->i_state & I_DIRTY;
+ inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ spin_unlock(&inode_lock);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
int err = write_inode(inode, wbc);
@@ -577,7 +595,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
/*
* Caller must already hold the ref for this
*/
- if (wbc->sync_mode == WB_SYNC_ALL) {
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
WARN_ON(!rwsem_is_locked(&sb->s_umount));
return SB_NOT_PINNED;
}
@@ -751,6 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb,
.for_kupdate = args->for_kupdate,
.for_background = args->for_background,
.range_cyclic = args->range_cyclic,
+ .sb_pinned = args->sb_pinned,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -852,6 +871,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
unsigned long expired;
long nr_pages;
+ /*
+ * When set to zero, disable periodic writeback
+ */
+ if (!dirty_writeback_interval)
+ return 0;
+
expired = wb->last_old_flush +
msecs_to_jiffies(dirty_writeback_interval * 10);
if (time_before(jiffies, expired))
@@ -887,6 +912,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
while ((work = get_next_work_item(bdi, wb)) != NULL) {
struct wb_writeback_args args = work->args;
+ int post_clear;
/*
* Override sync mode, in case we must wait for completion
@@ -894,11 +920,13 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
if (force_wait)
work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
+ post_clear = WB_SYNC_ALL || args.sb_pinned;
+
/*
* If this isn't a data integrity operation, just notify
* that we have seen this work and we are now starting it.
*/
- if (args.sync_mode == WB_SYNC_NONE)
+ if (!post_clear)
wb_clear_pending(wb, work);
wrote += wb_writeback(wb, &args);
@@ -907,7 +935,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
* This is a data integrity writeback, so only do the
* notification when we have completed the work.
*/
- if (args.sync_mode == WB_SYNC_ALL)
+ if (post_clear)
wb_clear_pending(wb, work);
}
@@ -947,8 +975,17 @@ int bdi_writeback_task(struct bdi_writeback *wb)
break;
}
- wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
- schedule_timeout_interruptible(wait_jiffies);
+ if (dirty_writeback_interval) {
+ wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+ schedule_timeout_interruptible(wait_jiffies);
+ } else {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (list_empty_careful(&wb->bdi->work_list) &&
+ !kthread_should_stop())
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ }
+
try_to_freeze();
}
@@ -974,7 +1011,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages)
if (!bdi_has_dirty_io(bdi))
continue;
- bdi_alloc_queue_work(bdi, &args);
+ bdi_alloc_queue_work(bdi, &args, 0);
}
rcu_read_unlock();
@@ -1183,6 +1220,18 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode);
}
+static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
+{
+ unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+ unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+ long nr_to_write;
+
+ nr_to_write = nr_dirty + nr_unstable +
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+ bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
+}
+
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
@@ -1194,18 +1243,23 @@ static void wait_sb_inodes(struct super_block *sb)
*/
void writeback_inodes_sb(struct super_block *sb)
{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
-
- nr_to_write = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+ __writeback_inodes_sb(sb, 0);
}
EXPORT_SYMBOL(writeback_inodes_sb);
/**
+ * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Like writeback_inodes_sb(), except the caller already holds the
+ * sb umount sem.
+ */
+void writeback_inodes_sb_locked(struct super_block *sb)
+{
+ __writeback_inodes_sb(sb, 1);
+}
+
+/**
* writeback_inodes_sb_if_idle - start writeback if none underway
* @sb: the superblock
*
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8bce73ed4d8e..117fa4171f62 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
if ((start + nr_sects) != blk) {
rv = blkdev_issue_discard(bdev, start,
nr_sects, GFP_NOFS,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT |
+ BLKDEV_IFL_BARRIER);
if (rv)
goto fail;
nr_sects = 0;
@@ -869,7 +870,7 @@ start_new_extent:
}
if (nr_sects) {
rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
if (rv)
goto fail;
}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 30beb11ef928..076d1cc44f95 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
*/
if ((journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (!(journal->j_flags & JBD2_ABORT))
jbd2_journal_update_superblock(journal, 1);
return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 671da7fb7ffd..75716d3d2be0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -717,7 +717,8 @@ start_journal_io:
if (commit_transaction->t_flushed_data_blocks &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -727,7 +728,8 @@ start_journal_io:
if (err)
__jbd2_journal_abort_hard(journal);
if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(journal->j_dev, NULL);
+ blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
}
err = journal_finish_inode_data_buffers(journal, commit_transaction);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index a756168a21c2..8c1097327abc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -674,7 +674,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
start * sects_per_block,
nblocks * sects_per_block,
GFP_NOFS,
- DISCARD_FL_BARRIER);
+ BLKDEV_IFL_BARRIER);
if (ret < 0)
return ret;
nblocks = 0;
@@ -684,7 +684,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
nblocks * sects_per_block,
- GFP_NOFS, DISCARD_FL_BARRIER);
+ GFP_NOFS, BLKDEV_IFL_BARRIER);
return ret;
}
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
index a97b477ac0fc..6921e7890be6 100644
--- a/fs/partitions/acorn.c
+++ b/fs/partitions/acorn.c
@@ -70,14 +70,14 @@ struct riscix_record {
#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
defined(CONFIG_ACORN_PARTITION_ADFS)
-static int
-riscix_partition(struct parsed_partitions *state, struct block_device *bdev,
- unsigned long first_sect, int slot, unsigned long nr_sects)
+static int riscix_partition(struct parsed_partitions *state,
+ unsigned long first_sect, int slot,
+ unsigned long nr_sects)
{
Sector sect;
struct riscix_record *rr;
- rr = (struct riscix_record *)read_dev_sector(bdev, first_sect, &sect);
+ rr = read_part_sector(state, first_sect, &sect);
if (!rr)
return -1;
@@ -123,9 +123,9 @@ struct linux_part {
#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
defined(CONFIG_ACORN_PARTITION_ADFS)
-static int
-linux_partition(struct parsed_partitions *state, struct block_device *bdev,
- unsigned long first_sect, int slot, unsigned long nr_sects)
+static int linux_partition(struct parsed_partitions *state,
+ unsigned long first_sect, int slot,
+ unsigned long nr_sects)
{
Sector sect;
struct linux_part *linuxp;
@@ -135,7 +135,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev,
put_partition(state, slot++, first_sect, size);
- linuxp = (struct linux_part *)read_dev_sector(bdev, first_sect, &sect);
+ linuxp = read_part_sector(state, first_sect, &sect);
if (!linuxp)
return -1;
@@ -157,8 +157,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev,
#endif
#ifdef CONFIG_ACORN_PARTITION_CUMANA
-int
-adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev)
+int adfspart_check_CUMANA(struct parsed_partitions *state)
{
unsigned long first_sector = 0;
unsigned int start_blk = 0;
@@ -185,7 +184,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev
struct adfs_discrecord *dr;
unsigned int nr_sects;
- data = read_dev_sector(bdev, start_blk * 2 + 6, &sect);
+ data = read_part_sector(state, start_blk * 2 + 6, &sect);
if (!data)
return -1;
@@ -217,14 +216,14 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev
#ifdef CONFIG_ACORN_PARTITION_RISCIX
case PARTITION_RISCIX_SCSI:
/* RISCiX - we don't know how to find the next one. */
- slot = riscix_partition(state, bdev, first_sector,
- slot, nr_sects);
+ slot = riscix_partition(state, first_sector, slot,
+ nr_sects);
break;
#endif
case PARTITION_LINUX:
- slot = linux_partition(state, bdev, first_sector,
- slot, nr_sects);
+ slot = linux_partition(state, first_sector, slot,
+ nr_sects);
break;
}
put_dev_sector(sect);
@@ -249,8 +248,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev
* hda1 = ADFS partition on first drive.
* hda2 = non-ADFS partition.
*/
-int
-adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
+int adfspart_check_ADFS(struct parsed_partitions *state)
{
unsigned long start_sect, nr_sects, sectscyl, heads;
Sector sect;
@@ -259,7 +257,7 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
unsigned char id;
int slot = 1;
- data = read_dev_sector(bdev, 6, &sect);
+ data = read_part_sector(state, 6, &sect);
if (!data)
return -1;
@@ -278,21 +276,21 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev)
/*
* Work out start of non-adfs partition.
*/
- nr_sects = (bdev->bd_inode->i_size >> 9) - start_sect;
+ nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect;
if (start_sect) {
switch (id) {
#ifdef CONFIG_ACORN_PARTITION_RISCIX
case PARTITION_RISCIX_SCSI:
case PARTITION_RISCIX_MFM:
- slot = riscix_partition(state, bdev, start_sect,
- slot, nr_sects);
+ slot = riscix_partition(state, start_sect, slot,
+ nr_sects);
break;
#endif
case PARTITION_LINUX:
- slot = linux_partition(state, bdev, start_sect,
- slot, nr_sects);
+ slot = linux_partition(state, start_sect, slot,
+ nr_sects);
break;
}
}
@@ -308,10 +306,11 @@ struct ics_part {
__le32 size;
};
-static int adfspart_check_ICSLinux(struct block_device *bdev, unsigned long block)
+static int adfspart_check_ICSLinux(struct parsed_partitions *state,
+ unsigned long block)
{
Sector sect;
- unsigned char *data = read_dev_sector(bdev, block, &sect);
+ unsigned char *data = read_part_sector(state, block, &sect);
int result = 0;
if (data) {
@@ -349,8 +348,7 @@ static inline int valid_ics_sector(const unsigned char *data)
* hda2 = ADFS partition 1 on first drive.
* ..etc..
*/
-int
-adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev)
+int adfspart_check_ICS(struct parsed_partitions *state)
{
const unsigned char *data;
const struct ics_part *p;
@@ -360,7 +358,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev)
/*
* Try ICS style partitions - sector 0 contains partition info.
*/
- data = read_dev_sector(bdev, 0, &sect);
+ data = read_part_sector(state, 0, &sect);
if (!data)
return -1;
@@ -392,7 +390,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev)
* partition is. We must not make this visible
* to the filesystem.
*/
- if (size > 1 && adfspart_check_ICSLinux(bdev, start)) {
+ if (size > 1 && adfspart_check_ICSLinux(state, start)) {
start += 1;
size -= 1;
}
@@ -446,8 +444,7 @@ static inline int valid_ptec_sector(const unsigned char *data)
* hda2 = ADFS partition 1 on first drive.
* ..etc..
*/
-int
-adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev)
+int adfspart_check_POWERTEC(struct parsed_partitions *state)
{
Sector sect;
const unsigned char *data;
@@ -455,7 +452,7 @@ adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bd
int slot = 1;
int i;
- data = read_dev_sector(bdev, 0, &sect);
+ data = read_part_sector(state, 0, &sect);
if (!data)
return -1;
@@ -508,8 +505,7 @@ static const char eesox_name[] = {
* 1. The individual ADFS boot block entries that are placed on the disk.
* 2. The start address of the next entry.
*/
-int
-adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev)
+int adfspart_check_EESOX(struct parsed_partitions *state)
{
Sector sect;
const unsigned char *data;
@@ -518,7 +514,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev)
sector_t start = 0;
int i, slot = 1;
- data = read_dev_sector(bdev, 7, &sect);
+ data = read_part_sector(state, 7, &sect);
if (!data)
return -1;
@@ -545,7 +541,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev)
if (i != 0) {
sector_t size;
- size = get_capacity(bdev->bd_disk);
+ size = get_capacity(state->bdev->bd_disk);
put_partition(state, slot++, start, size - start);
printk("\n");
}
diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h
index 81fd50ecc080..ede828529692 100644
--- a/fs/partitions/acorn.h
+++ b/fs/partitions/acorn.h
@@ -7,8 +7,8 @@
* format, and everyone stick to it?
*/
-int adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev);
-int adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev);
-int adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev);
-int adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev);
-int adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev);
+int adfspart_check_CUMANA(struct parsed_partitions *state);
+int adfspart_check_ADFS(struct parsed_partitions *state);
+int adfspart_check_ICS(struct parsed_partitions *state);
+int adfspart_check_POWERTEC(struct parsed_partitions *state);
+int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c
index 9917a8c360f2..ba443d4229f8 100644
--- a/fs/partitions/amiga.c
+++ b/fs/partitions/amiga.c
@@ -23,8 +23,7 @@ checksum_block(__be32 *m, int size)
return sum;
}
-int
-amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
+int amiga_partition(struct parsed_partitions *state)
{
Sector sect;
unsigned char *data;
@@ -38,11 +37,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
for (blk = 0; ; blk++, put_dev_sector(sect)) {
if (blk == RDB_ALLOCATION_LIMIT)
goto rdb_done;
- data = read_dev_sector(bdev, blk, &sect);
+ data = read_part_sector(state, blk, &sect);
if (!data) {
if (warn_no_part)
printk("Dev %s: unable to read RDB block %d\n",
- bdevname(bdev, b), blk);
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
@@ -64,7 +63,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
}
printk("Dev %s: RDB in block %d has bad checksum\n",
- bdevname(bdev, b), blk);
+ bdevname(state->bdev, b), blk);
}
/* blksize is blocks per 512 byte standard block */
@@ -75,11 +74,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
put_dev_sector(sect);
for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
blk *= blksize; /* Read in terms partition table understands */
- data = read_dev_sector(bdev, blk, &sect);
+ data = read_part_sector(state, blk, &sect);
if (!data) {
if (warn_no_part)
printk("Dev %s: unable to read partition block %d\n",
- bdevname(bdev, b), blk);
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h
index 2f3e9ce22d53..d094585cadaa 100644
--- a/fs/partitions/amiga.h
+++ b/fs/partitions/amiga.h
@@ -2,5 +2,5 @@
* fs/partitions/amiga.h
*/
-int amiga_partition(struct parsed_partitions *state, struct block_device *bdev);
+int amiga_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c
index 1f3572d5b755..4439ff1b6cec 100644
--- a/fs/partitions/atari.c
+++ b/fs/partitions/atari.c
@@ -30,7 +30,7 @@ static inline int OK_id(char *s)
memcmp (s, "RAW", 3) == 0 ;
}
-int atari_partition(struct parsed_partitions *state, struct block_device *bdev)
+int atari_partition(struct parsed_partitions *state)
{
Sector sect;
struct rootsector *rs;
@@ -42,12 +42,12 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev)
int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
#endif
- rs = (struct rootsector *) read_dev_sector(bdev, 0, &sect);
+ rs = read_part_sector(state, 0, &sect);
if (!rs)
return -1;
/* Verify this is an Atari rootsector: */
- hd_size = bdev->bd_inode->i_size >> 9;
+ hd_size = state->bdev->bd_inode->i_size >> 9;
if (!VALID_PARTITION(&rs->part[0], hd_size) &&
!VALID_PARTITION(&rs->part[1], hd_size) &&
!VALID_PARTITION(&rs->part[2], hd_size) &&
@@ -84,7 +84,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev)
printk(" XGM<");
partsect = extensect = be32_to_cpu(pi->st);
while (1) {
- xrs = (struct rootsector *)read_dev_sector(bdev, partsect, &sect2);
+ xrs = read_part_sector(state, partsect, &sect2);
if (!xrs) {
printk (" block %ld read failed\n", partsect);
put_dev_sector(sect);
diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h
index 63186b00e135..fe2d32a89f36 100644
--- a/fs/partitions/atari.h
+++ b/fs/partitions/atari.h
@@ -31,4 +31,4 @@ struct rootsector
u16 checksum; /* checksum for bootable disks */
} __attribute__((__packed__));
-int atari_partition(struct parsed_partitions *state, struct block_device *bdev);
+int atari_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e238ab23a9e7..5dcd4b0c5533 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -45,7 +45,7 @@ extern void md_autodetect_dev(dev_t dev);
int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
-static int (*check_part[])(struct parsed_partitions *, struct block_device *) = {
+static int (*check_part[])(struct parsed_partitions *) = {
/*
* Probe partition formats with tables at disk address 0
* that also have an ADFS boot block at 0xdc0.
@@ -161,10 +161,11 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
struct parsed_partitions *state;
int i, res, err;
- state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
+ state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
if (!state)
return NULL;
+ state->bdev = bdev;
disk_name(hd, 0, state->name);
printk(KERN_INFO " %s:", state->name);
if (isdigit(state->name[strlen(state->name)-1]))
@@ -174,7 +175,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
i = res = err = 0;
while (!res && check_part[i]) {
memset(&state->parts, 0, sizeof(state->parts));
- res = check_part[i++](state, bdev);
+ res = check_part[i++](state);
if (res < 0) {
/* We have hit an I/O error which we don't report now.
* But record it, and let the others do their job.
@@ -186,6 +187,8 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
}
if (res > 0)
return state;
+ if (state->access_beyond_eod)
+ err = -ENOSPC;
if (err)
/* The partition is unrecognized. So report I/O errors if there were any */
res = err;
@@ -538,12 +541,33 @@ exit:
disk_part_iter_exit(&piter);
}
+static bool disk_unlock_native_capacity(struct gendisk *disk)
+{
+ const struct block_device_operations *bdops = disk->fops;
+
+ if (bdops->unlock_native_capacity &&
+ !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
+ printk(KERN_CONT "enabling native capacity\n");
+ bdops->unlock_native_capacity(disk);
+ disk->flags |= GENHD_FL_NATIVE_CAPACITY;
+ return true;
+ } else {
+ printk(KERN_CONT "truncated\n");
+ return false;
+ }
+}
+
int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
{
+ struct parsed_partitions *state = NULL;
struct disk_part_iter piter;
struct hd_struct *part;
- struct parsed_partitions *state;
int p, highest, res;
+rescan:
+ if (state && !IS_ERR(state)) {
+ kfree(state);
+ state = NULL;
+ }
if (bdev->bd_part_count)
return -EBUSY;
@@ -562,8 +586,32 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
bdev->bd_invalidated = 0;
if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
return 0;
- if (IS_ERR(state)) /* I/O error reading the partition table */
+ if (IS_ERR(state)) {
+ /*
+ * I/O error reading the partition table. If any
+ * partition code tried to read beyond EOD, retry
+ * after unlocking native capacity.
+ */
+ if (PTR_ERR(state) == -ENOSPC) {
+ printk(KERN_WARNING "%s: partition table beyond EOD, ",
+ disk->disk_name);
+ if (disk_unlock_native_capacity(disk))
+ goto rescan;
+ }
return -EIO;
+ }
+ /*
+ * If any partition code tried to read beyond EOD, try
+ * unlocking native capacity even if partition table is
+ * sucessfully read as we could be missing some partitions.
+ */
+ if (state->access_beyond_eod) {
+ printk(KERN_WARNING
+ "%s: partition table partially beyond EOD, ",
+ disk->disk_name);
+ if (disk_unlock_native_capacity(disk))
+ goto rescan;
+ }
/* tell userspace that the media / partition table may have changed */
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
@@ -581,7 +629,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
/* add partitions */
for (p = 1; p < state->limit; p++) {
sector_t size, from;
-try_scan:
+
size = state->parts[p].size;
if (!size)
continue;
@@ -589,30 +637,21 @@ try_scan:
from = state->parts[p].from;
if (from >= get_capacity(disk)) {
printk(KERN_WARNING
- "%s: p%d ignored, start %llu is behind the end of the disk\n",
+ "%s: p%d start %llu is beyond EOD, ",
disk->disk_name, p, (unsigned long long) from);
+ if (disk_unlock_native_capacity(disk))
+ goto rescan;
continue;
}
if (from + size > get_capacity(disk)) {
- const struct block_device_operations *bdops = disk->fops;
- unsigned long long capacity;
-
printk(KERN_WARNING
- "%s: p%d size %llu exceeds device capacity, ",
+ "%s: p%d size %llu extends beyond EOD, ",
disk->disk_name, p, (unsigned long long) size);
- if (bdops->set_capacity &&
- (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
- printk(KERN_CONT "enabling native capacity\n");
- capacity = bdops->set_capacity(disk, ~0ULL);
- disk->flags |= GENHD_FL_NATIVE_CAPACITY;
- if (capacity > get_capacity(disk)) {
- set_capacity(disk, capacity);
- check_disk_size_change(disk, bdev);
- bdev->bd_invalidated = 0;
- }
- goto try_scan;
+ if (disk_unlock_native_capacity(disk)) {
+ /* free state and restart */
+ goto rescan;
} else {
/*
* we can not ignore partitions of broken tables
@@ -620,7 +659,6 @@ try_scan:
* we limit them to the end of the disk to avoid
* creating invalid block devices
*/
- printk(KERN_CONT "limited to end of disk\n");
size = get_capacity(disk) - from;
}
}
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 98dbe1a84528..52f8bd399396 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -6,6 +6,7 @@
* description.
*/
struct parsed_partitions {
+ struct block_device *bdev;
char name[BDEVNAME_SIZE];
struct {
sector_t from;
@@ -14,8 +15,19 @@ struct parsed_partitions {
} parts[DISK_MAX_PARTS];
int next;
int limit;
+ bool access_beyond_eod;
};
+static inline void *read_part_sector(struct parsed_partitions *state,
+ sector_t n, Sector *p)
+{
+ if (n >= get_capacity(state->bdev->bd_disk)) {
+ state->access_beyond_eod = true;
+ return NULL;
+ }
+ return read_dev_sector(state->bdev, n, p);
+}
+
static inline void
put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
{
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 91babdae7587..9e346c19bbba 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -140,8 +140,7 @@ efi_crc32(const void *buf, unsigned long len)
* the part[0] entry for this disk, and is the number of
* physical sectors available on the disk.
*/
-static u64
-last_lba(struct block_device *bdev)
+static u64 last_lba(struct block_device *bdev)
{
if (!bdev || !bdev->bd_inode)
return 0;
@@ -181,27 +180,28 @@ is_pmbr_valid(legacy_mbr *mbr)
/**
* read_lba(): Read bytes from disk, starting at given LBA
- * @bdev
+ * @state
* @lba
* @buffer
* @size_t
*
- * Description: Reads @count bytes from @bdev into @buffer.
+ * Description: Reads @count bytes from @state->bdev into @buffer.
* Returns number of bytes read on success, 0 on error.
*/
-static size_t
-read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
+static size_t read_lba(struct parsed_partitions *state,
+ u64 lba, u8 *buffer, size_t count)
{
size_t totalreadcount = 0;
+ struct block_device *bdev = state->bdev;
sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
- if (!bdev || !buffer || lba > last_lba(bdev))
+ if (!buffer || lba > last_lba(bdev))
return 0;
while (count) {
int copied = 512;
Sector sect;
- unsigned char *data = read_dev_sector(bdev, n++, &sect);
+ unsigned char *data = read_part_sector(state, n++, &sect);
if (!data)
break;
if (copied > count)
@@ -217,19 +217,20 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
/**
* alloc_read_gpt_entries(): reads partition entries from disk
- * @bdev
+ * @state
* @gpt - GPT header
*
* Description: Returns ptes on success, NULL on error.
* Allocates space for PTEs based on information found in @gpt.
* Notes: remember to free pte when you're done!
*/
-static gpt_entry *
-alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt)
+static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
+ gpt_header *gpt)
{
size_t count;
gpt_entry *pte;
- if (!bdev || !gpt)
+
+ if (!gpt)
return NULL;
count = le32_to_cpu(gpt->num_partition_entries) *
@@ -240,7 +241,7 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt)
if (!pte)
return NULL;
- if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba),
+ if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba),
(u8 *) pte,
count) < count) {
kfree(pte);
@@ -252,27 +253,24 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt)
/**
* alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
- * @bdev
+ * @state
* @lba is the Logical Block Address of the partition table
*
* Description: returns GPT header on success, NULL on error. Allocates
- * and fills a GPT header starting at @ from @bdev.
+ * and fills a GPT header starting at @ from @state->bdev.
* Note: remember to free gpt when finished with it.
*/
-static gpt_header *
-alloc_read_gpt_header(struct block_device *bdev, u64 lba)
+static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
+ u64 lba)
{
gpt_header *gpt;
- unsigned ssz = bdev_logical_block_size(bdev);
-
- if (!bdev)
- return NULL;
+ unsigned ssz = bdev_logical_block_size(state->bdev);
gpt = kzalloc(ssz, GFP_KERNEL);
if (!gpt)
return NULL;
- if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) {
+ if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) {
kfree(gpt);
gpt=NULL;
return NULL;
@@ -283,7 +281,7 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba)
/**
* is_gpt_valid() - tests one GPT header and PTEs for validity
- * @bdev
+ * @state
* @lba is the logical block address of the GPT header to test
* @gpt is a GPT header ptr, filled on return.
* @ptes is a PTEs ptr, filled on return.
@@ -291,16 +289,15 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba)
* Description: returns 1 if valid, 0 on error.
* If valid, returns pointers to newly allocated GPT header and PTEs.
*/
-static int
-is_gpt_valid(struct block_device *bdev, u64 lba,
- gpt_header **gpt, gpt_entry **ptes)
+static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
+ gpt_header **gpt, gpt_entry **ptes)
{
u32 crc, origcrc;
u64 lastlba;
- if (!bdev || !gpt || !ptes)
+ if (!ptes)
return 0;
- if (!(*gpt = alloc_read_gpt_header(bdev, lba)))
+ if (!(*gpt = alloc_read_gpt_header(state, lba)))
return 0;
/* Check the GUID Partition Table signature */
@@ -336,7 +333,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
/* Check the first_usable_lba and last_usable_lba are
* within the disk.
*/
- lastlba = last_lba(bdev);
+ lastlba = last_lba(state->bdev);
if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
(unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
@@ -350,7 +347,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
goto fail;
}
- if (!(*ptes = alloc_read_gpt_entries(bdev, *gpt)))
+ if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
goto fail;
/* Check the GUID Partition Entry Array CRC */
@@ -495,7 +492,7 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
/**
* find_valid_gpt() - Search disk for valid GPT headers and PTEs
- * @bdev
+ * @state
* @gpt is a GPT header ptr, filled on return.
* @ptes is a PTEs ptr, filled on return.
* Description: Returns 1 if valid, 0 on error.
@@ -508,24 +505,25 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
* This protects against devices which misreport their size, and forces
* the user to decide to use the Alternate GPT.
*/
-static int
-find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
+static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
+ gpt_entry **ptes)
{
int good_pgpt = 0, good_agpt = 0, good_pmbr = 0;
gpt_header *pgpt = NULL, *agpt = NULL;
gpt_entry *pptes = NULL, *aptes = NULL;
legacy_mbr *legacymbr;
u64 lastlba;
- if (!bdev || !gpt || !ptes)
+
+ if (!ptes)
return 0;
- lastlba = last_lba(bdev);
+ lastlba = last_lba(state->bdev);
if (!force_gpt) {
/* This will be added to the EFI Spec. per Intel after v1.02. */
legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
if (legacymbr) {
- read_lba(bdev, 0, (u8 *) legacymbr,
- sizeof (*legacymbr));
+ read_lba(state, 0, (u8 *) legacymbr,
+ sizeof (*legacymbr));
good_pmbr = is_pmbr_valid(legacymbr);
kfree(legacymbr);
}
@@ -533,15 +531,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
goto fail;
}
- good_pgpt = is_gpt_valid(bdev, GPT_PRIMARY_PARTITION_TABLE_LBA,
+ good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA,
&pgpt, &pptes);
if (good_pgpt)
- good_agpt = is_gpt_valid(bdev,
+ good_agpt = is_gpt_valid(state,
le64_to_cpu(pgpt->alternate_lba),
&agpt, &aptes);
if (!good_agpt && force_gpt)
- good_agpt = is_gpt_valid(bdev, lastlba,
- &agpt, &aptes);
+ good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes);
/* The obviously unsuccessful case */
if (!good_pgpt && !good_agpt)
@@ -583,9 +580,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
}
/**
- * efi_partition(struct parsed_partitions *state, struct block_device *bdev)
+ * efi_partition(struct parsed_partitions *state)
* @state
- * @bdev
*
* Description: called from check.c, if the disk contains GPT
* partitions, sets up partition entries in the kernel.
@@ -602,15 +598,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
* 1 if successful
*
*/
-int
-efi_partition(struct parsed_partitions *state, struct block_device *bdev)
+int efi_partition(struct parsed_partitions *state)
{
gpt_header *gpt = NULL;
gpt_entry *ptes = NULL;
u32 i;
- unsigned ssz = bdev_logical_block_size(bdev) / 512;
+ unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
- if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) {
+ if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
kfree(gpt);
kfree(ptes);
return 0;
@@ -623,7 +618,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
u64 size = le64_to_cpu(ptes[i].ending_lba) -
le64_to_cpu(ptes[i].starting_lba) + 1ULL;
- if (!is_pte_valid(&ptes[i], last_lba(bdev)))
+ if (!is_pte_valid(&ptes[i], last_lba(state->bdev)))
continue;
put_partition(state, i+1, start * ssz, size * ssz);
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
index 6998b589abf9..b69ab729558f 100644
--- a/fs/partitions/efi.h
+++ b/fs/partitions/efi.h
@@ -110,7 +110,7 @@ typedef struct _legacy_mbr {
} __attribute__ ((packed)) legacy_mbr;
/* Functions */
-extern int efi_partition(struct parsed_partitions *state, struct block_device *bdev);
+extern int efi_partition(struct parsed_partitions *state);
#endif
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index fc71aab08460..3e73de5967ff 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -58,9 +58,9 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
/*
*/
-int
-ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
+int ibm_partition(struct parsed_partitions *state)
{
+ struct block_device *bdev = state->bdev;
int blocksize, res;
loff_t i_size, offset, size, fmt_size;
dasd_information2_t *info;
@@ -100,7 +100,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
/*
* Get volume label, extract name and type.
*/
- data = read_dev_sector(bdev, info->label_block*(blocksize/512), &sect);
+ data = read_part_sector(state, info->label_block*(blocksize/512),
+ &sect);
if (data == NULL)
goto out_readerr;
@@ -193,8 +194,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
*/
blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
counter = 0;
- data = read_dev_sector(bdev, blk * (blocksize/512),
- &sect);
+ data = read_part_sector(state, blk * (blocksize/512),
+ &sect);
while (data != NULL) {
struct vtoc_format1_label f1;
@@ -208,9 +209,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
|| f1.DS1FMTID == _ascebc['7']
|| f1.DS1FMTID == _ascebc['9']) {
blk++;
- data = read_dev_sector(bdev, blk *
- (blocksize/512),
- &sect);
+ data = read_part_sector(state,
+ blk * (blocksize/512), &sect);
continue;
}
@@ -230,9 +230,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
size * (blocksize >> 9));
counter++;
blk++;
- data = read_dev_sector(bdev,
- blk * (blocksize/512),
- &sect);
+ data = read_part_sector(state,
+ blk * (blocksize/512), &sect);
}
if (!data)
diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h
index 31f85a6ac459..08fb0804a812 100644
--- a/fs/partitions/ibm.h
+++ b/fs/partitions/ibm.h
@@ -1 +1 @@
-int ibm_partition(struct parsed_partitions *, struct block_device *);
+int ibm_partition(struct parsed_partitions *);
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c
index 176d89bcf123..1cc928bb762f 100644
--- a/fs/partitions/karma.c
+++ b/fs/partitions/karma.c
@@ -9,7 +9,7 @@
#include "check.h"
#include "karma.h"
-int karma_partition(struct parsed_partitions *state, struct block_device *bdev)
+int karma_partition(struct parsed_partitions *state)
{
int i;
int slot = 1;
@@ -29,7 +29,7 @@ int karma_partition(struct parsed_partitions *state, struct block_device *bdev)
} __attribute__((packed)) *label;
struct d_partition *p;
- data = read_dev_sector(bdev, 0, &sect);
+ data = read_part_sector(state, 0, &sect);
if (!data)
return -1;
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h
index ecf7d3f2a3d8..c764b2e9df21 100644
--- a/fs/partitions/karma.h
+++ b/fs/partitions/karma.h
@@ -4,5 +4,5 @@
#define KARMA_LABEL_MAGIC 0xAB56
-int karma_partition(struct parsed_partitions *state, struct block_device *bdev);
+int karma_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 8652fb99e962..3ceca05b668c 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -309,7 +309,7 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1,
/**
* ldm_validate_privheads - Compare the primary privhead with its backups
- * @bdev: Device holding the LDM Database
+ * @state: Partition check state including device holding the LDM Database
* @ph1: Memory struct to fill with ph contents
*
* Read and compare all three privheads from disk.
@@ -321,8 +321,8 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1,
* Return: 'true' Success
* 'false' Error
*/
-static bool ldm_validate_privheads (struct block_device *bdev,
- struct privhead *ph1)
+static bool ldm_validate_privheads(struct parsed_partitions *state,
+ struct privhead *ph1)
{
static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 };
struct privhead *ph[3] = { ph1 };
@@ -332,7 +332,7 @@ static bool ldm_validate_privheads (struct block_device *bdev,
long num_sects;
int i;
- BUG_ON (!bdev || !ph1);
+ BUG_ON (!state || !ph1);
ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL);
ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL);
@@ -346,8 +346,8 @@ static bool ldm_validate_privheads (struct block_device *bdev,
/* Read and parse privheads */
for (i = 0; i < 3; i++) {
- data = read_dev_sector (bdev,
- ph[0]->config_start + off[i], &sect);
+ data = read_part_sector(state, ph[0]->config_start + off[i],
+ &sect);
if (!data) {
ldm_crit ("Disk read failed.");
goto out;
@@ -363,7 +363,7 @@ static bool ldm_validate_privheads (struct block_device *bdev,
}
}
- num_sects = bdev->bd_inode->i_size >> 9;
+ num_sects = state->bdev->bd_inode->i_size >> 9;
if ((ph[0]->config_start > num_sects) ||
((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
@@ -397,20 +397,20 @@ out:
/**
* ldm_validate_tocblocks - Validate the table of contents and its backups
- * @bdev: Device holding the LDM Database
- * @base: Offset, into @bdev, of the database
+ * @state: Partition check state including device holding the LDM Database
+ * @base: Offset, into @state->bdev, of the database
* @ldb: Cache of the database structures
*
* Find and compare the four tables of contents of the LDM Database stored on
- * @bdev and return the parsed information into @toc1.
+ * @state->bdev and return the parsed information into @toc1.
*
* The offsets and sizes of the configs are range-checked against a privhead.
*
* Return: 'true' @toc1 contains validated TOCBLOCK info
* 'false' @toc1 contents are undefined
*/
-static bool ldm_validate_tocblocks(struct block_device *bdev,
- unsigned long base, struct ldmdb *ldb)
+static bool ldm_validate_tocblocks(struct parsed_partitions *state,
+ unsigned long base, struct ldmdb *ldb)
{
static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4};
struct tocblock *tb[4];
@@ -420,7 +420,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev,
int i, nr_tbs;
bool result = false;
- BUG_ON(!bdev || !ldb);
+ BUG_ON(!state || !ldb);
ph = &ldb->ph;
tb[0] = &ldb->toc;
tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL);
@@ -437,7 +437,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev,
* skip any that fail as long as we get at least one valid TOCBLOCK.
*/
for (nr_tbs = i = 0; i < 4; i++) {
- data = read_dev_sector(bdev, base + off[i], &sect);
+ data = read_part_sector(state, base + off[i], &sect);
if (!data) {
ldm_error("Disk read failed for TOCBLOCK %d.", i);
continue;
@@ -473,7 +473,7 @@ err:
/**
* ldm_validate_vmdb - Read the VMDB and validate it
- * @bdev: Device holding the LDM Database
+ * @state: Partition check state including device holding the LDM Database
* @base: Offset, into @bdev, of the database
* @ldb: Cache of the database structures
*
@@ -483,8 +483,8 @@ err:
* Return: 'true' @ldb contains validated VBDB info
* 'false' @ldb contents are undefined
*/
-static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
- struct ldmdb *ldb)
+static bool ldm_validate_vmdb(struct parsed_partitions *state,
+ unsigned long base, struct ldmdb *ldb)
{
Sector sect;
u8 *data;
@@ -492,12 +492,12 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
struct vmdb *vm;
struct tocblock *toc;
- BUG_ON (!bdev || !ldb);
+ BUG_ON (!state || !ldb);
vm = &ldb->vm;
toc = &ldb->toc;
- data = read_dev_sector (bdev, base + OFF_VMDB, &sect);
+ data = read_part_sector(state, base + OFF_VMDB, &sect);
if (!data) {
ldm_crit ("Disk read failed.");
return false;
@@ -534,21 +534,21 @@ out:
/**
* ldm_validate_partition_table - Determine whether bdev might be a dynamic disk
- * @bdev: Device holding the LDM Database
+ * @state: Partition check state including device holding the LDM Database
*
* This function provides a weak test to decide whether the device is a dynamic
* disk or not. It looks for an MS-DOS-style partition table containing at
* least one partition of type 0x42 (formerly SFS, now used by Windows for
* dynamic disks).
*
- * N.B. The only possible error can come from the read_dev_sector and that is
+ * N.B. The only possible error can come from the read_part_sector and that is
* only likely to happen if the underlying device is strange. If that IS
* the case we should return zero to let someone else try.
*
- * Return: 'true' @bdev is a dynamic disk
- * 'false' @bdev is not a dynamic disk, or an error occurred
+ * Return: 'true' @state->bdev is a dynamic disk
+ * 'false' @state->bdev is not a dynamic disk, or an error occurred
*/
-static bool ldm_validate_partition_table (struct block_device *bdev)
+static bool ldm_validate_partition_table(struct parsed_partitions *state)
{
Sector sect;
u8 *data;
@@ -556,9 +556,9 @@ static bool ldm_validate_partition_table (struct block_device *bdev)
int i;
bool result = false;
- BUG_ON (!bdev);
+ BUG_ON(!state);
- data = read_dev_sector (bdev, 0, &sect);
+ data = read_part_sector(state, 0, &sect);
if (!data) {
ldm_crit ("Disk read failed.");
return false;
@@ -1391,8 +1391,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
/**
* ldm_get_vblks - Read the on-disk database of VBLKs into memory
- * @bdev: Device holding the LDM Database
- * @base: Offset, into @bdev, of the database
+ * @state: Partition check state including device holding the LDM Database
+ * @base: Offset, into @state->bdev, of the database
* @ldb: Cache of the database structures
*
* To use the information from the VBLKs, they need to be read from the disk,
@@ -1401,8 +1401,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
* Return: 'true' All the VBLKs were read successfully
* 'false' An error occurred
*/
-static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
- struct ldmdb *ldb)
+static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base,
+ struct ldmdb *ldb)
{
int size, perbuf, skip, finish, s, v, recs;
u8 *data = NULL;
@@ -1410,7 +1410,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
bool result = false;
LIST_HEAD (frags);
- BUG_ON (!bdev || !ldb);
+ BUG_ON(!state || !ldb);
size = ldb->vm.vblk_size;
perbuf = 512 / size;
@@ -1418,7 +1418,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
finish = (size * ldb->vm.last_vblk_seq) >> 9;
for (s = skip; s < finish; s++) { /* For each sector */
- data = read_dev_sector (bdev, base + OFF_VMDB + s, &sect);
+ data = read_part_sector(state, base + OFF_VMDB + s, &sect);
if (!data) {
ldm_crit ("Disk read failed.");
goto out;
@@ -1474,8 +1474,7 @@ static void ldm_free_vblks (struct list_head *lh)
/**
* ldm_partition - Find out whether a device is a dynamic disk and handle it
- * @pp: List of the partitions parsed so far
- * @bdev: Device holding the LDM Database
+ * @state: Partition check state including device holding the LDM Database
*
* This determines whether the device @bdev is a dynamic disk and if so creates
* the partitions necessary in the gendisk structure pointed to by @hd.
@@ -1485,21 +1484,21 @@ static void ldm_free_vblks (struct list_head *lh)
* example, if the device is hda, we would have: hda1: LDM database, hda2, hda3,
* and so on: the actual data containing partitions.
*
- * Return: 1 Success, @bdev is a dynamic disk and we handled it
- * 0 Success, @bdev is not a dynamic disk
+ * Return: 1 Success, @state->bdev is a dynamic disk and we handled it
+ * 0 Success, @state->bdev is not a dynamic disk
* -1 An error occurred before enough information had been read
- * Or @bdev is a dynamic disk, but it may be corrupted
+ * Or @state->bdev is a dynamic disk, but it may be corrupted
*/
-int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev)
+int ldm_partition(struct parsed_partitions *state)
{
struct ldmdb *ldb;
unsigned long base;
int result = -1;
- BUG_ON (!pp || !bdev);
+ BUG_ON(!state);
/* Look for signs of a Dynamic Disk */
- if (!ldm_validate_partition_table (bdev))
+ if (!ldm_validate_partition_table(state))
return 0;
ldb = kmalloc (sizeof (*ldb), GFP_KERNEL);
@@ -1509,15 +1508,15 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev)
}
/* Parse and check privheads. */
- if (!ldm_validate_privheads (bdev, &ldb->ph))
+ if (!ldm_validate_privheads(state, &ldb->ph))
goto out; /* Already logged */
/* All further references are relative to base (database start). */
base = ldb->ph.config_start;
/* Parse and check tocs and vmdb. */
- if (!ldm_validate_tocblocks (bdev, base, ldb) ||
- !ldm_validate_vmdb (bdev, base, ldb))
+ if (!ldm_validate_tocblocks(state, base, ldb) ||
+ !ldm_validate_vmdb(state, base, ldb))
goto out; /* Already logged */
/* Initialize vblk lists in ldmdb struct */
@@ -1527,13 +1526,13 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev)
INIT_LIST_HEAD (&ldb->v_comp);
INIT_LIST_HEAD (&ldb->v_part);
- if (!ldm_get_vblks (bdev, base, ldb)) {
+ if (!ldm_get_vblks(state, base, ldb)) {
ldm_crit ("Failed to read the VBLKs from the database.");
goto cleanup;
}
/* Finally, create the data partition devices. */
- if (ldm_create_data_partitions (pp, ldb)) {
+ if (ldm_create_data_partitions(state, ldb)) {
ldm_debug ("Parsed LDM database successfully.");
result = 1;
}
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 30e08e809c1d..d1fb50b28d86 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -209,7 +209,7 @@ struct ldmdb { /* Cache of the database */
struct list_head v_part;
};
-int ldm_partition (struct parsed_partitions *state, struct block_device *bdev);
+int ldm_partition(struct parsed_partitions *state);
#endif /* _FS_PT_LDM_H_ */
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index d4a0fad3563b..13e27b0082f2 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -27,7 +27,7 @@ static inline void mac_fix_string(char *stg, int len)
stg[i] = 0;
}
-int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
+int mac_partition(struct parsed_partitions *state)
{
int slot = 1;
Sector sect;
@@ -42,7 +42,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
struct mac_driver_desc *md;
/* Get 0th block and look at the first partition map entry. */
- md = (struct mac_driver_desc *) read_dev_sector(bdev, 0, &sect);
+ md = read_part_sector(state, 0, &sect);
if (!md)
return -1;
if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) {
@@ -51,7 +51,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
}
secsize = be16_to_cpu(md->block_size);
put_dev_sector(sect);
- data = read_dev_sector(bdev, secsize/512, &sect);
+ data = read_part_sector(state, secsize/512, &sect);
if (!data)
return -1;
part = (struct mac_partition *) (data + secsize%512);
@@ -64,7 +64,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
for (blk = 1; blk <= blocks_in_map; ++blk) {
int pos = blk * secsize;
put_dev_sector(sect);
- data = read_dev_sector(bdev, pos/512, &sect);
+ data = read_part_sector(state, pos/512, &sect);
if (!data)
return -1;
part = (struct mac_partition *) (data + pos%512);
@@ -123,7 +123,8 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev)
}
#ifdef CONFIG_PPC_PMAC
if (found_root_goodness)
- note_bootable_part(bdev->bd_dev, found_root, found_root_goodness);
+ note_bootable_part(state->bdev->bd_dev, found_root,
+ found_root_goodness);
#endif
put_dev_sector(sect);
diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h
index bbf26e1386fa..3c7d98436380 100644
--- a/fs/partitions/mac.h
+++ b/fs/partitions/mac.h
@@ -41,4 +41,4 @@ struct mac_driver_desc {
/* ... more stuff */
};
-int mac_partition(struct parsed_partitions *state, struct block_device *bdev);
+int mac_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 90be97f1f5a8..645a68d8c055 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -64,7 +64,7 @@ msdos_magic_present(unsigned char *p)
#define AIX_LABEL_MAGIC2 0xC2
#define AIX_LABEL_MAGIC3 0xD4
#define AIX_LABEL_MAGIC4 0xC1
-static int aix_magic_present(unsigned char *p, struct block_device *bdev)
+static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
{
struct partition *pt = (struct partition *) (p + 0x1be);
Sector sect;
@@ -85,7 +85,7 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev)
is_extended_partition(pt))
return 0;
}
- d = read_dev_sector(bdev, 7, &sect);
+ d = read_part_sector(state, 7, &sect);
if (d) {
if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
ret = 1;
@@ -105,15 +105,14 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev)
* only for the actual data partitions.
*/
-static void
-parse_extended(struct parsed_partitions *state, struct block_device *bdev,
- sector_t first_sector, sector_t first_size)
+static void parse_extended(struct parsed_partitions *state,
+ sector_t first_sector, sector_t first_size)
{
struct partition *p;
Sector sect;
unsigned char *data;
sector_t this_sector, this_size;
- sector_t sector_size = bdev_logical_block_size(bdev) / 512;
+ sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
int loopct = 0; /* number of links followed
without finding a data partition */
int i;
@@ -126,7 +125,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
return;
if (state->next == state->limit)
return;
- data = read_dev_sector(bdev, this_sector, &sect);
+ data = read_part_sector(state, this_sector, &sect);
if (!data)
return;
@@ -198,9 +197,8 @@ done:
/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
indicates linux swap. Be careful before believing this is Solaris. */
-static void
-parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
- sector_t offset, sector_t size, int origin)
+static void parse_solaris_x86(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin)
{
#ifdef CONFIG_SOLARIS_X86_PARTITION
Sector sect;
@@ -208,7 +206,7 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
int i;
short max_nparts;
- v = (struct solaris_x86_vtoc *)read_dev_sector(bdev, offset+1, &sect);
+ v = read_part_sector(state, offset + 1, &sect);
if (!v)
return;
if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) {
@@ -245,16 +243,15 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
* Create devices for BSD partitions listed in a disklabel, under a
* dos-like partition. See parse_extended() for more information.
*/
-static void
-parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
- sector_t offset, sector_t size, int origin, char *flavour,
- int max_partitions)
+static void parse_bsd(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin, char *flavour,
+ int max_partitions)
{
Sector sect;
struct bsd_disklabel *l;
struct bsd_partition *p;
- l = (struct bsd_disklabel *)read_dev_sector(bdev, offset+1, &sect);
+ l = read_part_sector(state, offset + 1, &sect);
if (!l)
return;
if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) {
@@ -291,33 +288,28 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
}
#endif
-static void
-parse_freebsd(struct parsed_partitions *state, struct block_device *bdev,
- sector_t offset, sector_t size, int origin)
+static void parse_freebsd(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin)
{
#ifdef CONFIG_BSD_DISKLABEL
- parse_bsd(state, bdev, offset, size, origin,
- "bsd", BSD_MAXPARTITIONS);
+ parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS);
#endif
}
-static void
-parse_netbsd(struct parsed_partitions *state, struct block_device *bdev,
- sector_t offset, sector_t size, int origin)
+static void parse_netbsd(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin)
{
#ifdef CONFIG_BSD_DISKLABEL
- parse_bsd(state, bdev, offset, size, origin,
- "netbsd", BSD_MAXPARTITIONS);
+ parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS);
#endif
}
-static void
-parse_openbsd(struct parsed_partitions *state, struct block_device *bdev,
- sector_t offset, sector_t size, int origin)
+static void parse_openbsd(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin)
{
#ifdef CONFIG_BSD_DISKLABEL
- parse_bsd(state, bdev, offset, size, origin,
- "openbsd", OPENBSD_MAXPARTITIONS);
+ parse_bsd(state, offset, size, origin, "openbsd",
+ OPENBSD_MAXPARTITIONS);
#endif
}
@@ -325,16 +317,15 @@ parse_openbsd(struct parsed_partitions *state, struct block_device *bdev,
* Create devices for Unixware partitions listed in a disklabel, under a
* dos-like partition. See parse_extended() for more information.
*/
-static void
-parse_unixware(struct parsed_partitions *state, struct block_device *bdev,
- sector_t offset, sector_t size, int origin)
+static void parse_unixware(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin)
{
#ifdef CONFIG_UNIXWARE_DISKLABEL
Sector sect;
struct unixware_disklabel *l;
struct unixware_slice *p;
- l = (struct unixware_disklabel *)read_dev_sector(bdev, offset+29, &sect);
+ l = read_part_sector(state, offset + 29, &sect);
if (!l)
return;
if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC ||
@@ -365,9 +356,8 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev,
* Anand Krishnamurthy <anandk@wiproge.med.ge.com>
* Rajeev V. Pillai <rajeevvp@yahoo.com>
*/
-static void
-parse_minix(struct parsed_partitions *state, struct block_device *bdev,
- sector_t offset, sector_t size, int origin)
+static void parse_minix(struct parsed_partitions *state,
+ sector_t offset, sector_t size, int origin)
{
#ifdef CONFIG_MINIX_SUBPARTITION
Sector sect;
@@ -375,7 +365,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev,
struct partition *p;
int i;
- data = read_dev_sector(bdev, offset, &sect);
+ data = read_part_sector(state, offset, &sect);
if (!data)
return;
@@ -404,8 +394,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev,
static struct {
unsigned char id;
- void (*parse)(struct parsed_partitions *, struct block_device *,
- sector_t, sector_t, int);
+ void (*parse)(struct parsed_partitions *, sector_t, sector_t, int);
} subtypes[] = {
{FREEBSD_PARTITION, parse_freebsd},
{NETBSD_PARTITION, parse_netbsd},
@@ -417,16 +406,16 @@ static struct {
{0, NULL},
};
-int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
+int msdos_partition(struct parsed_partitions *state)
{
- sector_t sector_size = bdev_logical_block_size(bdev) / 512;
+ sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
Sector sect;
unsigned char *data;
struct partition *p;
struct fat_boot_sector *fb;
int slot;
- data = read_dev_sector(bdev, 0, &sect);
+ data = read_part_sector(state, 0, &sect);
if (!data)
return -1;
if (!msdos_magic_present(data + 510)) {
@@ -434,7 +423,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
return 0;
}
- if (aix_magic_present(data, bdev)) {
+ if (aix_magic_present(state, data)) {
put_dev_sector(sect);
printk( " [AIX]");
return 0;
@@ -503,7 +492,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
put_partition(state, slot, start, n);
printk(" <");
- parse_extended(state, bdev, start, size);
+ parse_extended(state, start, size);
printk(" >");
continue;
}
@@ -532,8 +521,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
if (!subtypes[n].parse)
continue;
- subtypes[n].parse(state, bdev, start_sect(p)*sector_size,
- nr_sects(p)*sector_size, slot);
+ subtypes[n].parse(state, start_sect(p) * sector_size,
+ nr_sects(p) * sector_size, slot);
}
put_dev_sector(sect);
return 1;
diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h
index 01e5e0b6902d..38c781c490b3 100644
--- a/fs/partitions/msdos.h
+++ b/fs/partitions/msdos.h
@@ -4,5 +4,5 @@
#define MSDOS_LABEL_MAGIC 0xAA55
-int msdos_partition(struct parsed_partitions *state, struct block_device *bdev);
+int msdos_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index c05c17bc5df3..fc22b85d436a 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,7 +10,7 @@
#include "check.h"
#include "osf.h"
-int osf_partition(struct parsed_partitions *state, struct block_device *bdev)
+int osf_partition(struct parsed_partitions *state)
{
int i;
int slot = 1;
@@ -49,7 +49,7 @@ int osf_partition(struct parsed_partitions *state, struct block_device *bdev)
} * label;
struct d_partition * partition;
- data = read_dev_sector(bdev, 0, &sect);
+ data = read_part_sector(state, 0, &sect);
if (!data)
return -1;
diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h
index 427b8eab314b..20ed2315ec16 100644
--- a/fs/partitions/osf.h
+++ b/fs/partitions/osf.h
@@ -4,4 +4,4 @@
#define DISKLABELMAGIC (0x82564557UL)
-int osf_partition(struct parsed_partitions *state, struct block_device *bdev);
+int osf_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c
index ed5ac83fe83a..43b1df9aa16c 100644
--- a/fs/partitions/sgi.c
+++ b/fs/partitions/sgi.c
@@ -27,7 +27,7 @@ struct sgi_disklabel {
__be32 _unused1; /* Padding */
};
-int sgi_partition(struct parsed_partitions *state, struct block_device *bdev)
+int sgi_partition(struct parsed_partitions *state)
{
int i, csum;
__be32 magic;
@@ -39,7 +39,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev)
struct sgi_partition *p;
char b[BDEVNAME_SIZE];
- label = (struct sgi_disklabel *) read_dev_sector(bdev, 0, &sect);
+ label = read_part_sector(state, 0, &sect);
if (!label)
return -1;
p = &label->partitions[0];
@@ -57,7 +57,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev)
}
if(csum) {
printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n",
- bdevname(bdev, b));
+ bdevname(state->bdev, b));
put_dev_sector(sect);
return 0;
}
diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h
index 5d5595c09928..b9553ebdd5a9 100644
--- a/fs/partitions/sgi.h
+++ b/fs/partitions/sgi.h
@@ -2,7 +2,7 @@
* fs/partitions/sgi.h
*/
-extern int sgi_partition(struct parsed_partitions *state, struct block_device *bdev);
+extern int sgi_partition(struct parsed_partitions *state);
#define SGI_LABEL_MAGIC 0x0be5a941
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index c95e6a62c01d..a32660e25f7f 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -10,7 +10,7 @@
#include "check.h"
#include "sun.h"
-int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
+int sun_partition(struct parsed_partitions *state)
{
int i;
__be16 csum;
@@ -61,7 +61,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
int use_vtoc;
int nparts;
- label = (struct sun_disklabel *)read_dev_sector(bdev, 0, &sect);
+ label = read_part_sector(state, 0, &sect);
if (!label)
return -1;
@@ -78,7 +78,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
csum ^= *ush--;
if (csum) {
printk("Dev %s Sun disklabel: Csum bad, label corrupted\n",
- bdevname(bdev, b));
+ bdevname(state->bdev, b));
put_dev_sector(sect);
return 0;
}
diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h
index 7f864d1f86d4..2424baa8319f 100644
--- a/fs/partitions/sun.h
+++ b/fs/partitions/sun.h
@@ -5,4 +5,4 @@
#define SUN_LABEL_MAGIC 0xDABE
#define SUN_VTOC_SANITY 0x600DDEEE
-int sun_partition(struct parsed_partitions *state, struct block_device *bdev);
+int sun_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c
index 4eba27b78643..9030c864428e 100644
--- a/fs/partitions/sysv68.c
+++ b/fs/partitions/sysv68.c
@@ -46,7 +46,7 @@ struct slice {
};
-int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev)
+int sysv68_partition(struct parsed_partitions *state)
{
int i, slices;
int slot = 1;
@@ -55,7 +55,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev)
struct dkblk0 *b;
struct slice *slice;
- data = read_dev_sector(bdev, 0, &sect);
+ data = read_part_sector(state, 0, &sect);
if (!data)
return -1;
@@ -68,7 +68,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev)
i = be32_to_cpu(b->dk_ios.ios_slcblk);
put_dev_sector(sect);
- data = read_dev_sector(bdev, i, &sect);
+ data = read_part_sector(state, i, &sect);
if (!data)
return -1;
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h
index fa733f68431b..bf2f5ffa97ac 100644
--- a/fs/partitions/sysv68.h
+++ b/fs/partitions/sysv68.h
@@ -1 +1 @@
-extern int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev);
+extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c
index ec852c11dce4..db9eef260364 100644
--- a/fs/partitions/ultrix.c
+++ b/fs/partitions/ultrix.c
@@ -9,7 +9,7 @@
#include "check.h"
#include "ultrix.h"
-int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev)
+int ultrix_partition(struct parsed_partitions *state)
{
int i;
Sector sect;
@@ -26,7 +26,7 @@ int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev)
#define PT_MAGIC 0x032957 /* Partition magic number */
#define PT_VALID 1 /* Indicates if struct is valid */
- data = read_dev_sector(bdev, (16384 - sizeof(*label))/512, &sect);
+ data = read_part_sector(state, (16384 - sizeof(*label))/512, &sect);
if (!data)
return -1;
diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h
index a74bf8e2d370..a3cc00b2bded 100644
--- a/fs/partitions/ultrix.h
+++ b/fs/partitions/ultrix.h
@@ -2,4 +2,4 @@
* fs/partitions/ultrix.h
*/
-int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev);
+int ultrix_partition(struct parsed_partitions *state);
diff --git a/fs/pipe.c b/fs/pipe.c
index 37ba29ff3158..d79872eba09a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/log2.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
@@ -18,11 +19,18 @@
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>
+#include <linux/fcntl.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
/*
+ * The max size that a non-root user is allowed to grow the pipe. Can
+ * be set by root in /proc/sys/fs/pipe-max-pages
+ */
+unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16;
+
+/*
* We use a start+len construction, which provides full use of the
* allocated memory.
* -- Florian Coosmann (FGC)
@@ -390,7 +398,7 @@ redo:
if (!buf->len) {
buf->ops = NULL;
ops->release(pipe, buf);
- curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
+ curbuf = (curbuf + 1) & (pipe->buffers - 1);
pipe->curbuf = curbuf;
pipe->nrbufs = --bufs;
do_wakeup = 1;
@@ -472,7 +480,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
if (pipe->nrbufs && chars != 0) {
int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
- (PIPE_BUFFERS-1);
+ (pipe->buffers - 1);
struct pipe_buffer *buf = pipe->bufs + lastbuf;
const struct pipe_buf_operations *ops = buf->ops;
int offset = buf->offset + buf->len;
@@ -518,8 +526,8 @@ redo1:
break;
}
bufs = pipe->nrbufs;
- if (bufs < PIPE_BUFFERS) {
- int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
+ if (bufs < pipe->buffers) {
+ int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
struct pipe_buffer *buf = pipe->bufs + newbuf;
struct page *page = pipe->tmp_page;
char *src;
@@ -580,7 +588,7 @@ redo2:
if (!total_len)
break;
}
- if (bufs < PIPE_BUFFERS)
+ if (bufs < pipe->buffers)
continue;
if (filp->f_flags & O_NONBLOCK) {
if (!ret)
@@ -640,7 +648,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
nrbufs = pipe->nrbufs;
while (--nrbufs >= 0) {
count += pipe->bufs[buf].len;
- buf = (buf+1) & (PIPE_BUFFERS-1);
+ buf = (buf+1) & (pipe->buffers - 1);
}
mutex_unlock(&inode->i_mutex);
@@ -671,7 +679,7 @@ pipe_poll(struct file *filp, poll_table *wait)
}
if (filp->f_mode & FMODE_WRITE) {
- mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
+ mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
/*
* Most Unices do not set POLLERR for FIFOs but on Linux they
* behave exactly like pipes for poll().
@@ -877,25 +885,32 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (pipe) {
- init_waitqueue_head(&pipe->wait);
- pipe->r_counter = pipe->w_counter = 1;
- pipe->inode = inode;
+ pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+ if (pipe->bufs) {
+ init_waitqueue_head(&pipe->wait);
+ pipe->r_counter = pipe->w_counter = 1;
+ pipe->inode = inode;
+ pipe->buffers = PIPE_DEF_BUFFERS;
+ return pipe;
+ }
+ kfree(pipe);
}
- return pipe;
+ return NULL;
}
void __free_pipe_info(struct pipe_inode_info *pipe)
{
int i;
- for (i = 0; i < PIPE_BUFFERS; i++) {
+ for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
buf->ops->release(pipe, buf);
}
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
+ kfree(pipe->bufs);
kfree(pipe);
}
@@ -1094,6 +1109,89 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
}
/*
+ * Allocate a new array of pipe buffers and copy the info over. Returns the
+ * pipe size if successful, or return -ERROR on error.
+ */
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+{
+ struct pipe_buffer *bufs;
+
+ /*
+ * Must be a power-of-2 currently
+ */
+ if (!is_power_of_2(arg))
+ return -EINVAL;
+
+ /*
+ * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
+ * expect a lot of shrink+grow operations, just free and allocate
+ * again like we would do for growing. If the pipe currently
+ * contains more buffers than arg, then return busy.
+ */
+ if (arg < pipe->nrbufs)
+ return -EBUSY;
+
+ bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
+ if (unlikely(!bufs))
+ return -ENOMEM;
+
+ /*
+ * The pipe array wraps around, so just start the new one at zero
+ * and adjust the indexes.
+ */
+ if (pipe->nrbufs) {
+ const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
+ const unsigned int head = pipe->nrbufs - tail;
+
+ if (head)
+ memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
+ if (tail)
+ memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
+ }
+
+ pipe->curbuf = 0;
+ kfree(pipe->bufs);
+ pipe->bufs = bufs;
+ pipe->buffers = arg;
+ return arg;
+}
+
+long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct pipe_inode_info *pipe;
+ long ret;
+
+ pipe = file->f_path.dentry->d_inode->i_pipe;
+ if (!pipe)
+ return -EBADF;
+
+ mutex_lock(&pipe->inode->i_mutex);
+
+ switch (cmd) {
+ case F_SETPIPE_SZ:
+ if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages)
+ return -EINVAL;
+ /*
+ * The pipe needs to be at least 2 pages large to
+ * guarantee POSIX behaviour.
+ */
+ if (arg < 2)
+ return -EINVAL;
+ ret = pipe_set_size(pipe, arg);
+ break;
+ case F_GETPIPE_SZ:
+ ret = pipe->buffers;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&pipe->inode->i_mutex);
+ return ret;
+}
+
+/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
* no real gain from having the whole whorehouse mounted. So we don't need
* any operations on the root directory. However, we need a non-trivial
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1d9c12714c5c..9977df9f3a54 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp,
barrier_done = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
- blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
if (barrier_done < 0)
return barrier_done;
return (err < 0) ? -EIO : 0;
diff --git a/fs/splice.c b/fs/splice.c
index 9313b6124a2e..ac22b00d86c3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
break;
}
- if (pipe->nrbufs < PIPE_BUFFERS) {
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
+ if (pipe->nrbufs < pipe->buffers) {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
struct pipe_buffer *buf = pipe->bufs + newbuf;
buf->page = spd->pages[page_nr];
@@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
if (!--spd->nr_pages)
break;
- if (pipe->nrbufs < PIPE_BUFFERS)
+ if (pipe->nrbufs < pipe->buffers)
continue;
break;
@@ -265,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
page_cache_release(spd->pages[i]);
}
+/*
+ * Check if we need to grow the arrays holding pages and partial page
+ * descriptions.
+ */
+int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+{
+ if (pipe->buffers <= PIPE_DEF_BUFFERS)
+ return 0;
+
+ spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
+ spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);
+
+ if (spd->pages && spd->partial)
+ return 0;
+
+ kfree(spd->pages);
+ kfree(spd->partial);
+ return -ENOMEM;
+}
+
+void splice_shrink_spd(struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd)
+{
+ if (pipe->buffers <= PIPE_DEF_BUFFERS)
+ return;
+
+ kfree(spd->pages);
+ kfree(spd->partial);
+}
+
static int
__generic_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
@@ -272,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
{
struct address_space *mapping = in->f_mapping;
unsigned int loff, nr_pages, req_pages;
- struct page *pages[PIPE_BUFFERS];
- struct partial_page partial[PIPE_BUFFERS];
+ struct page *pages[PIPE_DEF_BUFFERS];
+ struct partial_page partial[PIPE_DEF_BUFFERS];
struct page *page;
pgoff_t index, end_index;
loff_t isize;
@@ -286,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
.spd_release = spd_release_page,
};
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
index = *ppos >> PAGE_CACHE_SHIFT;
loff = *ppos & ~PAGE_CACHE_MASK;
req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS);
+ nr_pages = min(req_pages, pipe->buffers);
/*
* Lookup the (hopefully) full range of pages we need.
*/
- spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
+ spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
index += spd.nr_pages;
/*
@@ -335,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
unlock_page(page);
}
- pages[spd.nr_pages++] = page;
+ spd.pages[spd.nr_pages++] = page;
index++;
}
@@ -356,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
* this_len is the max we'll use from this page
*/
this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
- page = pages[page_nr];
+ page = spd.pages[page_nr];
if (PageReadahead(page))
page_cache_async_readahead(mapping, &in->f_ra, in,
@@ -393,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
error = -ENOMEM;
break;
}
- page_cache_release(pages[page_nr]);
- pages[page_nr] = page;
+ page_cache_release(spd.pages[page_nr]);
+ spd.pages[page_nr] = page;
}
/*
* page was already under io and is now done, great
@@ -451,8 +484,8 @@ fill_it:
len = this_len;
}
- partial[page_nr].offset = loff;
- partial[page_nr].len = this_len;
+ spd.partial[page_nr].offset = loff;
+ spd.partial[page_nr].len = this_len;
len -= this_len;
loff = 0;
spd.nr_pages++;
@@ -464,12 +497,13 @@ fill_it:
* we got, 'nr_pages' is how many pages are in the map.
*/
while (page_nr < nr_pages)
- page_cache_release(pages[page_nr++]);
+ page_cache_release(spd.pages[page_nr++]);
in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
if (spd.nr_pages)
- return splice_to_pipe(pipe, &spd);
+ error = splice_to_pipe(pipe, &spd);
+ splice_shrink_spd(pipe, &spd);
return error;
}
@@ -560,9 +594,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
unsigned int nr_pages;
unsigned int nr_freed;
size_t offset;
- struct page *pages[PIPE_BUFFERS];
- struct partial_page partial[PIPE_BUFFERS];
- struct iovec vec[PIPE_BUFFERS];
+ struct page *pages[PIPE_DEF_BUFFERS];
+ struct partial_page partial[PIPE_DEF_BUFFERS];
+ struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
pgoff_t index;
ssize_t res;
size_t this_len;
@@ -576,11 +610,22 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
.spd_release = spd_release_page,
};
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
+ res = -ENOMEM;
+ vec = __vec;
+ if (pipe->buffers > PIPE_DEF_BUFFERS) {
+ vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
+ if (!vec)
+ goto shrink_ret;
+ }
+
index = *ppos >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+ for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
struct page *page;
page = alloc_page(GFP_USER);
@@ -591,7 +636,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
vec[i].iov_base = (void __user *) page_address(page);
vec[i].iov_len = this_len;
- pages[i] = page;
+ spd.pages[i] = page;
spd.nr_pages++;
len -= this_len;
offset = 0;
@@ -610,11 +655,11 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
nr_freed = 0;
for (i = 0; i < spd.nr_pages; i++) {
this_len = min_t(size_t, vec[i].iov_len, res);
- partial[i].offset = 0;
- partial[i].len = this_len;
+ spd.partial[i].offset = 0;
+ spd.partial[i].len = this_len;
if (!this_len) {
- __free_page(pages[i]);
- pages[i] = NULL;
+ __free_page(spd.pages[i]);
+ spd.pages[i] = NULL;
nr_freed++;
}
res -= this_len;
@@ -625,13 +670,18 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
if (res > 0)
*ppos += res;
+shrink_ret:
+ if (vec != __vec)
+ kfree(vec);
+ splice_shrink_spd(pipe, &spd);
return res;
err:
for (i = 0; i < spd.nr_pages; i++)
- __free_page(pages[i]);
+ __free_page(spd.pages[i]);
- return error;
+ res = error;
+ goto shrink_ret;
}
EXPORT_SYMBOL(default_file_splice_read);
@@ -784,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
if (!buf->len) {
buf->ops = NULL;
ops->release(pipe, buf);
- pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
+ pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
if (pipe->inode)
sd->need_wakeup = true;
@@ -1211,7 +1261,7 @@ out_release:
* If we did an incomplete transfer we must release
* the pipe buffers in question:
*/
- for (i = 0; i < PIPE_BUFFERS; i++) {
+ for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops) {
@@ -1371,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
*/
static int get_iovec_page_array(const struct iovec __user *iov,
unsigned int nr_vecs, struct page **pages,
- struct partial_page *partial, int aligned)
+ struct partial_page *partial, int aligned,
+ unsigned int pipe_buffers)
{
int buffers = 0, error = 0;
@@ -1414,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
break;
npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (npages > PIPE_BUFFERS - buffers)
- npages = PIPE_BUFFERS - buffers;
+ if (npages > pipe_buffers - buffers)
+ npages = pipe_buffers - buffers;
error = get_user_pages_fast((unsigned long)base, npages,
0, &pages[buffers]);
@@ -1450,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
* or if we mapped the max number of pages that we have
* room for.
*/
- if (error < npages || buffers == PIPE_BUFFERS)
+ if (error < npages || buffers == pipe_buffers)
break;
nr_vecs--;
@@ -1593,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
unsigned long nr_segs, unsigned int flags)
{
struct pipe_inode_info *pipe;
- struct page *pages[PIPE_BUFFERS];
- struct partial_page partial[PIPE_BUFFERS];
+ struct page *pages[PIPE_DEF_BUFFERS];
+ struct partial_page partial[PIPE_DEF_BUFFERS];
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
@@ -1602,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
.ops = &user_page_pipe_buf_ops,
.spd_release = spd_release_page,
};
+ long ret;
pipe = pipe_info(file->f_path.dentry->d_inode);
if (!pipe)
return -EBADF;
- spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
- flags & SPLICE_F_GIFT);
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
+ spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
+ spd.partial, flags & SPLICE_F_GIFT,
+ pipe->buffers);
if (spd.nr_pages <= 0)
- return spd.nr_pages;
+ ret = spd.nr_pages;
+ else
+ ret = splice_to_pipe(pipe, &spd);
- return splice_to_pipe(pipe, &spd);
+ splice_shrink_spd(pipe, &spd);
+ return ret;
}
/*
@@ -1738,13 +1797,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
* Check ->nrbufs without the inode lock first. This function
* is speculative anyways, so missing one is ok.
*/
- if (pipe->nrbufs < PIPE_BUFFERS)
+ if (pipe->nrbufs < pipe->buffers)
return 0;
ret = 0;
pipe_lock(pipe);
- while (pipe->nrbufs >= PIPE_BUFFERS) {
+ while (pipe->nrbufs >= pipe->buffers) {
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
ret = -EPIPE;
@@ -1810,7 +1869,7 @@ retry:
* Cannot make any progress, because either the input
* pipe is empty or the output pipe is full.
*/
- if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) {
+ if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) {
/* Already processed some buffers, break */
if (ret)
break;
@@ -1831,7 +1890,7 @@ retry:
}
ibuf = ipipe->bufs + ipipe->curbuf;
- nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS;
+ nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
obuf = opipe->bufs + nbuf;
if (len >= ibuf->len) {
@@ -1841,7 +1900,7 @@ retry:
*obuf = *ibuf;
ibuf->ops = NULL;
opipe->nrbufs++;
- ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS;
+ ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1);
ipipe->nrbufs--;
input_wakeup = true;
} else {
@@ -1914,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* If we have iterated all input buffers or ran out of
* output room, break.
*/
- if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
+ if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers)
break;
- ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
- nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
+ ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1));
+ nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
/*
* Get a reference to this pipe buffer,
diff --git a/fs/sync.c b/fs/sync.c
index 92b228176f7c..de6a44192832 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
if (wait)
sync_inodes_sb(sb);
else
- writeback_inodes_sb(sb);
+ writeback_inodes_sb_locked(sb);
if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index e9002513e08f..f24dbe5efde3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -725,7 +725,8 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, NULL);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
+ BLKDEV_IFL_WAIT);
}
STATIC void