summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c3
-rw-r--r--fs/binfmt_elf.c30
-rw-r--r--fs/bio-integrity.c2
-rw-r--r--fs/bio.c4
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c7
-rw-r--r--fs/btrfs/ctree.h17
-rw-r--r--fs/btrfs/dev-replace.c4
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c57
-rw-r--r--fs/btrfs/extent_io.c8
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/btrfs/free-space-cache.c67
-rw-r--r--fs/btrfs/free-space-cache.h5
-rw-r--r--fs/btrfs/inode.c16
-rw-r--r--fs/btrfs/ioctl.c80
-rw-r--r--fs/btrfs/ordered-data.c24
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/scrub.c112
-rw-r--r--fs/btrfs/super.c21
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/tree-log.c52
-rw-r--r--fs/btrfs/volumes.c7
-rw-r--r--fs/nfs/Kconfig11
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfs/client.c10
-rw-r--r--fs/nfs/dir.c10
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h8
-rw-r--r--fs/nfs/nfs4_fs.h17
-rw-r--r--fs/nfs/nfs4client.c138
-rw-r--r--fs/nfs/nfs4file.c3
-rw-r--r--fs/nfs/nfs4filelayoutdev.c20
-rw-r--r--fs/nfs/nfs4namespace.c118
-rw-r--r--fs/nfs/nfs4proc.c486
-rw-r--r--fs/nfs/nfs4state.c262
-rw-r--r--fs/nfs/nfs4super.c12
-rw-r--r--fs/nfs/nfs4xdr.c113
-rw-r--r--fs/nfs/super.c198
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nilfs2/page.c2
-rw-r--r--fs/nilfs2/segment.c11
-rw-r--r--fs/ocfs2/dcache.c7
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/reiserfs/journal.c67
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/udf/ialloc.c16
-rw-r--r--fs/udf/super.c64
-rw-r--r--fs/udf/udf_sb.h2
-rw-r--r--fs/xfs/xfs_buf_item.c1
-rw-r--r--fs/xfs/xfs_da_btree.c5
-rw-r--r--fs/xfs/xfs_fs.h2
-rw-r--r--fs/xfs/xfs_icache.c9
-rw-r--r--fs/xfs/xfs_log_recover.c73
55 files changed, 1739 insertions, 514 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 646337dc5201..529300327f45 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -600,9 +600,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
/* lock down the parent dentry so we can peer at it */
parent = dget_parent(dentry);
- if (!parent->d_inode)
- goto out_bad;
-
dir = AFS_FS_I(parent->d_inode);
/* validate the parent directory */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 100edcc5e312..4c94a79991bb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1413,7 +1413,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
* long file_ofs
* followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
*/
-static void fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note)
{
struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n;
@@ -1428,11 +1428,11 @@ static void fill_files_note(struct memelfnote *note)
names_ofs = (2 + 3 * count) * sizeof(data[0]);
alloc:
if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
- goto err;
+ return -EINVAL;
size = round_up(size, PAGE_SIZE);
data = vmalloc(size);
if (!data)
- goto err;
+ return -ENOMEM;
start_end_ofs = data + 2;
name_base = name_curpos = ((char *)data) + names_ofs;
@@ -1485,7 +1485,7 @@ static void fill_files_note(struct memelfnote *note)
size = name_curpos - (char *)data;
fill_note(note, "CORE", NT_FILE, size, data);
- err: ;
+ return 0;
}
#ifdef CORE_DUMP_USE_REGSET
@@ -1686,8 +1686,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv);
- fill_files_note(&info->files);
- info->size += notesize(&info->files);
+ if (fill_files_note(&info->files) == 0)
+ info->size += notesize(&info->files);
return 1;
}
@@ -1719,7 +1719,8 @@ static int write_note_info(struct elf_note_info *info,
return 0;
if (first && !writenote(&info->auxv, file, foffset))
return 0;
- if (first && !writenote(&info->files, file, foffset))
+ if (first && info->files.data &&
+ !writenote(&info->files, file, foffset))
return 0;
for (i = 1; i < info->thread_notes; ++i)
@@ -1806,6 +1807,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
struct elf_note_info {
struct memelfnote *notes;
+ struct memelfnote *notes_files;
struct elf_prstatus *prstatus; /* NT_PRSTATUS */
struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
struct list_head thread_list;
@@ -1896,9 +1898,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
fill_auxv_note(info->notes + 3, current->mm);
- fill_files_note(info->notes + 4);
+ info->numnote = 4;
- info->numnote = 5;
+ if (fill_files_note(info->notes + info->numnote) == 0) {
+ info->notes_files = info->notes + info->numnote;
+ info->numnote++;
+ }
/* Try to dump the FPU. */
info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
@@ -1960,8 +1965,9 @@ static void free_note_info(struct elf_note_info *info)
kfree(list_entry(tmp, struct elf_thread_status, list));
}
- /* Free data allocated by fill_files_note(): */
- vfree(info->notes[4].data);
+ /* Free data possibly allocated by fill_files_note(): */
+ if (info->notes_files)
+ vfree(info->notes_files->data);
kfree(info->prstatus);
kfree(info->psinfo);
@@ -2044,7 +2050,7 @@ static int elf_core_dump(struct coredump_params *cprm)
struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff, foffset;
- struct elf_note_info info;
+ struct elf_note_info info = { };
struct elf_phdr *phdr4note = NULL;
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 60250847929f..fc60b31453ee 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -735,7 +735,7 @@ void bioset_integrity_free(struct bio_set *bs)
mempool_destroy(bs->bio_integrity_pool);
if (bs->bvec_integrity_pool)
- mempool_destroy(bs->bio_integrity_pool);
+ mempool_destroy(bs->bvec_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);
diff --git a/fs/bio.c b/fs/bio.c
index b3b20ed9510e..ea5035da4d9a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
src_p = kmap_atomic(src_bv->bv_page);
dst_p = kmap_atomic(dst_bv->bv_page);
- memcpy(dst_p + dst_bv->bv_offset,
- src_p + src_bv->bv_offset,
+ memcpy(dst_p + dst_offset,
+ src_p + src_offset,
bytes);
kunmap_atomic(dst_p);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d0ae226926ee..71f074e1870b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -213,7 +213,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
if (BTRFS_I(inode)->logged_trans == generation &&
- BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit)
+ BTRFS_I(inode)->last_sub_trans <=
+ BTRFS_I(inode)->last_log_commit &&
+ BTRFS_I(inode)->last_sub_trans <=
+ BTRFS_I(inode)->root->last_log_commit)
return 1;
return 0;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 64346721173f..61b5bcd57b7e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1005,8 +1005,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
- if (root->ref_cows)
- btrfs_reloc_cow_block(trans, root, buf, cow);
+ if (root->ref_cows) {
+ ret = btrfs_reloc_cow_block(trans, root, buf, cow);
+ if (ret)
+ return ret;
+ }
if (buf == root->node) {
WARN_ON(parent && parent != buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3c1da6f98a4d..0506f40ede83 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1118,15 +1118,6 @@ struct btrfs_space_info {
*/
struct percpu_counter total_bytes_pinned;
- /*
- * we bump reservation progress every time we decrement
- * bytes_reserved. This way people waiting for reservations
- * know something good has happened and they can check
- * for progress. The number here isn't to be trusted, it
- * just shows reclaim activity
- */
- unsigned long reservation_progress;
-
unsigned int full:1; /* indicates that we cannot allocate any more
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
@@ -3135,7 +3126,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- 3 * num_items;
+ 2 * num_items;
}
/*
@@ -3939,9 +3930,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
-void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *cow);
+int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a64435359385..70681686e8dc 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_all_ordered_extents(root->fs_info);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
@@ -475,7 +475,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_all_ordered_extents(root->fs_info);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4cbb00af92ff..4ae17ed13b32 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -157,6 +157,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
+ { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
{ .id = 0, .name_stem = "tree" },
};
@@ -3415,6 +3416,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (total_errors > max_errors) {
printk(KERN_ERR "btrfs: %d errors while writing supers\n",
total_errors);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
/* FUA is masked off if unsupported and can't be the reason */
btrfs_error(root->fs_info, -EIO,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cfb3cf711b34..d58bef130a41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3925,7 +3925,6 @@ static int can_overcommit(struct btrfs_root *root,
u64 space_size;
u64 avail;
u64 used;
- u64 to_add;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
@@ -3959,25 +3958,17 @@ static int can_overcommit(struct btrfs_root *root,
BTRFS_BLOCK_GROUP_RAID10))
avail >>= 1;
- to_add = space_info->total_bytes;
-
/*
* If we aren't flushing all things, let us overcommit up to
* 1/2th of the space. If we can flush, don't let us overcommit
* too much, let it overcommit up to 1/8 of the space.
*/
if (flush == BTRFS_RESERVE_FLUSH_ALL)
- to_add >>= 3;
+ avail >>= 3;
else
- to_add >>= 1;
-
- /*
- * Limit the overcommit to the amount of free space we could possibly
- * allocate for chunks.
- */
- to_add = min(avail, to_add);
+ avail >>= 1;
- if (used + bytes < space_info->total_bytes + to_add)
+ if (used + bytes < space_info->total_bytes + avail)
return 1;
return 0;
}
@@ -4000,7 +3991,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
*/
btrfs_start_all_delalloc_inodes(root->fs_info, 0);
if (!current->journal_info)
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_all_ordered_extents(root->fs_info);
}
}
@@ -4030,7 +4021,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
if (delalloc_bytes == 0) {
if (trans)
return;
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_all_ordered_extents(root->fs_info);
return;
}
@@ -4058,7 +4049,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
loops++;
if (wait_ordered && !trans) {
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_all_ordered_extents(root->fs_info);
} else {
time_left = schedule_timeout_killable(1);
if (time_left)
@@ -4465,7 +4456,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
- space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -4666,7 +4656,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes, 0);
- sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -5446,7 +5435,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
- space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -6117,10 +6105,13 @@ enum btrfs_loop_type {
/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
- * ins->objectid == block start
+ * ins->objectid == start position
* ins->flags = BTRFS_EXTENT_ITEM_KEY
- * ins->offset == number of blocks
+ * ins->offset == the size of the hole.
* Any available blocks before search_start are skipped.
+ *
+ * If no suitable free space is found, ins->offset is set to the size of
+ * the largest free space extent seen during the search.
*/
static noinline int find_free_extent(struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
@@ -6133,6 +6124,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
struct btrfs_block_group_cache *block_group = NULL;
struct btrfs_block_group_cache *used_block_group;
u64 search_start = 0;
+ u64 max_extent_size = 0;
int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info;
int loop = 0;
@@ -6292,7 +6284,10 @@ have_block_group:
btrfs_get_block_group(used_block_group);
offset = btrfs_alloc_from_cluster(used_block_group,
- last_ptr, num_bytes, used_block_group->key.objectid);
+ last_ptr,
+ num_bytes,
+ used_block_group->key.objectid,
+ &max_extent_size);
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
@@ -6355,8 +6350,10 @@ refill_cluster:
* cluster
*/
offset = btrfs_alloc_from_cluster(block_group,
- last_ptr, num_bytes,
- search_start);
+ last_ptr,
+ num_bytes,
+ search_start,
+ &max_extent_size);
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
@@ -6391,13 +6388,18 @@ unclustered_alloc:
if (cached &&
block_group->free_space_ctl->free_space <
num_bytes + empty_cluster + empty_size) {
+ if (block_group->free_space_ctl->free_space >
+ max_extent_size)
+ max_extent_size =
+ block_group->free_space_ctl->free_space;
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
offset = btrfs_find_space_for_alloc(block_group, search_start,
- num_bytes, empty_size);
+ num_bytes, empty_size,
+ &max_extent_size);
/*
* If we didn't find a chunk, and we haven't failed on this
* block group before, and this block group is in the middle of
@@ -6515,7 +6517,8 @@ loop:
ret = 0;
}
out:
-
+ if (ret == -ENOSPC)
+ ins->offset = max_extent_size;
return ret;
}
@@ -6573,8 +6576,8 @@ again:
flags);
if (ret == -ENOSPC) {
- if (!final_tried) {
- num_bytes = num_bytes >> 1;
+ if (!final_tried && ins->offset) {
+ num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
num_bytes = max(num_bytes, min_alloc_size);
if (num_bytes == min_alloc_size)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 09582b81640c..c09a40db53db 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1481,10 +1481,12 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
*end = state->end;
cur_start = state->end + 1;
node = rb_next(node);
- if (!node)
- break;
total_bytes += state->end - state->start + 1;
- if (total_bytes >= max_bytes)
+ if (total_bytes >= max_bytes) {
+ *end = *start + max_bytes - 1;
+ break;
+ }
+ if (!node)
break;
}
out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bc5072b2db53..72da4df53c9a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1859,8 +1859,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_log_dentry_safe(trans, root, dentry);
if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- goto out;
+ /* Fallthrough and commit/free transaction. */
+ ret = 1;
}
/* we've logged all the items and now have a consistent
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3f0ddfce96e6..b4f9904c4c6b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1431,13 +1431,19 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
ctl->free_space += bytes;
}
+/*
+ * If no suitable extent is found, *bytes is set to the size of the
+ * largest free extent seen while searching this bitmap.
+ */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info, u64 *offset,
u64 *bytes)
{
unsigned long found_bits = 0;
+ unsigned long max_bits = 0;
unsigned long bits, i;
unsigned long next_zero;
+ unsigned long extent_bits;
i = offset_to_bit(bitmap_info->offset, ctl->unit,
max_t(u64, *offset, bitmap_info->offset));
@@ -1446,9 +1452,12 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
next_zero = find_next_zero_bit(bitmap_info->bitmap,
BITS_PER_BITMAP, i);
- if ((next_zero - i) >= bits) {
- found_bits = next_zero - i;
+ extent_bits = next_zero - i;
+ if (extent_bits >= bits) {
+ found_bits = extent_bits;
break;
+ } else if (extent_bits > max_bits) {
+ max_bits = extent_bits;
}
i = next_zero;
}
@@ -1459,38 +1468,41 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
return 0;
}
+ *bytes = (u64)(max_bits) * ctl->unit;
return -1;
}
+/* Record in *max_extent_size the largest unsuitable extent seen, in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
- unsigned long align)
+ unsigned long align, u64 *max_extent_size)
{
struct btrfs_free_space *entry;
struct rb_node *node;
- u64 ctl_off;
u64 tmp;
u64 align_off;
int ret;
if (!ctl->free_space_offset.rb_node)
- return NULL;
+ goto out;
entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
if (!entry)
- return NULL;
+ goto out;
for (node = &entry->offset_index; node; node = rb_next(node)) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
- if (entry->bytes < *bytes)
+ if (entry->bytes < *bytes) {
+ if (entry->bytes > *max_extent_size)
+ *max_extent_size = entry->bytes;
continue;
+ }
/* make sure the space returned is big enough
* to match our requested alignment
*/
if (*bytes >= align) {
- ctl_off = entry->offset - ctl->start;
- tmp = ctl_off + align - 1;;
+ tmp = entry->offset - ctl->start + align - 1;
do_div(tmp, align);
tmp = tmp * align + ctl->start;
align_off = tmp - entry->offset;
@@ -1499,14 +1511,22 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
tmp = entry->offset;
}
- if (entry->bytes < *bytes + align_off)
+ if (entry->bytes < *bytes + align_off) {
+ if (entry->bytes > *max_extent_size)
+ *max_extent_size = entry->bytes;
continue;
+ }
if (entry->bitmap) {
- ret = search_bitmap(ctl, entry, &tmp, bytes);
+ u64 size = *bytes;
+
+ ret = search_bitmap(ctl, entry, &tmp, &size);
if (!ret) {
*offset = tmp;
+ *bytes = size;
return entry;
+ } else if (size > *max_extent_size) {
+ *max_extent_size = size;
}
continue;
}
@@ -1515,7 +1535,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
*bytes = entry->bytes - align_off;
return entry;
}
-
+out:
return NULL;
}
@@ -2116,7 +2136,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes, u64 empty_size)
+ u64 offset, u64 bytes, u64 empty_size,
+ u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry = NULL;
@@ -2127,7 +2148,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
spin_lock(&ctl->tree_lock);
entry = find_free_space(ctl, &offset, &bytes_search,
- block_group->full_stripe_len);
+ block_group->full_stripe_len, max_extent_size);
if (!entry)
goto out;
@@ -2137,7 +2158,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
if (!entry->bytes)
free_bitmap(ctl, entry);
} else {
-
unlink_free_space(ctl, entry);
align_gap_len = offset - entry->offset;
align_gap = entry->offset;
@@ -2151,7 +2171,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
else
link_free_space(ctl, entry);
}
-
out:
spin_unlock(&ctl->tree_lock);
@@ -2206,7 +2225,8 @@ int btrfs_return_cluster_to_free_space(
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
struct btrfs_free_space *entry,
- u64 bytes, u64 min_start)
+ u64 bytes, u64 min_start,
+ u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
int err;
@@ -2218,8 +2238,11 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
search_bytes = bytes;
err = search_bitmap(ctl, entry, &search_start, &search_bytes);
- if (err)
+ if (err) {
+ if (search_bytes > *max_extent_size)
+ *max_extent_size = search_bytes;
return 0;
+ }
ret = search_start;
__bitmap_clear_bits(ctl, entry, ret, bytes);
@@ -2234,7 +2257,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
*/
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
- u64 min_start)
+ u64 min_start, u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry = NULL;
@@ -2254,6 +2277,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
entry = rb_entry(node, struct btrfs_free_space, offset_index);
while(1) {
+ if (entry->bytes < bytes && entry->bytes > *max_extent_size)
+ *max_extent_size = entry->bytes;
+
if (entry->bytes < bytes ||
(!entry->bitmap && entry->offset < min_start)) {
node = rb_next(&entry->offset_index);
@@ -2267,7 +2293,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
if (entry->bitmap) {
ret = btrfs_alloc_from_bitmap(block_group,
cluster, entry, bytes,
- cluster->window_start);
+ cluster->window_start,
+ max_extent_size);
if (ret == 0) {
node = rb_next(&entry->offset_index);
if (!node)
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index c74904167476..e737f92cf6d0 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -94,7 +94,8 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
*block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes, u64 empty_size);
+ u64 offset, u64 bytes, u64 empty_size,
+ u64 *max_extent_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
@@ -105,7 +106,7 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
- u64 min_start);
+ u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f338c5672d58..22ebc13b6c99 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4688,11 +4688,11 @@ static void inode_tree_add(struct inode *inode)
struct btrfs_inode *entry;
struct rb_node **p;
struct rb_node *parent;
+ struct rb_node *new = &BTRFS_I(inode)->rb_node;
u64 ino = btrfs_ino(inode);
if (inode_unhashed(inode))
return;
-again:
parent = NULL;
spin_lock(&root->inode_lock);
p = &root->inode_tree.rb_node;
@@ -4707,14 +4707,14 @@ again:
else {
WARN_ON(!(entry->vfs_inode.i_state &
(I_WILL_FREE | I_FREEING)));
- rb_erase(parent, &root->inode_tree);
+ rb_replace_node(parent, new, &root->inode_tree);
RB_CLEAR_NODE(parent);
spin_unlock(&root->inode_lock);
- goto again;
+ return;
}
}
- rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
- rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
+ rb_link_node(new, parent, p);
+ rb_insert_color(new, &root->inode_tree);
spin_unlock(&root->inode_lock);
}
@@ -8216,6 +8216,10 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
if (unlikely(!work)) {
+ if (delay_iput)
+ btrfs_add_delayed_iput(inode);
+ else
+ iput(inode);
ret = -ENOMEM;
goto out;
}
@@ -8613,11 +8617,13 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .update_time = btrfs_update_time,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .update_time = btrfs_update_time,
};
static const struct file_operations btrfs_dir_file_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1a5b9462dd9a..9d46f60cb943 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -574,7 +574,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
return ret;
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_wait_ordered_extents(root);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
@@ -2696,9 +2696,9 @@ out_unlock:
static long btrfs_ioctl_file_extent_same(struct file *file,
void __user *argp)
{
- struct btrfs_ioctl_same_args *args = argp;
- struct btrfs_ioctl_same_args same;
- struct btrfs_ioctl_same_extent_info info;
+ struct btrfs_ioctl_same_args tmp;
+ struct btrfs_ioctl_same_args *same;
+ struct btrfs_ioctl_same_extent_info *info;
struct inode *src = file->f_dentry->d_inode;
struct file *dst_file = NULL;
struct inode *dst;
@@ -2706,6 +2706,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
u64 len;
int i;
int ret;
+ unsigned long size;
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
bool is_admin = capable(CAP_SYS_ADMIN);
@@ -2716,15 +2717,30 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
if (ret)
return ret;
- if (copy_from_user(&same,
+ if (copy_from_user(&tmp,
(struct btrfs_ioctl_same_args __user *)argp,
- sizeof(same))) {
+ sizeof(tmp))) {
ret = -EFAULT;
goto out;
}
- off = same.logical_offset;
- len = same.length;
+ size = sizeof(tmp) +
+ tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
+
+ same = kmalloc(size, GFP_NOFS);
+ if (!same) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (copy_from_user(same,
+ (struct btrfs_ioctl_same_args __user *)argp, size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ off = same->logical_offset;
+ len = same->length;
/*
* Limit the total length we will dedupe for each operation.
@@ -2752,27 +2768,28 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
if (!S_ISREG(src->i_mode))
goto out;
- ret = 0;
- for (i = 0; i < same.dest_count; i++) {
- if (copy_from_user(&info, &args->info[i], sizeof(info))) {
- ret = -EFAULT;
- goto out;
- }
+ /* pre-format output fields to sane values */
+ for (i = 0; i < same->dest_count; i++) {
+ same->info[i].bytes_deduped = 0ULL;
+ same->info[i].status = 0;
+ }
- info.bytes_deduped = 0;
+ ret = 0;
+ for (i = 0; i < same->dest_count; i++) {
+ info = &same->info[i];
- dst_file = fget(info.fd);
+ dst_file = fget(info->fd);
if (!dst_file) {
- info.status = -EBADF;
+ info->status = -EBADF;
goto next;
}
if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
- info.status = -EINVAL;
+ info->status = -EINVAL;
goto next;
}
- info.status = -EXDEV;
+ info->status = -EXDEV;
if (file->f_path.mnt != dst_file->f_path.mnt)
goto next;
@@ -2781,32 +2798,29 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
goto next;
if (S_ISDIR(dst->i_mode)) {
- info.status = -EISDIR;
+ info->status = -EISDIR;
goto next;
}
if (!S_ISREG(dst->i_mode)) {
- info.status = -EACCES;
+ info->status = -EACCES;
goto next;
}
- info.status = btrfs_extent_same(src, off, len, dst,
- info.logical_offset);
- if (info.status == 0)
- info.bytes_deduped += len;
+ info->status = btrfs_extent_same(src, off, len, dst,
+ info->logical_offset);
+ if (info->status == 0)
+ info->bytes_deduped += len;
next:
if (dst_file)
fput(dst_file);
-
- if (__put_user_unaligned(info.status, &args->info[i].status) ||
- __put_user_unaligned(info.bytes_deduped,
- &args->info[i].bytes_deduped)) {
- ret = -EFAULT;
- goto out;
- }
}
+ ret = copy_to_user(argp, same, size);
+ if (ret)
+ ret = -EFAULT;
+
out:
mnt_drop_write_file(file);
return ret;
@@ -3310,7 +3324,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
}
if (!objectid)
- objectid = root->root_key.objectid;
+ objectid = BTRFS_FS_TREE_OBJECTID;
location.objectid = objectid;
location.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 966b413a33b8..c702cb62f78a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -563,11 +563,10 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
+void btrfs_wait_ordered_extents(struct btrfs_root *root)
{
struct list_head splice, works;
struct btrfs_ordered_extent *ordered, *next;
- struct inode *inode;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
@@ -580,15 +579,6 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
root_extent_list);
list_move_tail(&ordered->root_extent_list,
&root->ordered_extents);
- /*
- * the inode may be getting freed (in sys_unlink path).
- */
- inode = igrab(ordered->inode);
- if (!inode) {
- cond_resched_lock(&root->ordered_extent_lock);
- continue;
- }
-
atomic_inc(&ordered->refs);
spin_unlock(&root->ordered_extent_lock);
@@ -605,21 +595,13 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
list_for_each_entry_safe(ordered, next, &works, work_list) {
list_del_init(&ordered->work_list);
wait_for_completion(&ordered->completion);
-
- inode = ordered->inode;
btrfs_put_ordered_extent(ordered);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
-
cond_resched();
}
mutex_unlock(&root->fs_info->ordered_operations_mutex);
}
-void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
- int delay_iput)
+void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct list_head splice;
@@ -637,7 +619,7 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
&fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
- btrfs_wait_ordered_extents(root, delay_iput);
+ btrfs_wait_ordered_extents(root);
btrfs_put_fs_root(root);
spin_lock(&fs_info->ordered_root_lock);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index d9a5aa097b4f..0c0b35612d7a 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -195,9 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
-void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
-void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
- int delay_iput);
+void btrfs_wait_ordered_extents(struct btrfs_root *root);
+void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index aacc2121e87c..a5a26320503f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1548,7 +1548,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
btrfs_file_extent_other_encoding(leaf, fi));
if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
- ret = 1;
+ ret = -EINVAL;
goto out;
}
@@ -1579,7 +1579,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
u64 end;
u32 nritems;
u32 i;
- int ret;
+ int ret = 0;
int first = 1;
int dirty = 0;
@@ -1642,11 +1642,13 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = get_new_location(rc->data_inode, &new_bytenr,
bytenr, num_bytes);
- if (ret > 0) {
- WARN_ON(1);
- continue;
+ if (ret) {
+ /*
+ * Don't have to abort since we've not changed anything
+ * in the file extent yet.
+ */
+ break;
}
- BUG_ON(ret < 0);
btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
dirty = 1;
@@ -1656,18 +1658,24 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
num_bytes, parent,
btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ break;
+ }
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ break;
+ }
}
if (dirty)
btrfs_mark_buffer_dirty(leaf);
if (inode)
btrfs_add_delayed_iput(inode);
- return 0;
+ return ret;
}
static noinline_for_stack
@@ -4238,7 +4246,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
err = ret;
goto out;
}
- btrfs_wait_all_ordered_extents(fs_info, 0);
+ btrfs_wait_all_ordered_extents(fs_info);
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
@@ -4499,19 +4507,19 @@ out:
return ret;
}
-void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *cow)
+int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *cow)
{
struct reloc_control *rc;
struct backref_node *node;
int first_cow = 0;
int level;
- int ret;
+ int ret = 0;
rc = root->fs_info->reloc_ctl;
if (!rc)
- return;
+ return 0;
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
@@ -4547,10 +4555,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
rc->nodes_relocated += buf->len;
}
- if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
+ if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
ret = replace_file_extents(trans, rc, root, cow);
- BUG_ON(ret);
- }
+ return ret;
}
/*
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 0afcd452fcb3..a18e0e23f6a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -158,12 +158,20 @@ struct scrub_fixup_nodatasum {
int mirror_num;
};
+struct scrub_nocow_inode {
+ u64 inum;
+ u64 offset;
+ u64 root;
+ struct list_head list;
+};
+
struct scrub_copy_nocow_ctx {
struct scrub_ctx *sctx;
u64 logical;
u64 len;
int mirror_num;
u64 physical_for_dev_replace;
+ struct list_head inodes;
struct btrfs_work work;
};
@@ -245,7 +253,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
- void *ctx);
+ struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
@@ -3126,12 +3134,30 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
nocow_ctx->mirror_num = mirror_num;
nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
nocow_ctx->work.func = copy_nocow_pages_worker;
+ INIT_LIST_HEAD(&nocow_ctx->inodes);
btrfs_queue_worker(&fs_info->scrub_nocow_workers,
&nocow_ctx->work);
return 0;
}
+static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
+{
+ struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
+ struct scrub_nocow_inode *nocow_inode;
+
+ nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
+ if (!nocow_inode)
+ return -ENOMEM;
+ nocow_inode->inum = inum;
+ nocow_inode->offset = offset;
+ nocow_inode->root = root;
+ list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
+ return 0;
+}
+
+#define COPY_COMPLETE 1
+
static void copy_nocow_pages_worker(struct btrfs_work *work)
{
struct scrub_copy_nocow_ctx *nocow_ctx =
@@ -3167,8 +3193,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
}
ret = iterate_inodes_from_logical(logical, fs_info, path,
- copy_nocow_pages_for_inode,
- nocow_ctx);
+ record_inode_for_nocow, nocow_ctx);
if (ret != 0 && ret != -ENOENT) {
pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
logical, physical_for_dev_replace, len, mirror_num,
@@ -3177,7 +3202,33 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
goto out;
}
+ btrfs_end_transaction(trans, root);
+ trans = NULL;
+ while (!list_empty(&nocow_ctx->inodes)) {
+ struct scrub_nocow_inode *entry;
+ entry = list_first_entry(&nocow_ctx->inodes,
+ struct scrub_nocow_inode,
+ list);
+ list_del_init(&entry->list);
+ ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
+ entry->root, nocow_ctx);
+ kfree(entry);
+ if (ret == COPY_COMPLETE) {
+ ret = 0;
+ break;
+ } else if (ret) {
+ break;
+ }
+ }
out:
+ while (!list_empty(&nocow_ctx->inodes)) {
+ struct scrub_nocow_inode *entry;
+ entry = list_first_entry(&nocow_ctx->inodes,
+ struct scrub_nocow_inode,
+ list);
+ list_del_init(&entry->list);
+ kfree(entry);
+ }
if (trans && !IS_ERR(trans))
btrfs_end_transaction(trans, root);
if (not_written)
@@ -3190,20 +3241,25 @@ out:
scrub_pending_trans_workers_dec(sctx);
}
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
+static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
+ struct scrub_copy_nocow_ctx *nocow_ctx)
{
- struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
struct btrfs_key key;
struct inode *inode;
struct page *page;
struct btrfs_root *local_root;
+ struct btrfs_ordered_extent *ordered;
+ struct extent_map *em;
+ struct extent_state *cached_state = NULL;
+ struct extent_io_tree *io_tree;
u64 physical_for_dev_replace;
- u64 len;
+ u64 len = nocow_ctx->len;
+ u64 lockstart = offset, lockend = offset + len - 1;
unsigned long index;
int srcu_index;
- int ret;
- int err;
+ int ret = 0;
+ int err = 0;
key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY;
@@ -3229,9 +3285,33 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
mutex_lock(&inode->i_mutex);
inode_dio_wait(inode);
- ret = 0;
physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
- len = nocow_ctx->len;
+ io_tree = &BTRFS_I(inode)->io_tree;
+
+ lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+ ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ goto out_unlock;
+ }
+
+ em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out_unlock;
+ }
+
+ /*
+ * This extent does not actually cover the logical extent anymore,
+ * move on to the next inode.
+ */
+ if (em->block_start > nocow_ctx->logical ||
+ em->block_start + em->block_len < nocow_ctx->logical + len) {
+ free_extent_map(em);
+ goto out_unlock;
+ }
+ free_extent_map(em);
+
while (len >= PAGE_CACHE_SIZE) {
index = offset >> PAGE_CACHE_SHIFT;
again:
@@ -3247,10 +3327,9 @@ again:
goto next_page;
} else {
ClearPageError(page);
- err = extent_read_full_page(&BTRFS_I(inode)->
- io_tree,
- page, btrfs_get_extent,
- nocow_ctx->mirror_num);
+ err = extent_read_full_page_nolock(io_tree, page,
+ btrfs_get_extent,
+ nocow_ctx->mirror_num);
if (err) {
ret = err;
goto next_page;
@@ -3264,6 +3343,7 @@ again:
* page in the page cache.
*/
if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
page_cache_release(page);
goto again;
}
@@ -3287,6 +3367,10 @@ next_page:
physical_for_dev_replace += PAGE_CACHE_SIZE;
len -= PAGE_CACHE_SIZE;
}
+ ret = COPY_COMPLETE;
+out_unlock:
+ unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
+ GFP_NOFS);
out:
mutex_unlock(&inode->i_mutex);
iput(inode);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3aab10ce63e8..e913328d0f2a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -921,7 +921,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_wait_all_ordered_extents(fs_info, 1);
+ btrfs_wait_all_ordered_extents(fs_info);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
@@ -1340,6 +1340,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
} else {
+ if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
+ btrfs_err(fs_info,
+ "Remounting read-write after error is not allowed\n");
+ ret = -EINVAL;
+ goto restore;
+ }
if (fs_info->fs_devices->rw_devices == 0) {
ret = -EACCES;
goto restore;
@@ -1377,6 +1383,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
pr_warn("btrfs: failed to resume dev_replace\n");
goto restore;
}
+
+ if (!fs_info->uuid_root) {
+ pr_info("btrfs: creating UUID tree\n");
+ ret = btrfs_create_uuid_tree(fs_info);
+ if (ret) {
+ pr_warn("btrfs: failed to create the uuid tree"
+ "%d\n", ret);
+ goto restore;
+ }
+ }
sb->s_flags &= ~MS_RDONLY;
}
out:
@@ -1762,6 +1778,9 @@ static void btrfs_print_info(void)
#ifdef CONFIG_BTRFS_DEBUG
", debug=on"
#endif
+#ifdef CONFIG_BTRFS_ASSERT
+ ", assert=on"
+#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
", integrity-checker=on"
#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cac4a3f76323..e7a95356df83 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1603,7 +1603,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
- btrfs_wait_all_ordered_extents(fs_info, 1);
+ btrfs_wait_all_ordered_extents(fs_info);
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0d9613c3f5e5..79f057c0619a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -93,7 +93,8 @@
*/
#define LOG_WALK_PIN_ONLY 0
#define LOG_WALK_REPLAY_INODES 1
-#define LOG_WALK_REPLAY_ALL 2
+#define LOG_WALK_REPLAY_DIR_INDEX 2
+#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
@@ -393,6 +394,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
if (inode_item) {
struct btrfs_inode_item *item;
u64 nbytes;
+ u32 mode;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@@ -400,9 +402,19 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
item = btrfs_item_ptr(eb, slot,
struct btrfs_inode_item);
btrfs_set_inode_nbytes(eb, item, nbytes);
+
+ /*
+ * If this is a directory we need to reset the i_size to
+ * 0 so that we can set it up properly when replaying
+ * the rest of the items in this log.
+ */
+ mode = btrfs_inode_mode(eb, item);
+ if (S_ISDIR(mode))
+ btrfs_set_inode_size(eb, item, 0);
}
} else if (inode_item) {
struct btrfs_inode_item *item;
+ u32 mode;
/*
* New inode, set nbytes to 0 so that the nbytes comes out
@@ -410,6 +422,15 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
*/
item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
btrfs_set_inode_nbytes(eb, item, 0);
+
+ /*
+ * If this is a directory we need to reset the i_size to 0 so
+ * that we can set it up properly when replaying the rest of
+ * the items in this log.
+ */
+ mode = btrfs_inode_mode(eb, item);
+ if (S_ISDIR(mode))
+ btrfs_set_inode_size(eb, item, 0);
}
insert:
btrfs_release_path(path);
@@ -1496,6 +1517,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
iput(inode);
return -EIO;
}
+
ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
/* FIXME, put inode into FIXUP list */
@@ -1534,6 +1556,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
u8 log_type;
int exists;
int ret = 0;
+ bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
dir = read_one_inode(root, key->objectid);
if (!dir)
@@ -1604,6 +1627,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
goto insert;
out:
btrfs_release_path(path);
+ if (!ret && update_size) {
+ btrfs_i_size_write(dir, dir->i_size + name_len * 2);
+ ret = btrfs_update_inode(trans, root, dir);
+ }
kfree(name);
iput(dir);
return ret;
@@ -1614,6 +1641,7 @@ insert:
name, name_len, log_type, &log_key);
if (ret && ret != -ENOENT)
goto out;
+ update_size = false;
ret = 0;
goto out;
}
@@ -2027,6 +2055,15 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (ret)
break;
}
+
+ if (key.type == BTRFS_DIR_INDEX_KEY &&
+ wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
+ ret = replay_one_dir_item(wc->trans, root, path,
+ eb, i, &key);
+ if (ret)
+ break;
+ }
+
if (wc->stage < LOG_WALK_REPLAY_ALL)
continue;
@@ -2048,8 +2085,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
eb, i, &key);
if (ret)
break;
- } else if (key.type == BTRFS_DIR_ITEM_KEY ||
- key.type == BTRFS_DIR_INDEX_KEY) {
+ } else if (key.type == BTRFS_DIR_ITEM_KEY) {
ret = replay_one_dir_item(wc->trans, root, path,
eb, i, &key);
if (ret)
@@ -3805,6 +3841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
int ret = 0;
struct btrfs_root *root;
struct dentry *old_parent = NULL;
+ struct inode *orig_inode = inode;
/*
* for regular files, if its inode is already on disk, we don't
@@ -3824,7 +3861,14 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
}
while (1) {
- BTRFS_I(inode)->logged_trans = trans->transid;
+ /*
+ * If we are logging a directory then we start with our inode,
+ * not our parent's inode, so we need to skip setting the
+ * logged_trans so that further down in the log code we don't
+ * think this inode has already been logged.
+ */
+ if (inode != orig_inode)
+ BTRFS_I(inode)->logged_trans = trans->transid;
smp_mb();
if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0052ca8264d9..a10645830223 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -796,7 +796,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->rotating = 1;
fs_devices->open_devices++;
- if (device->writeable && !device->is_tgtdev_for_dev_replace) {
+ if (device->writeable &&
+ device->devid != BTRFS_DEV_REPLACE_DEVID) {
fs_devices->rw_devices++;
list_add(&device->dev_alloc_list,
&fs_devices->alloc_list);
@@ -911,9 +912,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
if (disk_super->label[0]) {
if (disk_super->label[BTRFS_LABEL_SIZE - 1])
disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
- printk(KERN_INFO "device label %s ", disk_super->label);
+ printk(KERN_INFO "btrfs: device label %s ", disk_super->label);
} else {
- printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
+ printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid);
}
printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index b5e80b0af315..38c1768b4142 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -140,6 +140,17 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
If the NFS client is unchanged from the upstream kernel, this
option should be set to the default "kernel.org".
+config NFS_V4_1_MIGRATION
+ bool "NFSv4.1 client support for migration"
+ depends on NFS_V4_1
+ default n
+ help
+ This option makes the NFS client advertise to NFSv4.1 servers that
+ it can support NFSv4 migration.
+
+ The NFSv4.1 pieces of the Linux NFSv4 migration implementation are
+ still experimental. If you are not an NFSv4 developer, say N here.
+
config NFS_V4_SECURITY_LABEL
bool
depends on NFS_V4_2 && SECURITY
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 67cd73213168..073b4cf67ed9 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -164,8 +164,7 @@ nfs41_callback_up(struct svc_serv *serv)
svc_xprt_put(serv->sv_bc_xprt);
serv->sv_bc_xprt = NULL;
}
- dprintk("--> %s return %ld\n", __func__,
- IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
+ dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp));
return rqstp;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2dceee4db076..1d09289c8f0e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -590,6 +590,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_DISCRTRY;
+ if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT;
if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
@@ -784,8 +786,10 @@ static int nfs_init_server(struct nfs_server *server,
goto error;
server->port = data->nfs_server.port;
+ server->auth_info = data->auth_info;
- error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+ error = nfs_init_server_rpcclient(server, &timeparms,
+ data->selected_flavor);
if (error < 0)
goto error;
@@ -926,6 +930,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
target->acdirmax = source->acdirmax;
target->caps = source->caps;
target->options = source->options;
+ target->auth_info = source->auth_info;
}
EXPORT_SYMBOL_GPL(nfs_server_copy_userdata);
@@ -943,7 +948,7 @@ void nfs_server_insert_lists(struct nfs_server *server)
}
EXPORT_SYMBOL_GPL(nfs_server_insert_lists);
-static void nfs_server_remove_lists(struct nfs_server *server)
+void nfs_server_remove_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs_net *nn;
@@ -960,6 +965,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
synchronize_rcu();
}
+EXPORT_SYMBOL_GPL(nfs_server_remove_lists);
/*
* Allocate and initialise a server record
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4c5edcc8b6e9..9a8676f33350 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1139,7 +1139,13 @@ out_zap_parent:
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
- if (dentry->d_flags & DCACHE_DISCONNECTED)
+ /*
+ * We can't d_drop the root of a disconnected tree:
+ * its d_hash is on the s_anon list and d_drop() would hide
+ * it from shrink_dcache_for_unmount(), leading to busy
+ * inodes on unmount and further oopses.
+ */
+ if (IS_ROOT(dentry))
goto out_valid;
}
/* If we have submounts, don't unhash ! */
@@ -1458,7 +1464,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
trace_nfs_atomic_open_enter(dir, ctx, open_flags);
nfs_block_sillyrename(dentry->d_parent);
- inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
nfs_unblock_sillyrename(dentry->d_parent);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bb90bff0cb7a..471ba59c42f9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1209,6 +1209,7 @@ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh)
* not on the result */
return nfs_fhandle_hash(fh);
}
+EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash);
/*
* _nfs_display_fhandle - display an NFS file handle on the console
@@ -1253,6 +1254,7 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption)
}
}
}
+EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
#endif
/**
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 38da8c2b81ac..bca6a3e3c49c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -88,8 +88,8 @@ struct nfs_parsed_mount_data {
unsigned int namlen;
unsigned int options;
unsigned int bsize;
- unsigned int auth_flavor_len;
- rpc_authflavor_t auth_flavors[1];
+ struct nfs_auth_info auth_info;
+ rpc_authflavor_t selected_flavor;
char *client_address;
unsigned int version;
unsigned int minorversion;
@@ -154,6 +154,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
rpc_authflavor_t);
int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
void nfs_server_insert_lists(struct nfs_server *);
+void nfs_server_remove_lists(struct nfs_server *);
void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
rpc_authflavor_t);
@@ -174,6 +175,8 @@ extern struct nfs_server *nfs4_create_server(
struct nfs_subversion *);
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
struct nfs_fh *);
+extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
+ struct sockaddr *sap, size_t salen);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
@@ -323,6 +326,7 @@ extern struct file_system_type nfs_xdev_fs_type;
extern struct file_system_type nfs4_xdev_fs_type;
extern struct file_system_type nfs4_referral_fs_type;
#endif
+bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
struct nfs_subversion *);
void nfs_initialise_sb(struct super_block *);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 28842abafab4..3ce79b04522e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -29,6 +29,8 @@ enum nfs4_client_state {
NFS4CLNT_SERVER_SCOPE_MISMATCH,
NFS4CLNT_PURGE_STATE,
NFS4CLNT_BIND_CONN_TO_SESSION,
+ NFS4CLNT_MOVED,
+ NFS4CLNT_LEASE_MOVED,
};
#define NFS4_RENEW_TIMEOUT 0x01
@@ -50,6 +52,7 @@ struct nfs4_minor_version_ops {
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ const struct nfs4_mig_recovery_ops *mig_recovery_ops;
};
#define NFS_SEQID_CONFIRMED 1
@@ -203,6 +206,12 @@ struct nfs4_state_maintenance_ops {
int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
};
+struct nfs4_mig_recovery_ops {
+ int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
+ struct page *, struct rpc_cred *);
+ int (*fsid_present)(struct inode *, struct rpc_cred *);
+};
+
extern const struct dentry_operations nfs4_dentry_operations;
/* dir.c */
@@ -213,10 +222,11 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
+int nfs4_replace_transport(struct nfs_server *server,
+ const struct nfs4_fs_locations *locations);
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
@@ -231,6 +241,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
+extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
+ struct page *page, struct rpc_cred *);
+extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
struct nfs_fh *, struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
@@ -411,6 +424,8 @@ extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
+extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
+extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_server_scope(struct nfs_client *,
struct nfs41_server_scope **);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index a860ab566d6e..b4a160a405ce 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -197,6 +197,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ clp->cl_mig_gen = 1;
return clp;
error:
@@ -368,6 +369,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
if (clp->cl_minorversion != 0)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+ __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
if (error == -EINVAL)
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
@@ -924,7 +926,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
dprintk("Server FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
+ nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
nfs4_session_set_rwsize(server);
@@ -947,9 +949,8 @@ out:
* Create a version 4 volume record
*/
static int nfs4_init_server(struct nfs_server *server,
- const struct nfs_parsed_mount_data *data)
+ struct nfs_parsed_mount_data *data)
{
- rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
struct rpc_timeout timeparms;
int error;
@@ -961,9 +962,15 @@ static int nfs4_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
server->options = data->options;
+ server->auth_info = data->auth_info;
- if (data->auth_flavor_len >= 1)
- pseudoflavor = data->auth_flavors[0];
+ /* Use the first specified auth flavor. If this flavor isn't
+ * allowed by the server, use the SECINFO path to try the
+ * other specified flavors */
+ if (data->auth_info.flavor_len >= 1)
+ data->selected_flavor = data->auth_info.flavors[0];
+ else
+ data->selected_flavor = RPC_AUTH_UNIX;
/* Get a client record */
error = nfs4_set_client(server,
@@ -971,7 +978,7 @@ static int nfs4_init_server(struct nfs_server *server,
(const struct sockaddr *)&data->nfs_server.address,
data->nfs_server.addrlen,
data->client_address,
- pseudoflavor,
+ data->selected_flavor,
data->nfs_server.protocol,
&timeparms,
data->minorversion,
@@ -991,7 +998,8 @@ static int nfs4_init_server(struct nfs_server *server,
server->port = data->nfs_server.port;
- error = nfs_init_server_rpcclient(server, &timeparms, pseudoflavor);
+ error = nfs_init_server_rpcclient(server, &timeparms,
+ data->selected_flavor);
error:
/* Done */
@@ -1018,7 +1026,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
if (!server)
return ERR_PTR(-ENOMEM);
- auth_probe = mount_info->parsed->auth_flavor_len < 1;
+ auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
/* set up the general RPC client */
error = nfs4_init_server(server, mount_info->parsed);
@@ -1046,6 +1054,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
+ bool auth_probe;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -1078,8 +1087,9 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- error = nfs4_server_common_setup(server, mntfh,
- !(parent_server->flags & NFS_MOUNT_SECFLAVOUR));
+ auth_probe = parent_server->auth_info.flavor_len < 1;
+
+ error = nfs4_server_common_setup(server, mntfh, auth_probe);
if (error < 0)
goto error;
@@ -1091,3 +1101,111 @@ error:
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
}
+
+/*
+ * Grab the destination's particulars, including lease expiry time.
+ *
+ * Returns zero if probe succeeded and retrieved FSID matches the FSID
+ * we have cached.
+ */
+static int nfs_probe_destination(struct nfs_server *server)
+{
+ struct inode *inode = server->super->s_root->d_inode;
+ struct nfs_fattr *fattr;
+ int error;
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* Sanity: the probe won't work if the destination server
+ * does not recognize the migrated FH. */
+ error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr);
+
+ nfs_free_fattr(fattr);
+ return error;
+}
+
+/**
+ * nfs4_update_server - Move an nfs_server to a different nfs_client
+ *
+ * @server: represents FSID to be moved
+ * @hostname: new end-point's hostname
+ * @sap: new end-point's socket address
+ * @salen: size of "sap"
+ *
+ * The nfs_server must be quiescent before this function is invoked.
+ * Either its session is drained (NFSv4.1+), or its transport is
+ * plugged and drained (NFSv4.0).
+ *
+ * Returns zero on success, or a negative errno value.
+ */
+int nfs4_update_server(struct nfs_server *server, const char *hostname,
+ struct sockaddr *sap, size_t salen)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct rpc_clnt *clnt = server->client;
+ struct xprt_create xargs = {
+ .ident = clp->cl_proto,
+ .net = &init_net,
+ .dstaddr = sap,
+ .addrlen = salen,
+ .servername = hostname,
+ };
+ char buf[INET6_ADDRSTRLEN + 1];
+ struct sockaddr_storage address;
+ struct sockaddr *localaddr = (struct sockaddr *)&address;
+ int error;
+
+ dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ hostname);
+
+ error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
+ if (error != 0) {
+ dprintk("<-- %s(): rpc_switch_client_transport returned %d\n",
+ __func__, error);
+ goto out;
+ }
+
+ error = rpc_localaddr(clnt, localaddr, sizeof(address));
+ if (error != 0) {
+ dprintk("<-- %s(): rpc_localaddr returned %d\n",
+ __func__, error);
+ goto out;
+ }
+
+ error = -EAFNOSUPPORT;
+ if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) {
+ dprintk("<-- %s(): rpc_ntop returned %d\n",
+ __func__, error);
+ goto out;
+ }
+
+ nfs_server_remove_lists(server);
+ error = nfs4_set_client(server, hostname, sap, salen, buf,
+ clp->cl_rpcclient->cl_auth->au_flavor,
+ clp->cl_proto, clnt->cl_timeout,
+ clp->cl_minorversion, clp->cl_net);
+ nfs_put_client(clp);
+ if (error != 0) {
+ nfs_server_insert_lists(server);
+ dprintk("<-- %s(): nfs4_set_client returned %d\n",
+ __func__, error);
+ goto out;
+ }
+
+ if (server->nfs_client->cl_hostname == NULL)
+ server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ nfs_server_insert_lists(server);
+
+ error = nfs_probe_destination(server);
+ if (error < 0)
+ goto out;
+
+ dprintk("<-- %s() succeeded\n", __func__);
+
+out:
+ return error;
+}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 5b8a618a0f7a..1f01b55692ee 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -19,6 +19,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
struct inode *dir;
unsigned openflags = filp->f_flags;
struct iattr attr;
+ int opened = 0;
int err;
/*
@@ -55,7 +56,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
nfs_wb_all(inode);
}
- inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
switch (err) {
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 95604f64cab8..c7c295e556ed 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -185,6 +185,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
if (status)
goto out_put;
+ smp_wmb();
ds->ds_clp = clp;
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
@@ -801,34 +802,35 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
-
- if (filelayout_test_devid_unavailable(devid))
- return NULL;
+ struct nfs4_pnfs_ds *ret = ds;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
filelayout_mark_devid_invalid(devid);
- return NULL;
+ goto out;
}
+ smp_rmb();
if (ds->ds_clp)
- return ds;
+ goto out_test_devid;
if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
err = nfs4_ds_connect(s, ds);
- if (err) {
+ if (err)
nfs4_mark_deviceid_unavailable(devid);
- ds = NULL;
- }
nfs4_clear_ds_conn_bit(ds);
} else {
/* Either ds is connected, or ds is NULL */
nfs4_wait_ds_connect(ds);
}
- return ds;
+out_test_devid:
+ if (filelayout_test_devid_unavailable(devid))
+ ret = NULL;
+out:
+ return ret;
}
module_param(dataserver_retrans, uint, 0644);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2288cd3c9278..c08cbf40c59e 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -137,6 +137,7 @@ static size_t nfs_parse_server_name(char *string, size_t len,
/**
* nfs_find_best_sec - Find a security mechanism supported locally
+ * @server: NFS server struct
* @flavors: List of security tuples returned by SECINFO procedure
*
* Return the pseudoflavor of the first security mechanism in
@@ -145,7 +146,8 @@ static size_t nfs_parse_server_name(char *string, size_t len,
* is searched in the order returned from the server, per RFC 3530
* recommendation.
*/
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
+ struct nfs4_secinfo_flavors *flavors)
{
rpc_authflavor_t pseudoflavor;
struct nfs4_secinfo4 *secinfo;
@@ -160,12 +162,19 @@ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
case RPC_AUTH_GSS:
pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
&secinfo->flavor_info);
- if (pseudoflavor != RPC_AUTH_MAXFLAVOR)
+ /* make sure pseudoflavor matches sec= mount opt */
+ if (pseudoflavor != RPC_AUTH_MAXFLAVOR &&
+ nfs_auth_info_match(&server->auth_info,
+ pseudoflavor))
return pseudoflavor;
break;
}
}
+ /* if there were any sec= options then nothing matched */
+ if (server->auth_info.flavor_len > 0)
+ return -EPERM;
+
return RPC_AUTH_UNIX;
}
@@ -187,7 +196,7 @@ static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr
goto out;
}
- flavor = nfs_find_best_sec(flavors);
+ flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors);
out:
put_page(page);
@@ -390,7 +399,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
if (client->cl_auth->au_flavor != flavor)
flavor = client->cl_auth->au_flavor;
- else if (!(server->flags & NFS_MOUNT_SECFLAVOUR)) {
+ else {
rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
if ((int)new >= 0)
flavor = new;
@@ -400,3 +409,104 @@ out:
rpc_shutdown_client(client);
return mnt;
}
+
+/*
+ * Try one location from the fs_locations array.
+ *
+ * The entries of location->servers are visited in order. An entry is
+ * skipped when its length is zero or larger than PAGE_SIZE, when it
+ * contains IPV6_SCOPE_DELIMITER, or when nfs_parse_server_name()
+ * returns a zero address length for it. Every other entry gets its
+ * port set to NFS_PORT and is handed to nfs4_update_server() together
+ * with a NUL-terminated copy of the server name. The walk stops at the
+ * first entry for which nfs4_update_server() returns zero, or when the
+ * server name cannot be duplicated (-ENOMEM).
+ *
+ * If every entry is skipped the result is -ENOENT; otherwise it is the
+ * status of the last attempt.
+ *
+ * "page" and "page2" are accepted but not referenced by this function.
+ *
+ * Returns zero on success, or a negative errno value.
+ */
+static int nfs4_try_replacing_one_location(struct nfs_server *server,
+ char *page, char *page2,
+ const struct nfs4_fs_location *location)
+{
+ const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ struct sockaddr *sap;
+ unsigned int s;
+ size_t salen;
+ int error;
+
+ sap = kmalloc(addr_bufsize, GFP_KERNEL); /* scratch address, reused for every entry */
+ if (sap == NULL)
+ return -ENOMEM;
+
+ error = -ENOENT; /* result when no entry gets as far as an attempt */
+ for (s = 0; s < location->nservers; s++) {
+ const struct nfs4_string *buf = &location->servers[s];
+ char *hostname;
+
+ if (buf->len <= 0 || buf->len > PAGE_SIZE)
+ continue;
+
+ if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len) != NULL)
+ continue;
+
+ salen = nfs_parse_server_name(buf->data, buf->len,
+ sap, addr_bufsize, server);
+ if (salen == 0)
+ continue;
+ rpc_set_port(sap, NFS_PORT);
+
+ error = -ENOMEM;
+ hostname = kstrndup(buf->data, buf->len, GFP_KERNEL); /* NUL-terminated copy of the name */
+ if (hostname == NULL)
+ break;
+
+ error = nfs4_update_server(server, hostname, sap, salen);
+ kfree(hostname);
+ if (error == 0)
+ break;
+ }
+
+ kfree(sap);
+ return error;
+}
+
+/**
+ * nfs4_replace_transport - set up transport to destination server
+ *
+ * @server: export being migrated
+ * @locations: fs_locations array
+ *
+ * Returns zero on success, or a negative errno value.
+ *
+ * The client tries all the entries in the "locations" array, in the
+ * order returned by the server, until one works or the end of the
+ * array is reached.
+ *
+ * -ENOENT is returned when "locations" is NULL or holds no entries,
+ * and -ENOMEM when either of the two scratch pages cannot be
+ * allocated. Entries that list no servers or whose rootpath has no
+ * components are passed over; if every entry is passed over, the
+ * -ENOMEM value assigned before the page allocations is what is
+ * returned. Otherwise the status of the last entry tried is returned.
+ *
+ * Both scratch pages are released on every exit path, including the
+ * paths taken before they have been allocated.
+ */
+int nfs4_replace_transport(struct nfs_server *server,
+ const struct nfs4_fs_locations *locations)
+{
+ char *page = NULL, *page2 = NULL;
+ int loc, error;
+
+ error = -ENOENT;
+ if (locations == NULL || locations->nlocations <= 0)
+ goto out;
+
+ error = -ENOMEM;
+ page = (char *) __get_free_page(GFP_USER);
+ if (!page)
+ goto out;
+ page2 = (char *) __get_free_page(GFP_USER);
+ if (!page2)
+ goto out;
+
+ for (loc = 0; loc < locations->nlocations; loc++) {
+ const struct nfs4_fs_location *location =
+ &locations->locations[loc];
+
+ if (location == NULL || location->nservers <= 0 ||
+ location->rootpath.ncomponents == 0)
+ continue;
+
+ error = nfs4_try_replacing_one_location(server, page,
+ page2, location);
+ if (error == 0) /* first location that works ends the search */
+ break;
+ }
+
+out:
+ free_page((unsigned long)page);
+ free_page((unsigned long)page2);
+ return error;
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 989bb9d3074d..b02c4cc7b0a9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -384,6 +384,14 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_lease_recovery(clp);
goto wait_on_recovery;
+ case -NFS4ERR_MOVED:
+ ret = nfs4_schedule_migration_recovery(server);
+ if (ret < 0)
+ break;
+ goto wait_on_recovery;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -431,6 +439,8 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
return nfs4_map_errors(ret);
wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
+ if (test_bit(NFS_MIG_FAILED, &server->mig_status))
+ return -EIO;
if (ret == 0)
exception->retry = 1;
return ret;
@@ -912,6 +922,7 @@ struct nfs4_opendata {
struct iattr attrs;
unsigned long timestamp;
unsigned int rpc_done : 1;
+ unsigned int file_created : 1;
unsigned int is_recover : 1;
int rpc_status;
int cancelled;
@@ -1317,31 +1328,24 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
int ret;
if (!data->rpc_done) {
- ret = data->rpc_status;
- goto err;
+ if (data->rpc_status) {
+ ret = data->rpc_status;
+ goto err;
+ }
+ /* cached opens have already been processed */
+ goto update;
}
- ret = -ESTALE;
- if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE))
- goto err;
-
- ret = -ENOMEM;
- state = nfs4_get_open_state(inode, data->owner);
- if (state == NULL)
- goto err;
-
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
goto err;
- nfs_setsecurity(inode, &data->f_attr, data->f_label);
-
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
+update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
+ atomic_inc(&state->count);
return state;
err:
@@ -1574,6 +1578,12 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
/* Don't recall a delegation if it was lost */
nfs4_schedule_lease_recovery(server->nfs_client);
return -EAGAIN;
+ case -NFS4ERR_MOVED:
+ nfs4_schedule_migration_recovery(server);
+ return -EAGAIN;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(server->nfs_client);
+ return -EAGAIN;
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
@@ -1946,8 +1956,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
nfs_fattr_map_and_free_names(server, &data->f_attr);
- if (o_arg->open_flags & O_CREAT)
+ if (o_arg->open_flags & O_CREAT) {
update_changeattr(dir, &o_res->cinfo);
+ if (o_arg->open_flags & O_EXCL)
+ data->file_created = 1;
+ else if (o_res->cinfo.before != o_res->cinfo.after)
+ data->file_created = 1;
+ }
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -2191,7 +2206,8 @@ static int _nfs4_do_open(struct inode *dir,
struct nfs_open_context *ctx,
int flags,
struct iattr *sattr,
- struct nfs4_label *label)
+ struct nfs4_label *label,
+ int *opened)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
@@ -2261,6 +2277,8 @@ static int _nfs4_do_open(struct inode *dir,
nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
}
}
+ if (opendata->file_created)
+ *opened |= FILE_CREATED;
if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
*ctx_th = opendata->f_attr.mdsthreshold;
@@ -2289,7 +2307,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
struct nfs_open_context *ctx,
int flags,
struct iattr *sattr,
- struct nfs4_label *label)
+ struct nfs4_label *label,
+ int *opened)
{
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = { };
@@ -2297,7 +2316,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
int status;
do {
- status = _nfs4_do_open(dir, ctx, flags, sattr, label);
+ status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened);
res = ctx->state;
trace_nfs4_open_file(ctx, flags, status);
if (status == 0)
@@ -2659,7 +2678,8 @@ out:
}
static struct inode *
-nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
+nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
+ int open_flags, struct iattr *attr, int *opened)
{
struct nfs4_state *state;
struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL;
@@ -2667,7 +2687,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
label = nfs4_label_init_security(dir, ctx->dentry, attr, &l);
/* Protect against concurrent sillydeletes */
- state = nfs4_do_open(dir, ctx, open_flags, attr, label);
+ state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened);
nfs4_label_release_security(label);
@@ -2853,11 +2873,24 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
int status = -EPERM;
size_t i;
- for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
- status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
- if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
- continue;
- break;
+ if (server->auth_info.flavor_len > 0) {
+ /* try each flavor specified by user */
+ for (i = 0; i < server->auth_info.flavor_len; i++) {
+ status = nfs4_lookup_root_sec(server, fhandle, info,
+ server->auth_info.flavors[i]);
+ if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+ continue;
+ break;
+ }
+ } else {
+ /* no flavors specified by user, try default list */
+ for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
+ status = nfs4_lookup_root_sec(server, fhandle, info,
+ flav_array[i]);
+ if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+ continue;
+ break;
+ }
}
/*
@@ -2899,9 +2932,6 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
status = nfs4_lookup_root(server, fhandle, info);
if (status != -NFS4ERR_WRONGSEC)
break;
- /* Did user force a 'sec=' mount option? */
- if (server->flags & NFS_MOUNT_SECFLAVOUR)
- break;
default:
status = nfs4_do_find_root_sec(server, fhandle, info);
}
@@ -2970,11 +3000,16 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
status = nfs4_proc_fs_locations(client, dir, name, locations, page);
if (status != 0)
goto out;
- /* Make sure server returned a different fsid for the referral */
+
+ /*
+ * If the fsid didn't change, this is a migration event, not a
+ * referral. Cause us to drop into the exception handler, which
+ * will kick off migration recovery.
+ */
if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
dprintk("%s: server did not return a different fsid for"
" a referral at %s\n", __func__, name->name);
- status = -EIO;
+ status = -NFS4ERR_MOVED;
goto out;
}
/* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
@@ -3154,9 +3189,6 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
err = -EPERM;
if (client != *clnt)
goto out;
- /* No security negotiation if the user specified 'sec=' */
- if (NFS_SERVER(dir)->flags & NFS_MOUNT_SECFLAVOUR)
- goto out;
client = nfs4_create_sec_client(client, dir, name);
if (IS_ERR(client))
return PTR_ERR(client);
@@ -3332,6 +3364,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
struct nfs4_label l, *ilabel = NULL;
struct nfs_open_context *ctx;
struct nfs4_state *state;
+ int opened = 0;
int status = 0;
ctx = alloc_nfs_open_context(dentry, FMODE_READ);
@@ -3341,7 +3374,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
ilabel = nfs4_label_init_security(dir, dentry, sattr, &l);
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, ctx, flags, sattr, ilabel);
+ state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, &opened);
if (IS_ERR(state)) {
status = PTR_ERR(state);
goto out;
@@ -4209,7 +4242,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
unsigned long timestamp = data->timestamp;
trace_nfs4_renew_async(clp, task->tk_status);
- if (task->tk_status < 0) {
+ switch (task->tk_status) {
+ case 0:
+ break;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(clp);
+ break;
+ default:
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0)
return;
@@ -4723,17 +4762,24 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(server, state) < 0)
- goto stateid_invalid;
+ goto recovery_failed;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(server, state) < 0)
- goto stateid_invalid;
+ goto recovery_failed;
}
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_lease_recovery(clp);
goto wait_on_recovery;
+ case -NFS4ERR_MOVED:
+ if (nfs4_schedule_migration_recovery(server) < 0)
+ goto recovery_failed;
+ goto wait_on_recovery;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -4745,29 +4791,28 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- task->tk_status = 0;
- return -EAGAIN;
+ goto restart_call;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
- task->tk_status = 0;
- return -EAGAIN;
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_OLD_STATEID:
- task->tk_status = 0;
- return -EAGAIN;
+ goto restart_call;
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-stateid_invalid:
+recovery_failed:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
+ if (test_bit(NFS_MIG_FAILED, &server->mig_status))
+ goto recovery_failed;
+restart_call:
task->tk_status = 0;
return -EAGAIN;
}
@@ -5094,6 +5139,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = 0;
}
request->fl_ops->fl_release_private(request);
+ request->fl_ops = NULL;
out:
return status;
}
@@ -5767,6 +5813,7 @@ struct nfs_release_lockowner_data {
struct nfs_release_lockowner_args args;
struct nfs4_sequence_args seq_args;
struct nfs4_sequence_res seq_res;
+ unsigned long timestamp;
};
static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
@@ -5774,12 +5821,27 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata
struct nfs_release_lockowner_data *data = calldata;
nfs40_setup_sequence(data->server,
&data->seq_args, &data->seq_res, task);
+ data->timestamp = jiffies;
}
static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
{
struct nfs_release_lockowner_data *data = calldata;
+ struct nfs_server *server = data->server;
+
nfs40_sequence_done(task, &data->seq_res);
+
+ switch (task->tk_status) {
+ case 0:
+ renew_lease(server, data->timestamp);
+ break;
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_LEASE_MOVED:
+ case -NFS4ERR_DELAY:
+ if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
+ rpc_restart_call_prepare(task);
+ }
}
static void nfs4_release_lockowner_release(void *calldata)
@@ -5978,6 +6040,283 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
return err;
}
+/*
+ * This operation also signals the server that this client is
+ * performing migration recovery. The server can stop returning
+ * NFS4ERR_LEASE_MOVED to this client. A RENEW operation is
+ * appended to this compound to identify the client ID which is
+ * performing recovery.
+ *
+ * The request asks for the FSID and FS_LOCATIONS attributes of the
+ * file handle of "inode" and is sent synchronously on the server's
+ * RPC client with "cred", with the sequence arguments marked
+ * privileged. "locations" is reset (attributes re-initialised,
+ * nlocations zeroed) before the call.
+ *
+ * Returns zero on success, in which case the lease is renewed using
+ * the time sampled before the call was made; otherwise returns the
+ * status of the call unchanged.
+ */
+static int _nfs40_proc_get_locations(struct inode *inode,
+ struct nfs4_fs_locations *locations,
+ struct page *page, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ u32 bitmask[2] = {
+ [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+ };
+ struct nfs4_fs_locations_arg args = {
+ .clientid = server->nfs_client->cl_clientid,
+ .fh = NFS_FH(inode),
+ .page = page,
+ .bitmask = bitmask,
+ .migration = 1, /* skip LOOKUP */
+ .renew = 1, /* append RENEW */
+ };
+ struct nfs4_fs_locations_res res = {
+ .fs_locations = locations,
+ .migration = 1,
+ .renew = 1,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ unsigned long now = jiffies; /* sampled before the call; used for the lease renewal */
+ int status;
+
+ nfs_fattr_init(&locations->fattr);
+ locations->server = server;
+ locations->nlocations = 0;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ if (status)
+ return status;
+
+ renew_lease(server, now);
+ return 0;
+}
+
+#ifdef CONFIG_NFS_V4_1
+
+/*
+ * This operation also signals the server that this client is
+ * performing migration recovery. The server can stop asserting
+ * SEQ4_STATUS_LEASE_MOVED for this client. The client ID
+ * performing this operation is identified in the SEQUENCE
+ * operation in this compound.
+ *
+ * When the client supports GETATTR(fs_locations_info), it can
+ * be plumbed in here.
+ *
+ * The request asks for the FSID and FS_LOCATIONS attributes of the
+ * file handle of "inode" and is sent synchronously with "cred", with
+ * the sequence arguments marked privileged. "locations" is reset
+ * before the call. Unlike the NFSv4.0 variant, no client ID is placed
+ * in the arguments and no RENEW is requested.
+ *
+ * Returns the status of the call, except that a successful reply
+ * whose sequence status flags include SEQ4_STATUS_LEASE_MOVED is
+ * reported as -NFS4ERR_LEASE_MOVED.
+ */
+static int _nfs41_proc_get_locations(struct inode *inode,
+ struct nfs4_fs_locations *locations,
+ struct page *page, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ u32 bitmask[2] = {
+ [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+ };
+ struct nfs4_fs_locations_arg args = {
+ .fh = NFS_FH(inode),
+ .page = page,
+ .bitmask = bitmask,
+ .migration = 1, /* skip LOOKUP */
+ };
+ struct nfs4_fs_locations_res res = {
+ .fs_locations = locations,
+ .migration = 1,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ int status;
+
+ nfs_fattr_init(&locations->fattr);
+ locations->server = server;
+ locations->nlocations = 0;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ if (status == NFS4_OK &&
+ res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
+ status = -NFS4ERR_LEASE_MOVED; /* the call worked, but leases still need recovery */
+ return status;
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+/**
+ * nfs4_proc_get_locations - discover locations for a migrated FSID
+ * @inode: inode on FSID that is migrating
+ * @locations: result of query
+ * @page: buffer
+ * @cred: credential to use for this operation
+ *
+ * Returns NFS4_OK on success, a negative NFS4ERR status code if the
+ * operation failed, or a negative errno if a local error occurred.
+ *
+ * On success, "locations" is filled in, but if the server has
+ * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not
+ * asserted.
+ *
+ * -NFS4ERR_LEASE_MOVED is returned if the server still has leases
+ * from this client that require migration recovery.
+ *
+ * The query itself is carried out by the get_locations method of the
+ * client's minor-version migration recovery operations. Only
+ * -NFS4ERR_DELAY is passed to nfs4_handle_exception() and possibly
+ * retried; every other status ends the loop and is returned to the
+ * caller as is. If the exception handler does not request a retry,
+ * -NFS4ERR_DELAY itself is returned.
+ */
+int nfs4_proc_get_locations(struct inode *inode,
+ struct nfs4_fs_locations *locations,
+ struct page *page, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
+ const struct nfs4_mig_recovery_ops *ops =
+ clp->cl_mvops->mig_recovery_ops;
+ struct nfs4_exception exception = { };
+ int status;
+
+ dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+ nfs_display_fhandle(NFS_FH(inode), __func__);
+
+ do {
+ status = ops->get_locations(inode, locations, page, cred);
+ if (status != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, status, &exception); /* return value unused; only exception.retry is consulted */
+ } while (exception.retry);
+ return status;
+}
+
+/*
+ * This operation also signals the server that this client is
+ * performing "lease moved" recovery. The server can stop
+ * returning NFS4ERR_LEASE_MOVED to this client. A RENEW operation
+ * is appended to this compound to identify the client ID which is
+ * performing recovery.
+ *
+ * A file handle is allocated to receive the result of the call and
+ * is freed again before returning; its contents are not examined
+ * here. The call is made synchronously with "cred", with the
+ * sequence arguments marked privileged.
+ *
+ * Returns -ENOMEM if the file handle cannot be allocated, the status
+ * of the call if it is non-zero, or zero on success, in which case
+ * the client's lease is renewed using the time sampled before the
+ * call was made.
+ */
+static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct rpc_clnt *clnt = server->client;
+ struct nfs4_fsid_present_arg args = {
+ .fh = NFS_FH(inode),
+ .clientid = clp->cl_clientid,
+ .renew = 1, /* append RENEW */
+ };
+ struct nfs4_fsid_present_res res = {
+ .renew = 1,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ unsigned long now = jiffies; /* sampled before the call; used for the lease renewal */
+ int status;
+
+ res.fh = nfs_alloc_fhandle();
+ if (res.fh == NULL)
+ return -ENOMEM;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ nfs_free_fhandle(res.fh); /* only the status of the call is of interest */
+ if (status)
+ return status;
+
+ do_renew_lease(clp, now);
+ return 0;
+}
+
+#ifdef CONFIG_NFS_V4_1
+
+/*
+ * This operation also signals the server that this client is
+ * performing "lease moved" recovery. The server can stop asserting
+ * SEQ4_STATUS_LEASE_MOVED for this client. The client ID performing
+ * this operation is identified in the SEQUENCE operation in this
+ * compound.
+ *
+ * A file handle is allocated to receive the result of the call and
+ * is freed again before returning. The call is made synchronously
+ * with "cred", with the sequence arguments marked privileged.
+ *
+ * Returns -ENOMEM if the file handle cannot be allocated. Otherwise
+ * returns the status of the call, except that a successful reply
+ * whose sequence status flags include SEQ4_STATUS_LEASE_MOVED is
+ * reported as -NFS4ERR_LEASE_MOVED.
+ */
+static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ struct nfs4_fsid_present_arg args = {
+ .fh = NFS_FH(inode),
+ };
+ struct nfs4_fsid_present_res res = {
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ int status;
+
+ res.fh = nfs_alloc_fhandle();
+ if (res.fh == NULL)
+ return -ENOMEM;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ nfs_free_fhandle(res.fh); /* only the status of the call is of interest */
+ if (status == NFS4_OK &&
+ res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
+ status = -NFS4ERR_LEASE_MOVED; /* the call worked, but leases still need recovery */
+ return status;
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+/**
+ * nfs4_proc_fsid_present - Is this FSID present or absent on server?
+ * @inode: inode on FSID to check
+ * @cred: credential to use for this operation
+ *
+ * Server indicates whether the FSID is present, moved, or not
+ * recognized. This operation is necessary to clear a LEASE_MOVED
+ * condition for this client ID.
+ *
+ * Returns NFS4_OK if the FSID is present on this server,
+ * -NFS4ERR_MOVED if the FSID is no longer present, a negative
+ * NFS4ERR code if some error occurred on the server, or a
+ * negative errno if a local failure occurred.
+ *
+ * The check itself is carried out by the fsid_present method of the
+ * client's minor-version migration recovery operations. Only
+ * -NFS4ERR_DELAY is passed to nfs4_handle_exception() and possibly
+ * retried; every other status ends the loop and is returned to the
+ * caller as is. If the exception handler does not request a retry,
+ * -NFS4ERR_DELAY itself is returned.
+ */
+int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
+ const struct nfs4_mig_recovery_ops *ops =
+ clp->cl_mvops->mig_recovery_ops;
+ struct nfs4_exception exception = { };
+ int status;
+
+ dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+ nfs_display_fhandle(NFS_FH(inode), __func__);
+
+ do {
+ status = ops->fsid_present(inode, cred);
+ if (status != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, status, &exception); /* return value unused; only exception.retry is consulted */
+ } while (exception.retry);
+ return status;
+}
+
/**
* If 'use_integrity' is true and the state managment nfs_client
* cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
@@ -6264,8 +6603,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
struct nfs41_exchange_id_args args = {
.verifier = &verifier,
.client = clp,
+#ifdef CONFIG_NFS_V4_1_MIGRATION
.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
- EXCHGID4_FLAG_BIND_PRINC_STATEID,
+ EXCHGID4_FLAG_BIND_PRINC_STATEID |
+ EXCHGID4_FLAG_SUPP_MOVED_MIGR,
+#else
+ .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
+ EXCHGID4_FLAG_BIND_PRINC_STATEID,
+#endif
};
struct nfs41_exchange_id_res res = {
0
@@ -7564,8 +7909,10 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
{
int err;
struct page *page;
- rpc_authflavor_t flavor;
+ rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
struct nfs4_secinfo_flavors *flavors;
+ struct nfs4_secinfo4 *secinfo;
+ int i;
page = alloc_page(GFP_KERNEL);
if (!page) {
@@ -7587,9 +7934,34 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
if (err)
goto out_freepage;
- flavor = nfs_find_best_sec(flavors);
- if (err == 0)
- err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
+ for (i = 0; i < flavors->num_flavors; i++) {
+ secinfo = &flavors->flavors[i];
+
+ switch (secinfo->flavor) {
+ case RPC_AUTH_NULL:
+ case RPC_AUTH_UNIX:
+ case RPC_AUTH_GSS:
+ flavor = rpcauth_get_pseudoflavor(secinfo->flavor,
+ &secinfo->flavor_info);
+ break;
+ default:
+ flavor = RPC_AUTH_MAXFLAVOR;
+ break;
+ }
+
+ if (!nfs_auth_info_match(&server->auth_info, flavor))
+ flavor = RPC_AUTH_MAXFLAVOR;
+
+ if (flavor != RPC_AUTH_MAXFLAVOR) {
+ err = nfs4_lookup_root_sec(server, fhandle,
+ info, flavor);
+ if (!err)
+ break;
+ }
+ }
+
+ if (flavor == RPC_AUTH_MAXFLAVOR)
+ err = -EPERM;
out_freepage:
put_page(page);
@@ -7851,6 +8223,18 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
};
#endif
+static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = { /* installed as .mig_recovery_ops of nfs_v4_0_minor_ops */
+ .get_locations = _nfs40_proc_get_locations, /* reached through nfs4_proc_get_locations() */
+ .fsid_present = _nfs40_proc_fsid_present, /* reached through nfs4_proc_fsid_present() */
+};
+
+#if defined(CONFIG_NFS_V4_1)
+static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = { /* installed as .mig_recovery_ops of nfs_v4_1_minor_ops */
+ .get_locations = _nfs41_proc_get_locations, /* reached through nfs4_proc_get_locations() */
+ .fsid_present = _nfs41_proc_fsid_present, /* reached through nfs4_proc_fsid_present() */
+};
+#endif /* CONFIG_NFS_V4_1 */
+
static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.minor_version = 0,
.init_caps = NFS_CAP_READDIRPLUS
@@ -7866,6 +8250,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
.state_renewal_ops = &nfs40_state_renewal_ops,
+ .mig_recovery_ops = &nfs40_mig_recovery_ops,
};
#if defined(CONFIG_NFS_V4_1)
@@ -7886,6 +8271,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
.state_renewal_ops = &nfs41_state_renewal_ops,
+ .mig_recovery_ops = &nfs41_mig_recovery_ops,
};
#endif
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cc14cbb78b73..452f4c8dadea 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -239,8 +239,6 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
}
}
-#if defined(CONFIG_NFS_V4_1)
-
static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
{
set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
@@ -270,6 +268,8 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
return nfs4_drain_slot_tbl(&ses->fc_slot_table);
}
+#if defined(CONFIG_NFS_V4_1)
+
static int nfs41_setup_state_renewal(struct nfs_client *clp)
{
int status;
@@ -1197,20 +1197,74 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
}
EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
+/**
+ * nfs4_schedule_migration_recovery - trigger migration recovery
+ *
+ * @server: FSID that is migrating
+ *
+ * Returns zero if recovery has started, otherwise a negative NFS4ERR
+ * value is returned.
+ *
+ * Recovery is refused with -NFS4ERR_IO when the server's file handle
+ * expire type is anything other than NFS4_FH_PERSISTENT, or when
+ * NFS_MIG_FAILED is already set in the server's mig_status.
+ * Otherwise NFS_MIG_IN_TRANSITION is set on the server,
+ * NFS4CLNT_MOVED is set in the client's state, and the client's
+ * state manager is scheduled.
+ */
+int nfs4_schedule_migration_recovery(const struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ if (server->fh_expire_type != NFS4_FH_PERSISTENT) {
+ pr_err("NFS: volatile file handles not supported (server %s)\n",
+ clp->cl_hostname);
+ return -NFS4ERR_IO;
+ }
+
+ if (test_bit(NFS_MIG_FAILED, &server->mig_status))
+ return -NFS4ERR_IO;
+
+ dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n",
+ __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+
+ set_bit(NFS_MIG_IN_TRANSITION,
+ &((struct nfs_server *)server)->mig_status); /* cast drops const so the flag can be written */
+ set_bit(NFS4CLNT_MOVED, &clp->cl_state);
+
+ nfs4_schedule_state_manager(clp);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery);
+
+/**
+ * nfs4_schedule_lease_moved_recovery - start lease-moved recovery
+ *
+ * @clp: server to check for moved leases
+ *
+ * Sets NFS4CLNT_LEASE_MOVED in the client's state and schedules the
+ * client's state manager.
+ */
+void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp)
+{
+	/*
+	 * Cast the client ID explicitly, as the other debugging messages
+	 * in this patch do for the fsid fields, so the argument matches
+	 * %llx regardless of how the ID's integer type is defined.
+	 */
+	dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n",
+		__func__, (unsigned long long)clp->cl_clientid,
+		clp->cl_hostname);
+
+	set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state);
+	nfs4_schedule_state_manager(clp);
+}
+EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery);
+
int nfs4_wait_clnt_recover(struct nfs_client *clp)
{
int res;
might_sleep();
+ atomic_inc(&clp->cl_count);
res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
- return res;
-
+ goto out;
if (clp->cl_cons_state < 0)
- return clp->cl_cons_state;
- return 0;
+ res = clp->cl_cons_state;
+out:
+ nfs_put_client(clp);
+ return res;
}
int nfs4_client_recover_expired_lease(struct nfs_client *clp)
@@ -1375,8 +1429,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
goto out;
default:
- printk(KERN_ERR "NFS: %s: unhandled error %d. "
- "Zeroing state\n", __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d\n",
+ __func__, status);
case -ENOMEM:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
@@ -1439,15 +1493,12 @@ restart:
}
switch (status) {
default:
- printk(KERN_ERR "NFS: %s: unhandled error %d. "
- "Zeroing state\n", __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d\n",
+ __func__, status);
case -ENOENT:
case -ENOMEM:
case -ESTALE:
- /*
- * Open state on this file cannot be recovered
- * All we can do is revert to using the zero stateid.
- */
+ /* Open state on this file cannot be recovered */
nfs4_state_mark_recovery_failed(state, status);
break;
case -EAGAIN:
@@ -1628,7 +1679,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
nfs4_state_end_reclaim_reboot(clp);
break;
case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_LEASE_MOVED:
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
@@ -1829,6 +1879,168 @@ static int nfs4_purge_lease(struct nfs_client *clp)
return 0;
}
+/*
+ * Try remote migration of one FSID from a source server to a
+ * destination server. The source server provides a list of
+ * potential destinations.
+ *
+ * Returns zero or a negative NFS4ERR status code.
+ */
+static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_fs_locations *locations = NULL;
+ struct inode *inode;
+ struct page *page;
+ int status, result;
+
+ dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+
+ result = 0;
+ page = alloc_page(GFP_KERNEL);
+ locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+ if (page == NULL || locations == NULL) {
+ dprintk("<-- %s: no memory\n", __func__);
+ goto out;
+ }
+
+ inode = server->super->s_root->d_inode;
+ result = nfs4_proc_get_locations(inode, locations, page, cred);
+ if (result) {
+ dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
+ __func__, result);
+ goto out;
+ }
+
+ result = -NFS4ERR_NXIO;
+ if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
+ dprintk("<-- %s: No fs_locations data, migration skipped\n",
+ __func__);
+ goto out;
+ }
+
+ nfs4_begin_drain_session(clp);
+
+ status = nfs4_replace_transport(server, locations);
+ if (status != 0) {
+ dprintk("<-- %s: failed to replace transport: %d\n",
+ __func__, status);
+ goto out;
+ }
+
+ result = 0;
+ dprintk("<-- %s: migration succeeded\n", __func__);
+
+out:
+ if (page != NULL)
+ __free_page(page);
+ kfree(locations);
+ if (result) {
+ pr_err("NFS: migration recovery failed (server %s)\n",
+ clp->cl_hostname);
+ set_bit(NFS_MIG_FAILED, &server->mig_status);
+ }
+ return result;
+}
+
+/*
+ * Returns zero or a negative NFS4ERR status code.
+ */
+static int nfs4_handle_migration(struct nfs_client *clp)
+{
+ const struct nfs4_state_maintenance_ops *ops =
+ clp->cl_mvops->state_renewal_ops;
+ struct nfs_server *server;
+ struct rpc_cred *cred;
+
+ dprintk("%s: migration reported on \"%s\"\n", __func__,
+ clp->cl_hostname);
+
+ spin_lock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred_locked(clp);
+ spin_unlock(&clp->cl_lock);
+ if (cred == NULL)
+ return -NFS4ERR_NOENT;
+
+ clp->cl_mig_gen++;
+restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ int status;
+
+ if (server->mig_gen == clp->cl_mig_gen)
+ continue;
+ server->mig_gen = clp->cl_mig_gen;
+
+ if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION,
+ &server->mig_status))
+ continue;
+
+ rcu_read_unlock();
+ status = nfs4_try_migration(server, cred);
+ if (status < 0) {
+ put_rpccred(cred);
+ return status;
+ }
+ goto restart;
+ }
+ rcu_read_unlock();
+ put_rpccred(cred);
+ return 0;
+}
+
+/*
+ * Test each nfs_server on the clp's cl_superblocks list to see
+ * if it's moved to another server. Stop when the server no longer
+ * returns NFS4ERR_LEASE_MOVED.
+ */
+static int nfs4_handle_lease_moved(struct nfs_client *clp)
+{
+ const struct nfs4_state_maintenance_ops *ops =
+ clp->cl_mvops->state_renewal_ops;
+ struct nfs_server *server;
+ struct rpc_cred *cred;
+
+ dprintk("%s: lease moved reported on \"%s\"\n", __func__,
+ clp->cl_hostname);
+
+ spin_lock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred_locked(clp);
+ spin_unlock(&clp->cl_lock);
+ if (cred == NULL)
+ return -NFS4ERR_NOENT;
+
+ clp->cl_mig_gen++;
+restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ struct inode *inode;
+ int status;
+
+ if (server->mig_gen == clp->cl_mig_gen)
+ continue;
+ server->mig_gen = clp->cl_mig_gen;
+
+ rcu_read_unlock();
+
+ inode = server->super->s_root->d_inode;
+ status = nfs4_proc_fsid_present(inode, cred);
+ if (status != -NFS4ERR_MOVED)
+ goto restart; /* wasn't this one */
+ if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED)
+ goto restart; /* there are more */
+ goto out;
+ }
+ rcu_read_unlock();
+
+out:
+ put_rpccred(cred);
+ return 0;
+}
+
/**
* nfs4_discover_server_trunking - Detect server IP address trunking
*
@@ -2017,9 +2229,10 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
nfs41_handle_server_reboot(clp);
if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
- SEQ4_STATUS_ADMIN_STATE_REVOKED |
- SEQ4_STATUS_LEASE_MOVED))
+ SEQ4_STATUS_ADMIN_STATE_REVOKED))
nfs41_handle_state_revoked(clp);
+ if (flags & SEQ4_STATUS_LEASE_MOVED)
+ nfs4_schedule_lease_moved_recovery(clp);
if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
nfs41_handle_recallable_state_revoked(clp);
if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
@@ -2157,7 +2370,20 @@ static void nfs4_state_manager(struct nfs_client *clp)
status = nfs4_check_lease(clp);
if (status < 0)
goto out_error;
- continue;
+ }
+
+ if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) {
+ section = "migration";
+ status = nfs4_handle_migration(clp);
+ if (status < 0)
+ goto out_error;
+ }
+
+ if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) {
+ section = "lease moved";
+ status = nfs4_handle_lease_moved(clp);
+ if (status < 0)
+ goto out_error;
}
/* First recover reboot state... */
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index e26acdd1a645..65ab0a0ca1c4 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -261,9 +261,9 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name,
res = nfs_follow_remote_path(root_mnt, export_path);
- dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
- IS_ERR(res) ? PTR_ERR(res) : 0,
- IS_ERR(res) ? " [error]" : "");
+ dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
+ PTR_ERR_OR_ZERO(res),
+ IS_ERR(res) ? " [error]" : "");
return res;
}
@@ -319,9 +319,9 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
data->mnt_path = export_path;
res = nfs_follow_remote_path(root_mnt, export_path);
- dprintk("<-- nfs4_referral_mount() = %ld%s\n",
- IS_ERR(res) ? PTR_ERR(res) : 0,
- IS_ERR(res) ? " [error]" : "");
+ dprintk("<-- nfs4_referral_mount() = %d%s\n",
+ PTR_ERR_OR_ZERO(res),
+ IS_ERR(res) ? " [error]" : "");
return res;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 79210d23f607..f903389d90f1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -595,11 +595,13 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_getattr_maxsz)
+ encode_getattr_maxsz + \
+ encode_renew_maxsz)
#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_getattr_maxsz)
+ decode_getattr_maxsz + \
+ decode_renew_maxsz)
#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -736,13 +738,15 @@ static int nfs4_stat_to_errno(int);
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_lookup_maxsz + \
- encode_fs_locations_maxsz)
+ encode_fs_locations_maxsz + \
+ encode_renew_maxsz)
#define NFS4_dec_fs_locations_sz \
(compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_lookup_maxsz + \
- decode_fs_locations_maxsz)
+ decode_fs_locations_maxsz + \
+ decode_renew_maxsz)
#define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -751,6 +755,18 @@ static int nfs4_stat_to_errno(int);
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_secinfo_maxsz)
+#define NFS4_enc_fsid_present_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getfh_maxsz + \
+ encode_renew_maxsz)
+#define NFS4_dec_fsid_present_sz \
+ (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getfh_maxsz + \
+ decode_renew_maxsz)
#if defined(CONFIG_NFS_V4_1)
#define NFS4_enc_bind_conn_to_session_sz \
(compound_encode_hdr_maxsz + \
@@ -2687,11 +2703,20 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->dir_fh, &hdr);
- encode_lookup(xdr, args->name, &hdr);
- replen = hdr.replen; /* get the attribute into args->page */
- encode_fs_locations(xdr, args->bitmask, &hdr);
+ if (args->migration) {
+ encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen;
+ encode_fs_locations(xdr, args->bitmask, &hdr);
+ if (args->renew)
+ encode_renew(xdr, args->clientid, &hdr);
+ } else {
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_lookup(xdr, args->name, &hdr);
+ replen = hdr.replen;
+ encode_fs_locations(xdr, args->bitmask, &hdr);
+ }
+ /* Set up reply kvec to capture returned fs_locations array. */
xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
0, PAGE_SIZE);
encode_nops(&hdr);
@@ -2715,6 +2740,26 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode FSID_PRESENT request
+ */
+static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_fsid_present_arg *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getfh(xdr, &hdr);
+ if (args->renew)
+ encode_renew(xdr, args->clientid, &hdr);
+ encode_nops(&hdr);
+}
+
#if defined(CONFIG_NFS_V4_1)
/*
* BIND_CONN_TO_SESSION request
@@ -6824,13 +6869,26 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_lookup(xdr);
- if (status)
- goto out;
- xdr_enter_page(xdr, PAGE_SIZE);
- status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
+ if (res->migration) {
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr_generic(xdr,
+ &res->fs_locations->fattr,
NULL, res->fs_locations,
NULL, res->fs_locations->server);
+ if (status)
+ goto out;
+ if (res->renew)
+ status = decode_renew(xdr);
+ } else {
+ status = decode_lookup(xdr);
+ if (status)
+ goto out;
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr_generic(xdr,
+ &res->fs_locations->fattr,
+ NULL, res->fs_locations,
+ NULL, res->fs_locations->server);
+ }
out:
return status;
}
@@ -6859,6 +6917,34 @@ out:
return status;
}
+/*
+ * Decode FSID_PRESENT response
+ */
+static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs4_fsid_present_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_getfh(xdr, res->fh);
+ if (status)
+ goto out;
+ if (res->renew)
+ status = decode_renew(xdr);
+out:
+ return status;
+}
+
#if defined(CONFIG_NFS_V4_1)
/*
* Decode BIND_CONN_TO_SESSION response
@@ -7373,6 +7459,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
PROC(SECINFO, enc_secinfo, dec_secinfo),
+ PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present),
#if defined(CONFIG_NFS_V4_1)
PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
PROC(CREATE_SESSION, enc_create_session, dec_create_session),
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a03b9c6f9489..317d6fc2160e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -497,7 +497,8 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
static const struct {
rpc_authflavor_t flavour;
const char *str;
- } sec_flavours[] = {
+ } sec_flavours[NFS_AUTH_INFO_MAX_FLAVORS] = {
+ /* update NFS_AUTH_INFO_MAX_FLAVORS when this list changes! */
{ RPC_AUTH_NULL, "null" },
{ RPC_AUTH_UNIX, "sys" },
{ RPC_AUTH_GSS_KRB5, "krb5" },
@@ -923,8 +924,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
data->mount_server.port = NFS_UNSPEC_PORT;
data->nfs_server.port = NFS_UNSPEC_PORT;
data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- data->auth_flavors[0] = RPC_AUTH_MAXFLAVOR;
- data->auth_flavor_len = 0;
+ data->selected_flavor = RPC_AUTH_MAXFLAVOR;
data->minorversion = 0;
data->need_mount = true;
data->net = current->nsproxy->net_ns;
@@ -1019,12 +1019,51 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
}
}
-static void nfs_set_auth_parsed_mount_data(struct nfs_parsed_mount_data *data,
- rpc_authflavor_t pseudoflavor)
+/*
+ * Add 'flavor' to 'auth_info' if not already present.
+ * Returns true if 'flavor' ends up in the list, false otherwise
+ */
+static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
+ rpc_authflavor_t flavor)
+{
+ unsigned int i;
+ unsigned int max_flavor_len = (sizeof(auth_info->flavors) /
+ sizeof(auth_info->flavors[0]));
+
+ /* make sure this flavor isn't already in the list */
+ for (i = 0; i < auth_info->flavor_len; i++) {
+ if (flavor == auth_info->flavors[i])
+ return true;
+ }
+
+ if (auth_info->flavor_len + 1 >= max_flavor_len) {
+ dfprintk(MOUNT, "NFS: too many sec= flavors\n");
+ return false;
+ }
+
+ auth_info->flavors[auth_info->flavor_len++] = flavor;
+ return true;
+}
+
+/*
+ * Return true if 'match' is in auth_info or auth_info is empty.
+ * Return false otherwise.
+ */
+bool nfs_auth_info_match(const struct nfs_auth_info *auth_info,
+ rpc_authflavor_t match)
{
- data->auth_flavors[0] = pseudoflavor;
- data->auth_flavor_len = 1;
+ int i;
+
+ if (!auth_info->flavor_len)
+ return true;
+
+ for (i = 0; i < auth_info->flavor_len; i++) {
+ if (auth_info->flavors[i] == match)
+ return true;
+ }
+ return false;
}
+EXPORT_SYMBOL_GPL(nfs_auth_info_match);
/*
* Parse the value of the 'sec=' option.
@@ -1034,49 +1073,55 @@ static int nfs_parse_security_flavors(char *value,
{
substring_t args[MAX_OPT_ARGS];
rpc_authflavor_t pseudoflavor;
+ char *p;
dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
- switch (match_token(value, nfs_secflavor_tokens, args)) {
- case Opt_sec_none:
- pseudoflavor = RPC_AUTH_NULL;
- break;
- case Opt_sec_sys:
- pseudoflavor = RPC_AUTH_UNIX;
- break;
- case Opt_sec_krb5:
- pseudoflavor = RPC_AUTH_GSS_KRB5;
- break;
- case Opt_sec_krb5i:
- pseudoflavor = RPC_AUTH_GSS_KRB5I;
- break;
- case Opt_sec_krb5p:
- pseudoflavor = RPC_AUTH_GSS_KRB5P;
- break;
- case Opt_sec_lkey:
- pseudoflavor = RPC_AUTH_GSS_LKEY;
- break;
- case Opt_sec_lkeyi:
- pseudoflavor = RPC_AUTH_GSS_LKEYI;
- break;
- case Opt_sec_lkeyp:
- pseudoflavor = RPC_AUTH_GSS_LKEYP;
- break;
- case Opt_sec_spkm:
- pseudoflavor = RPC_AUTH_GSS_SPKM;
- break;
- case Opt_sec_spkmi:
- pseudoflavor = RPC_AUTH_GSS_SPKMI;
- break;
- case Opt_sec_spkmp:
- pseudoflavor = RPC_AUTH_GSS_SPKMP;
- break;
- default:
- return 0;
+ while ((p = strsep(&value, ":")) != NULL) {
+ switch (match_token(p, nfs_secflavor_tokens, args)) {
+ case Opt_sec_none:
+ pseudoflavor = RPC_AUTH_NULL;
+ break;
+ case Opt_sec_sys:
+ pseudoflavor = RPC_AUTH_UNIX;
+ break;
+ case Opt_sec_krb5:
+ pseudoflavor = RPC_AUTH_GSS_KRB5;
+ break;
+ case Opt_sec_krb5i:
+ pseudoflavor = RPC_AUTH_GSS_KRB5I;
+ break;
+ case Opt_sec_krb5p:
+ pseudoflavor = RPC_AUTH_GSS_KRB5P;
+ break;
+ case Opt_sec_lkey:
+ pseudoflavor = RPC_AUTH_GSS_LKEY;
+ break;
+ case Opt_sec_lkeyi:
+ pseudoflavor = RPC_AUTH_GSS_LKEYI;
+ break;
+ case Opt_sec_lkeyp:
+ pseudoflavor = RPC_AUTH_GSS_LKEYP;
+ break;
+ case Opt_sec_spkm:
+ pseudoflavor = RPC_AUTH_GSS_SPKM;
+ break;
+ case Opt_sec_spkmi:
+ pseudoflavor = RPC_AUTH_GSS_SPKMI;
+ break;
+ case Opt_sec_spkmp:
+ pseudoflavor = RPC_AUTH_GSS_SPKMP;
+ break;
+ default:
+ dfprintk(MOUNT,
+ "NFS: sec= option '%s' not recognized\n", p);
+ return 0;
+ }
+
+ if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor))
+ return 0;
}
- mnt->flags |= NFS_MOUNT_SECFLAVOUR;
- nfs_set_auth_parsed_mount_data(mnt, pseudoflavor);
return 1;
}
@@ -1623,12 +1668,14 @@ out_security_failure:
}
/*
- * Ensure that the specified authtype in args->auth_flavors[0] is supported by
- * the server. Returns 0 if it's ok, and -EACCES if not.
+ * Ensure that a specified authtype in args->auth_info is supported by
+ * the server. Returns 0 and sets args->selected_flavor if it's ok, and
+ * -EACCES if not.
*/
-static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args,
+static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
rpc_authflavor_t *server_authlist, unsigned int count)
{
+ rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
unsigned int i;
/*
@@ -1640,17 +1687,20 @@ static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args,
* can be used.
*/
for (i = 0; i < count; i++) {
- if (args->auth_flavors[0] == server_authlist[i] ||
- server_authlist[i] == RPC_AUTH_NULL)
+ flavor = server_authlist[i];
+
+ if (nfs_auth_info_match(&args->auth_info, flavor) ||
+ flavor == RPC_AUTH_NULL)
goto out;
}
- dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n",
- args->auth_flavors[0]);
+ dfprintk(MOUNT,
+ "NFS: specified auth flavors not supported by server\n");
return -EACCES;
out:
- dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]);
+ args->selected_flavor = flavor;
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
return 0;
}
@@ -1738,9 +1788,10 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
* Was a sec= authflavor specified in the options? First, verify
* whether the server supports it, and then just try to use it if so.
*/
- if (args->auth_flavor_len > 0) {
- status = nfs_verify_authflavor(args, authlist, authlist_len);
- dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]);
+ if (args->auth_info.flavor_len > 0) {
+ status = nfs_verify_authflavors(args, authlist, authlist_len);
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n",
+ args->selected_flavor);
if (status)
return ERR_PTR(status);
return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
@@ -1769,7 +1820,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
/* Fallthrough */
}
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
- nfs_set_auth_parsed_mount_data(args, flavor);
+ args->selected_flavor = flavor;
server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
if (!IS_ERR(server))
return server;
@@ -1785,7 +1836,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
/* Last chance! Try AUTH_UNIX */
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
- nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
+ args->selected_flavor = RPC_AUTH_UNIX;
return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
}
@@ -1972,9 +2023,9 @@ static int nfs23_validate_mount_data(void *options,
args->bsize = data->bsize;
if (data->flags & NFS_MOUNT_SECFLAVOUR)
- nfs_set_auth_parsed_mount_data(args, data->pseudoflavor);
+ args->selected_flavor = data->pseudoflavor;
else
- nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
+ args->selected_flavor = RPC_AUTH_UNIX;
if (!args->nfs_server.hostname)
goto out_nomem;
@@ -2108,9 +2159,6 @@ static int nfs_validate_text_mount_data(void *options,
nfs_set_port(sap, &args->nfs_server.port, port);
- if (args->auth_flavor_len > 1)
- goto out_bad_auth;
-
return nfs_parse_devname(dev_name,
&args->nfs_server.hostname,
max_namelen,
@@ -2130,10 +2178,6 @@ out_invalid_transport_udp:
out_no_address:
dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
return -EINVAL;
-
-out_bad_auth:
- dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
- return -EINVAL;
}
static int
@@ -2143,8 +2187,10 @@ nfs_compare_remount_data(struct nfs_server *nfss,
if (data->flags != nfss->flags ||
data->rsize != nfss->rsize ||
data->wsize != nfss->wsize ||
+ data->version != nfss->nfs_client->rpc_ops->version ||
+ data->minorversion != nfss->nfs_client->cl_minorversion ||
data->retrans != nfss->client->cl_timeout->to_retries ||
- data->auth_flavors[0] != nfss->client->cl_auth->au_flavor ||
+ data->selected_flavor != nfss->client->cl_auth->au_flavor ||
data->acregmin != nfss->acregmin / HZ ||
data->acregmax != nfss->acregmax / HZ ||
data->acdirmin != nfss->acdirmin / HZ ||
@@ -2189,7 +2235,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->rsize = nfss->rsize;
data->wsize = nfss->wsize;
data->retrans = nfss->client->cl_timeout->to_retries;
- nfs_set_auth_parsed_mount_data(data, nfss->client->cl_auth->au_flavor);
+ data->selected_flavor = nfss->client->cl_auth->au_flavor;
+ data->auth_info = nfss->auth_info;
data->acregmin = nfss->acregmin / HZ;
data->acregmax = nfss->acregmax / HZ;
data->acdirmin = nfss->acdirmin / HZ;
@@ -2197,12 +2244,14 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
data->nfs_server.port = nfss->port;
data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
+ data->version = nfsvers;
+ data->minorversion = nfss->nfs_client->cl_minorversion;
memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
data->nfs_server.addrlen);
/* overwrite those values with any that were specified */
- error = nfs_parse_mount_options((char *)options, data);
- if (error < 0)
+ error = -EINVAL;
+ if (!nfs_parse_mount_options((char *)options, data))
goto out;
/*
@@ -2332,7 +2381,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->acdirmax != b->acdirmax)
goto Ebusy;
- if (b->flags & NFS_MOUNT_SECFLAVOUR &&
+ if (b->auth_info.flavor_len > 0 &&
clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
goto Ebusy;
return 1;
@@ -2530,6 +2579,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
mntroot = ERR_PTR(error);
goto error_splat_bdi;
}
+ server->super = s;
}
if (!s->s_root) {
@@ -2713,9 +2763,9 @@ static int nfs4_validate_mount_data(void *options,
data->auth_flavours,
sizeof(pseudoflavor)))
return -EFAULT;
- nfs_set_auth_parsed_mount_data(args, pseudoflavor);
+ args->selected_flavor = pseudoflavor;
} else
- nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
+ args->selected_flavor = RPC_AUTH_UNIX;
c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
if (IS_ERR(c))
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index bb939edd4c99..0c29b1bb3936 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -493,7 +493,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
unsigned long long fileid;
struct dentry *sdentry;
struct rpc_task *task;
- int error = -EIO;
+ int error = -EBUSY;
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -503,7 +503,6 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
/*
* We don't allow a dentry to be silly-renamed twice.
*/
- error = -EBUSY;
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto out;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0ba679866e50..da276640f776 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
clear_buffer_nilfs_redirected(bh);
+ clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
if (nilfs_page_buffers_clean(page))
__nilfs_clear_page_dirty(page);
@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
"discard block %llu, size %zu",
(u64)bh->b_blocknr, bh->b_size);
}
+ clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bd88a7461063..9f6b486b6c01 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
bh = head = page_buffers(page);
do {
- if (!buffer_dirty(bh))
+ if (!buffer_dirty(bh) || buffer_async_write(bh))
continue;
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers, listp);
@@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]);
do {
- if (buffer_dirty(bh)) {
+ if (buffer_dirty(bh) &&
+ !buffer_async_write(bh)) {
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers,
listp);
@@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ set_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page) {
lock_page(bd_page);
@@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
+ set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
lock_page(bd_page);
@@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
+ clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
@@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_async_write(bh);
clear_buffer_delay(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_redirected(bh);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index ef999729e274..0d3a97d2d5f6 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -70,9 +70,10 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
*/
if (inode == NULL) {
unsigned long gen = (unsigned long) dentry->d_fsdata;
- unsigned long pgen =
- OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
-
+ unsigned long pgen;
+ spin_lock(&dentry->d_lock);
+ pgen = OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
+ spin_unlock(&dentry->d_lock);
trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
dentry->d_name.name,
pgen, gen);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 121da2dc3be8..d4e81e4a9b04 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
{
int tmp, hangup_needed = 0;
struct ocfs2_super *osb = NULL;
- char nodestr[8];
+ char nodestr[12];
trace_ocfs2_dismount_volume(sb);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 73feacc49b2e..fd777032c2ba 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1163,21 +1163,6 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
return NULL;
}
-static int newer_jl_done(struct reiserfs_journal_cnode *cn)
-{
- struct super_block *sb = cn->sb;
- b_blocknr_t blocknr = cn->blocknr;
-
- cn = cn->hprev;
- while (cn) {
- if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
- atomic_read(&cn->jlist->j_commit_left) != 0)
- return 0;
- cn = cn->hprev;
- }
- return 1;
-}
-
static void remove_journal_hash(struct super_block *,
struct reiserfs_journal_cnode **,
struct reiserfs_journal_list *, unsigned long,
@@ -1353,7 +1338,6 @@ static int flush_journal_list(struct super_block *s,
reiserfs_warning(s, "clm-2048", "called with wcount %d",
atomic_read(&journal->j_wcount));
}
- BUG_ON(jl->j_trans_id == 0);
/* if flushall == 0, the lock is already held */
if (flushall) {
@@ -1593,31 +1577,6 @@ static int flush_journal_list(struct super_block *s,
return err;
}
-static int test_transaction(struct super_block *s,
- struct reiserfs_journal_list *jl)
-{
- struct reiserfs_journal_cnode *cn;
-
- if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
- return 1;
-
- cn = jl->j_realblock;
- while (cn) {
- /* if the blocknr == 0, this has been cleared from the hash,
- ** skip it
- */
- if (cn->blocknr == 0) {
- goto next;
- }
- if (cn->bh && !newer_jl_done(cn))
- return 0;
- next:
- cn = cn->next;
- cond_resched();
- }
- return 0;
-}
-
static int write_one_transaction(struct super_block *s,
struct reiserfs_journal_list *jl,
struct buffer_chunk *chunk)
@@ -1805,6 +1764,8 @@ static int flush_used_journal_lists(struct super_block *s,
break;
tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
}
+ get_journal_list(jl);
+ get_journal_list(flush_jl);
/* try to find a group of blocks we can flush across all the
** transactions, but only bother if we've actually spanned
** across multiple lists
@@ -1813,6 +1774,8 @@ static int flush_used_journal_lists(struct super_block *s,
ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
}
flush_journal_list(s, flush_jl, 1);
+ put_journal_list(s, flush_jl);
+ put_journal_list(s, jl);
return 0;
}
@@ -3868,27 +3831,6 @@ int reiserfs_prepare_for_journal(struct super_block *sb,
return 1;
}
-static void flush_old_journal_lists(struct super_block *s)
-{
- struct reiserfs_journal *journal = SB_JOURNAL(s);
- struct reiserfs_journal_list *jl;
- struct list_head *entry;
- time_t now = get_seconds();
-
- while (!list_empty(&journal->j_journal_list)) {
- entry = journal->j_journal_list.next;
- jl = JOURNAL_LIST_ENTRY(entry);
- /* this check should always be run, to send old lists to disk */
- if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
- atomic_read(&jl->j_commit_left) == 0 &&
- test_transaction(s, jl)) {
- flush_used_journal_lists(s, jl);
- } else {
- break;
- }
- }
-}
-
/*
** long and ugly. If flush, will not return until all commit
** blocks and all real buffers in the trans are on disk.
@@ -4232,7 +4174,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
}
}
}
- flush_old_journal_lists(sb);
journal->j_current_jl->j_list_bitmap =
get_list_bitmap(sb, journal->j_current_jl);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index d0c6a007ce83..eda10959714f 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -487,6 +487,7 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_sb = sb;
sbi->s_block_base = 0;
sbi->s_type = FSTYPE_V7;
+ mutex_init(&sbi->s_lock);
sb->s_fs_info = sbi;
sb_set_blocksize(sb, 512);
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 7e5aae4bf46f..6eaf5edf1ea1 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -30,18 +30,17 @@ void udf_free_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
struct udf_sb_info *sbi = UDF_SB(sb);
+ struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
- mutex_lock(&sbi->s_alloc_mutex);
- if (sbi->s_lvid_bh) {
- struct logicalVolIntegrityDescImpUse *lvidiu =
- udf_sb_lvidiu(sbi);
+ if (lvidiu) {
+ mutex_lock(&sbi->s_alloc_mutex);
if (S_ISDIR(inode->i_mode))
le32_add_cpu(&lvidiu->numDirs, -1);
else
le32_add_cpu(&lvidiu->numFiles, -1);
udf_updated_lvid(sb);
+ mutex_unlock(&sbi->s_alloc_mutex);
}
- mutex_unlock(&sbi->s_alloc_mutex);
udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
}
@@ -55,6 +54,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
struct udf_inode_info *iinfo;
struct udf_inode_info *dinfo = UDF_I(dir);
+ struct logicalVolIntegrityDescImpUse *lvidiu;
inode = new_inode(sb);
@@ -92,12 +92,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
return NULL;
}
- if (sbi->s_lvid_bh) {
- struct logicalVolIntegrityDescImpUse *lvidiu;
-
+ lvidiu = udf_sb_lvidiu(sb);
+ if (lvidiu) {
iinfo->i_unique = lvid_get_unique_id(sb);
mutex_lock(&sbi->s_alloc_mutex);
- lvidiu = udf_sb_lvidiu(sbi);
if (S_ISDIR(mode))
le32_add_cpu(&lvidiu->numDirs, 1);
else
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 839a2bad7f45..91219385691d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -94,13 +94,25 @@ static unsigned int udf_count_free(struct super_block *);
static int udf_statfs(struct dentry *, struct kstatfs *);
static int udf_show_options(struct seq_file *, struct dentry *);
-struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
+struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
{
- struct logicalVolIntegrityDesc *lvid =
- (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
- __u32 number_of_partitions = le32_to_cpu(lvid->numOfPartitions);
- __u32 offset = number_of_partitions * 2 *
- sizeof(uint32_t)/sizeof(uint8_t);
+ struct logicalVolIntegrityDesc *lvid;
+ unsigned int partnum;
+ unsigned int offset;
+
+ if (!UDF_SB(sb)->s_lvid_bh)
+ return NULL;
+ lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
+ partnum = le32_to_cpu(lvid->numOfPartitions);
+ if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
+ offsetof(struct logicalVolIntegrityDesc, impUse)) /
+ (2 * sizeof(uint32_t)) < partnum) {
+ udf_err(sb, "Logical volume integrity descriptor corrupted "
+ "(numOfPartitions = %u)!\n", partnum);
+ return NULL;
+ }
+ /* The offset is to skip freeSpaceTable and sizeTable arrays */
+ offset = partnum * 2 * sizeof(uint32_t);
return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
}
@@ -629,9 +641,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
struct udf_options uopt;
struct udf_sb_info *sbi = UDF_SB(sb);
int error = 0;
+ struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
- if (sbi->s_lvid_bh) {
- int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
+ if (lvidiu) {
+ int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev);
if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
return -EACCES;
}
@@ -1905,11 +1918,12 @@ static void udf_open_lvid(struct super_block *sb)
if (!bh)
return;
-
- mutex_lock(&sbi->s_alloc_mutex);
lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
- lvidiu = udf_sb_lvidiu(sbi);
+ lvidiu = udf_sb_lvidiu(sb);
+ if (!lvidiu)
+ return;
+ mutex_lock(&sbi->s_alloc_mutex);
lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
udf_time_to_disk_stamp(&lvid->recordingDateAndTime,
@@ -1937,10 +1951,12 @@ static void udf_close_lvid(struct super_block *sb)
if (!bh)
return;
+ lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+ lvidiu = udf_sb_lvidiu(sb);
+ if (!lvidiu)
+ return;
mutex_lock(&sbi->s_alloc_mutex);
- lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
- lvidiu = udf_sb_lvidiu(sbi);
lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME);
@@ -2093,15 +2109,19 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (sbi->s_lvid_bh) {
struct logicalVolIntegrityDescImpUse *lvidiu =
- udf_sb_lvidiu(sbi);
- uint16_t minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev);
- uint16_t minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev);
- /* uint16_t maxUDFWriteRev =
- le16_to_cpu(lvidiu->maxUDFWriteRev); */
+ udf_sb_lvidiu(sb);
+ uint16_t minUDFReadRev;
+ uint16_t minUDFWriteRev;
+ if (!lvidiu) {
+ ret = -EINVAL;
+ goto error_out;
+ }
+ minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev);
+ minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev);
if (minUDFReadRev > UDF_MAX_READ_VERSION) {
udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
- le16_to_cpu(lvidiu->minUDFReadRev),
+ minUDFReadRev,
UDF_MAX_READ_VERSION);
ret = -EINVAL;
goto error_out;
@@ -2265,11 +2285,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
struct logicalVolIntegrityDescImpUse *lvidiu;
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
- if (sbi->s_lvid_bh != NULL)
- lvidiu = udf_sb_lvidiu(sbi);
- else
- lvidiu = NULL;
-
+ lvidiu = udf_sb_lvidiu(sb);
buf->f_type = UDF_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len;
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index ed401e94aa8c..1f32c7bd9f57 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -162,7 +162,7 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
return sb->s_fs_info;
}
-struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi);
+struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb);
int udf_compute_nr_groups(struct super_block *sb, u32 partition);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 88c5ea75ebf6..f1d85cfc0a54 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -628,6 +628,7 @@ xfs_buf_item_unlock(
else if (aborted) {
ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
if (lip->li_flags & XFS_LI_IN_AIL) {
+ spin_lock(&lip->li_ailp->xa_lock);
xfs_trans_ail_delete(lip->li_ailp, lip,
SHUTDOWN_LOG_IO_ERROR);
}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 069537c845e5..20bf8e8002d6 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1224,6 +1224,7 @@ xfs_da3_node_toosmall(
/* start with smaller blk num */
forward = nodehdr.forw < nodehdr.back;
for (i = 0; i < 2; forward = !forward, i++) {
+ struct xfs_da3_icnode_hdr thdr;
if (forward)
blkno = nodehdr.forw;
else
@@ -1236,10 +1237,10 @@ xfs_da3_node_toosmall(
return(error);
node = bp->b_addr;
- xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&thdr, node);
xfs_trans_brelse(state->args->trans, bp);
- if (count - nodehdr.count >= 0)
+ if (count - thdr.count >= 0)
break; /* fits with at least 25% to spare */
}
if (i >= 2) {
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1edb5cc3e5f4..18272c766a50 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -515,7 +515,7 @@ typedef struct xfs_swapext
/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
-#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
+#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 193206ba4358..474807a401c8 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -119,11 +119,6 @@ xfs_inode_free(
ip->i_itemp = NULL;
}
- /* asserts to verify all state is correct here */
- ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!spin_is_locked(&ip->i_flags_lock));
- ASSERT(!xfs_isiflocked(ip));
-
/*
* Because we use RCU freeing we need to ensure the inode always
* appears to be reclaimed with an invalid inode number when in the
@@ -135,6 +130,10 @@ xfs_inode_free(
ip->i_ino = 0;
spin_unlock(&ip->i_flags_lock);
+ /* asserts to verify all state is correct here */
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+ ASSERT(!xfs_isiflocked(ip));
+
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index dabda9521b4b..cc179878fe41 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1970,6 +1970,13 @@ xlog_recover_do_inode_buffer(
* magic number. If we don't recognise the magic number in the buffer, then
* return a LSN of -1 so that the caller knows it was an unrecognised block and
* so can recover the buffer.
+ *
+ * Note: we cannot rely solely on magic number matches to determine that the
+ * buffer has a valid LSN - we also need to verify that it belongs to this
+ * filesystem, so we need to extract the object's UUID and compare it to that
+ * which we read from the superblock. If the UUIDs don't match, then we've got a
+ * stale metadata block from an old filesystem instance that we need to recover
+ * over the top of.
*/
static xfs_lsn_t
xlog_recover_get_buf_lsn(
@@ -1980,6 +1987,8 @@ xlog_recover_get_buf_lsn(
__uint16_t magic16;
__uint16_t magicda;
void *blk = bp->b_addr;
+ uuid_t *uuid;
+ xfs_lsn_t lsn = -1;
/* v4 filesystems always recover immediately */
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -1992,43 +2001,79 @@ xlog_recover_get_buf_lsn(
case XFS_ABTB_MAGIC:
case XFS_ABTC_MAGIC:
case XFS_IBT_CRC_MAGIC:
- case XFS_IBT_MAGIC:
- return be64_to_cpu(
- ((struct xfs_btree_block *)blk)->bb_u.s.bb_lsn);
+ case XFS_IBT_MAGIC: {
+ struct xfs_btree_block *btb = blk;
+
+ lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
+ uuid = &btb->bb_u.s.bb_uuid;
+ break;
+ }
case XFS_BMAP_CRC_MAGIC:
- case XFS_BMAP_MAGIC:
- return be64_to_cpu(
- ((struct xfs_btree_block *)blk)->bb_u.l.bb_lsn);
+ case XFS_BMAP_MAGIC: {
+ struct xfs_btree_block *btb = blk;
+
+ lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
+ uuid = &btb->bb_u.l.bb_uuid;
+ break;
+ }
case XFS_AGF_MAGIC:
- return be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
+ lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
+ uuid = &((struct xfs_agf *)blk)->agf_uuid;
+ break;
case XFS_AGFL_MAGIC:
- return be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
+ lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
+ uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
+ break;
case XFS_AGI_MAGIC:
- return be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
+ lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
+ uuid = &((struct xfs_agi *)blk)->agi_uuid;
+ break;
case XFS_SYMLINK_MAGIC:
- return be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
+ lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
+ uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
+ break;
case XFS_DIR3_BLOCK_MAGIC:
case XFS_DIR3_DATA_MAGIC:
case XFS_DIR3_FREE_MAGIC:
- return be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
+ lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
+ uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
+ break;
case XFS_ATTR3_RMT_MAGIC:
- return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
+ lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
+ uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
+ break;
case XFS_SB_MAGIC:
- return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
+ lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
+ uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+ break;
default:
break;
}
+ if (lsn != (xfs_lsn_t)-1) {
+ if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+ goto recover_immediately;
+ return lsn;
+ }
+
magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
switch (magicda) {
case XFS_DIR3_LEAF1_MAGIC:
case XFS_DIR3_LEAFN_MAGIC:
case XFS_DA3_NODE_MAGIC:
- return be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+ lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+ uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
+ break;
default:
break;
}
+ if (lsn != (xfs_lsn_t)-1) {
+ if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+ goto recover_immediately;
+ return lsn;
+ }
+
/*
* We do individual object checks on dquot and inode buffers as they
* have their own individual LSN records. Also, we could have a stale