summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c12
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/afs/inode.c3
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/main.c13
-rw-r--r--fs/afs/mntpt.c63
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/afs/server.c13
-rw-r--r--fs/afs/vlocation.c14
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/autofs4/autofs_i.h99
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/expire.c51
-rw-r--r--fs/autofs4/inode.c26
-rw-r--r--fs/autofs4/root.c673
-rw-r--r--fs/autofs4/waitq.c17
-rw-r--r--fs/block_dev.c93
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/cifs/cifs_dfs_ref.c120
-rw-r--r--fs/cifs/cifsfs.h6
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/fs_struct.c35
-rw-r--r--fs/fscache/operation.c2
-rw-r--r--fs/internal.h5
-rw-r--r--fs/locks.c8
-rw-r--r--fs/namei.c408
-rw-r--r--fs/namespace.c196
-rw-r--r--fs/nfs/dir.c4
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/namespace.c77
-rw-r--r--fs/nfsd/acl.h59
-rw-r--r--fs/nfsd/export.c4
-rw-r--r--fs/nfsd/idmap.h62
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4acl.c2
-rw-r--r--fs/nfsd/nfs4callback.c151
-rw-r--r--fs/nfsd/nfs4idmap.c15
-rw-r--r--fs/nfsd/nfs4proc.c59
-rw-r--r--fs/nfsd/nfs4recover.c1
-rw-r--r--fs/nfsd/nfs4state.c243
-rw-r--r--fs/nfsd/nfs4xdr.c115
-rw-r--r--fs/nfsd/nfsctl.c4
-rw-r--r--fs/nfsd/nfsd.h1
-rw-r--r--fs/nfsd/nfsproc.c6
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/state.h16
-rw-r--r--fs/nfsd/vfs.c90
-rw-r--r--fs/nfsd/xdr4.h9
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/squashfs/Kconfig18
-rw-r--r--fs/squashfs/Makefile1
-rw-r--r--fs/squashfs/block.c1
-rw-r--r--fs/squashfs/cache.c1
-rw-r--r--fs/squashfs/decompressor.c16
-rw-r--r--fs/squashfs/decompressor.h9
-rw-r--r--fs/squashfs/fragment.c1
-rw-r--r--fs/squashfs/id.c1
-rw-r--r--fs/squashfs/lzo_wrapper.c1
-rw-r--r--fs/squashfs/squashfs.h8
-rw-r--r--fs/squashfs/squashfs_fs.h1
-rw-r--r--fs/squashfs/squashfs_fs_i.h6
-rw-r--r--fs/squashfs/xattr_id.c1
-rw-r--r--fs/squashfs/xz_wrapper.c153
-rw-r--r--fs/squashfs/zlib_wrapper.c15
-rw-r--r--fs/stat.c4
-rw-r--r--fs/super.c2
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c191
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c535
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h33
-rw-r--r--fs/xfs/support/debug.c112
-rw-r--r--fs/xfs/support/debug.h25
-rw-r--r--fs/xfs/xfs_alloc.c10
-rw-r--r--fs/xfs/xfs_alloc.h25
-rw-r--r--fs/xfs/xfs_buf_item.c151
-rw-r--r--fs/xfs/xfs_error.c31
-rw-r--r--fs/xfs/xfs_error.h18
-rw-r--r--fs/xfs/xfs_fsops.c10
-rw-r--r--fs/xfs/xfs_fsops.h2
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_trans.c2
92 files changed, 2587 insertions, 1684 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index a3bcec75c54a..1c8c6cc6de30 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -289,7 +289,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
call->server = server;
INIT_WORK(&call->work, SRXAFSCB_CallBack);
- schedule_work(&call->work);
+ queue_work(afs_wq, &call->work);
return 0;
}
@@ -336,7 +336,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
call->server = server;
INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
- schedule_work(&call->work);
+ queue_work(afs_wq, &call->work);
return 0;
}
@@ -367,7 +367,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
call->server = server;
INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
- schedule_work(&call->work);
+ queue_work(afs_wq, &call->work);
return 0;
}
@@ -400,7 +400,7 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
call->state = AFS_CALL_REPLYING;
INIT_WORK(&call->work, SRXAFSCB_Probe);
- schedule_work(&call->work);
+ queue_work(afs_wq, &call->work);
return 0;
}
@@ -496,7 +496,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
call->state = AFS_CALL_REPLYING;
INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
- schedule_work(&call->work);
+ queue_work(afs_wq, &call->work);
return 0;
}
@@ -580,6 +580,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
call->state = AFS_CALL_REPLYING;
INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself);
- schedule_work(&call->work);
+ queue_work(afs_wq, &call->work);
return 0;
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e6a4ab980e31..20c106f24927 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -66,6 +66,7 @@ const struct dentry_operations afs_fs_dentry_operations = {
.d_revalidate = afs_d_revalidate,
.d_delete = afs_d_delete,
.d_release = afs_d_release,
+ .d_automount = afs_d_automount,
};
#define AFS_DIR_HASHTBL_SIZE 128
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0747339011c3..db66c5201474 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -184,7 +184,8 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
inode->i_generation = 0;
set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
- inode->i_flags |= S_NOATIME;
+ set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+ inode->i_flags |= S_AUTOMOUNT | S_NOATIME;
unlock_new_inode(inode);
_leave(" = %p", inode);
return inode;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ab6db5abaf53..5a9b6843bac1 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -577,6 +577,7 @@ extern int afs_drop_inode(struct inode *);
/*
* main.c
*/
+extern struct workqueue_struct *afs_wq;
extern struct afs_uuid afs_uuid;
/*
@@ -591,6 +592,7 @@ extern const struct inode_operations afs_mntpt_inode_operations;
extern const struct inode_operations afs_autocell_inode_operations;
extern const struct file_operations afs_mntpt_file_operations;
+extern struct vfsmount *afs_d_automount(struct path *);
extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
extern void afs_mntpt_kill_timer(void);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index cfd1cbe25b22..42dd2e499ed8 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -30,6 +30,7 @@ module_param(rootcell, charp, 0);
MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
struct afs_uuid afs_uuid;
+struct workqueue_struct *afs_wq;
/*
* get a client UUID
@@ -87,10 +88,16 @@ static int __init afs_init(void)
if (ret < 0)
return ret;
+ /* create workqueue */
+ ret = -ENOMEM;
+ afs_wq = alloc_workqueue("afs", 0, 0);
+ if (!afs_wq)
+ return ret;
+
/* register the /proc stuff */
ret = afs_proc_init();
if (ret < 0)
- return ret;
+ goto error_proc;
#ifdef CONFIG_AFS_FSCACHE
/* we want to be able to cache */
@@ -140,6 +147,8 @@ error_cell_init:
error_cache:
#endif
afs_proc_cleanup();
+error_proc:
+ destroy_workqueue(afs_wq);
rcu_barrier();
printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
return ret;
@@ -163,7 +172,7 @@ static void __exit afs_exit(void)
afs_purge_servers();
afs_callback_update_kill();
afs_vlocation_purge();
- flush_scheduled_work();
+ destroy_workqueue(afs_wq);
afs_cell_purge();
#ifdef CONFIG_AFS_FSCACHE
fscache_unregister_netfs(&afs_cache_netfs);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 6153417caf57..aa59184151d0 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -24,7 +24,6 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
struct dentry *dentry,
struct nameidata *nd);
static int afs_mntpt_open(struct inode *inode, struct file *file);
-static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
static void afs_mntpt_expiry_timed_out(struct work_struct *work);
const struct file_operations afs_mntpt_file_operations = {
@@ -34,13 +33,11 @@ const struct file_operations afs_mntpt_file_operations = {
const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
- .follow_link = afs_mntpt_follow_link,
.readlink = page_readlink,
.getattr = afs_getattr,
};
const struct inode_operations afs_autocell_inode_operations = {
- .follow_link = afs_mntpt_follow_link,
.getattr = afs_getattr,
};
@@ -88,6 +85,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
_debug("symlink is a mountpoint");
spin_lock(&vnode->lock);
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+ vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
spin_unlock(&vnode->lock);
}
@@ -238,52 +236,24 @@ error_no_devname:
}
/*
- * follow a link from a mountpoint directory, thus causing it to be mounted
+ * handle an automount point
*/
-static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
+struct vfsmount *afs_d_automount(struct path *path)
{
struct vfsmount *newmnt;
- int err;
- _enter("%p{%s},{%s:%p{%s},}",
- dentry,
- dentry->d_name.name,
- nd->path.mnt->mnt_devname,
- dentry,
- nd->path.dentry->d_name.name);
-
- dput(nd->path.dentry);
- nd->path.dentry = dget(dentry);
+ _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name);
- newmnt = afs_mntpt_do_automount(nd->path.dentry);
- if (IS_ERR(newmnt)) {
- path_put(&nd->path);
- return (void *)newmnt;
- }
-
- mntget(newmnt);
- err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
- switch (err) {
- case 0:
- path_put(&nd->path);
- nd->path.mnt = newmnt;
- nd->path.dentry = dget(newmnt->mnt_root);
- schedule_delayed_work(&afs_mntpt_expiry_timer,
- afs_mntpt_expiry_timeout * HZ);
- break;
- case -EBUSY:
- /* someone else made a mount here whilst we were busy */
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path))
- ;
- err = 0;
- default:
- mntput(newmnt);
- break;
- }
+ newmnt = afs_mntpt_do_automount(path->dentry);
+ if (IS_ERR(newmnt))
+ return newmnt;
- _leave(" = %d", err);
- return ERR_PTR(err);
+ mntget(newmnt); /* prevent immediate expiration */
+ mnt_set_expiry(newmnt, &afs_vfsmounts);
+ queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
+ afs_mntpt_expiry_timeout * HZ);
+ _leave(" = %p {%s}", newmnt, newmnt->mnt_devname);
+ return newmnt;
}
/*
@@ -295,8 +265,8 @@ static void afs_mntpt_expiry_timed_out(struct work_struct *work)
if (!list_empty(&afs_vfsmounts)) {
mark_mounts_for_expiry(&afs_vfsmounts);
- schedule_delayed_work(&afs_mntpt_expiry_timer,
- afs_mntpt_expiry_timeout * HZ);
+ queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
+ afs_mntpt_expiry_timeout * HZ);
}
_leave("");
@@ -310,6 +280,5 @@ void afs_mntpt_kill_timer(void)
_enter("");
ASSERT(list_empty(&afs_vfsmounts));
- cancel_delayed_work(&afs_mntpt_expiry_timer);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&afs_mntpt_expiry_timer);
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 654d8fdbf01f..e45a323aebb4 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -410,7 +410,7 @@ static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
if (!call) {
/* its an incoming call for our callback service */
skb_queue_tail(&afs_incoming_calls, skb);
- schedule_work(&afs_collect_incoming_call_work);
+ queue_work(afs_wq, &afs_collect_incoming_call_work);
} else {
/* route the messages directly to the appropriate call */
skb_queue_tail(&call->rx_queue, skb);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 9fdc7fe3a7bc..d59b7516e943 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -238,8 +238,8 @@ void afs_put_server(struct afs_server *server)
if (atomic_read(&server->usage) == 0) {
list_move_tail(&server->grave, &afs_server_graveyard);
server->time_of_death = get_seconds();
- schedule_delayed_work(&afs_server_reaper,
- afs_server_timeout * HZ);
+ queue_delayed_work(afs_wq, &afs_server_reaper,
+ afs_server_timeout * HZ);
}
spin_unlock(&afs_server_graveyard_lock);
_leave(" [dead]");
@@ -285,10 +285,11 @@ static void afs_reap_server(struct work_struct *work)
expiry = server->time_of_death + afs_server_timeout;
if (expiry > now) {
delay = (expiry - now) * HZ;
- if (!schedule_delayed_work(&afs_server_reaper, delay)) {
+ if (!queue_delayed_work(afs_wq, &afs_server_reaper,
+ delay)) {
cancel_delayed_work(&afs_server_reaper);
- schedule_delayed_work(&afs_server_reaper,
- delay);
+ queue_delayed_work(afs_wq, &afs_server_reaper,
+ delay);
}
break;
}
@@ -323,5 +324,5 @@ void __exit afs_purge_servers(void)
{
afs_server_timeout = 0;
cancel_delayed_work(&afs_server_reaper);
- schedule_delayed_work(&afs_server_reaper, 0);
+ queue_delayed_work(afs_wq, &afs_server_reaper, 0);
}
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 9ac260d1361d..431984d2e372 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -507,8 +507,8 @@ void afs_put_vlocation(struct afs_vlocation *vl)
_debug("buried");
list_move_tail(&vl->grave, &afs_vlocation_graveyard);
vl->time_of_death = get_seconds();
- schedule_delayed_work(&afs_vlocation_reap,
- afs_vlocation_timeout * HZ);
+ queue_delayed_work(afs_wq, &afs_vlocation_reap,
+ afs_vlocation_timeout * HZ);
/* suspend updates on this record */
if (!list_empty(&vl->update)) {
@@ -561,11 +561,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
if (expiry > now) {
delay = (expiry - now) * HZ;
_debug("delay %lu", delay);
- if (!schedule_delayed_work(&afs_vlocation_reap,
- delay)) {
+ if (!queue_delayed_work(afs_wq, &afs_vlocation_reap,
+ delay)) {
cancel_delayed_work(&afs_vlocation_reap);
- schedule_delayed_work(&afs_vlocation_reap,
- delay);
+ queue_delayed_work(afs_wq, &afs_vlocation_reap,
+ delay);
}
break;
}
@@ -620,7 +620,7 @@ void afs_vlocation_purge(void)
destroy_workqueue(afs_vlocation_update_worker);
cancel_delayed_work(&afs_vlocation_reap);
- schedule_delayed_work(&afs_vlocation_reap, 0);
+ queue_delayed_work(afs_wq, &afs_vlocation_reap, 0);
}
/*
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index cbe57f3c4d89..c5567cb78432 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -233,7 +233,7 @@ static int __init anon_inode_init(void)
return 0;
err_mntput:
- mntput_long(anon_inode_mnt);
+ mntput(anon_inode_mnt);
err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type);
err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 0fffe1c24cec..1f016bfb42d5 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -99,7 +99,6 @@ struct autofs_info {
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
-#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
struct autofs_wait_queue {
@@ -176,13 +175,6 @@ static inline int autofs4_ispending(struct dentry *dentry)
return 0;
}
-static inline void autofs4_copy_atime(struct file *src, struct file *dst)
-{
- dst->f_path.dentry->d_inode->i_atime =
- src->f_path.dentry->d_inode->i_atime;
- return;
-}
-
struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *);
void autofs4_free_ino(struct autofs_info *);
@@ -212,11 +204,83 @@ void autofs_dev_ioctl_exit(void);
extern const struct inode_operations autofs4_symlink_inode_operations;
extern const struct inode_operations autofs4_dir_inode_operations;
-extern const struct inode_operations autofs4_root_inode_operations;
-extern const struct inode_operations autofs4_indirect_root_inode_operations;
-extern const struct inode_operations autofs4_direct_root_inode_operations;
extern const struct file_operations autofs4_dir_operations;
extern const struct file_operations autofs4_root_operations;
+extern const struct dentry_operations autofs4_dentry_operations;
+
+/* VFS automount flags management functions */
+
+static inline void __managed_dentry_set_automount(struct dentry *dentry)
+{
+ dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+}
+
+static inline void managed_dentry_set_automount(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_set_automount(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_clear_automount(struct dentry *dentry)
+{
+ dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
+}
+
+static inline void managed_dentry_clear_automount(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_automount(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_set_transit(struct dentry *dentry)
+{
+ dentry->d_flags |= DCACHE_MANAGE_TRANSIT;
+}
+
+static inline void managed_dentry_set_transit(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_set_transit(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_clear_transit(struct dentry *dentry)
+{
+ dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT;
+}
+
+static inline void managed_dentry_clear_transit(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_transit(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_set_managed(struct dentry *dentry)
+{
+ dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
+}
+
+static inline void managed_dentry_set_managed(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_set_managed(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_clear_managed(struct dentry *dentry)
+{
+ dentry->d_flags &= ~(DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
+}
+
+static inline void managed_dentry_clear_managed(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_managed(dentry);
+ spin_unlock(&dentry->d_lock);
+}
/* Initializing function */
@@ -229,19 +293,6 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
void autofs4_catatonic_mode(struct autofs_sb_info *);
-static inline int autofs4_follow_mount(struct path *path)
-{
- int res = 0;
-
- while (d_mountpoint(path->dentry)) {
- int followed = follow_down(path);
- if (!followed)
- break;
- res = 1;
- }
- return res;
-}
-
static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
{
return new_encode_dev(sbi->sb->s_dev);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index eff9a419469a..1442da4860e5 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -551,7 +551,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
err = have_submounts(path.dentry);
- if (follow_down(&path))
+ if (follow_down_one(&path))
magic = path.mnt->mnt_sb->s_magic;
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cc1d01365905..3ed79d76c233 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -26,10 +26,6 @@ static inline int autofs4_can_expire(struct dentry *dentry,
if (ino == NULL)
return 0;
- /* No point expiring a pending mount */
- if (ino->flags & AUTOFS_INF_PENDING)
- return 0;
-
if (!do_now) {
/* Too young to die */
if (!timeout || time_after(ino->last_used + timeout, now))
@@ -56,7 +52,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
path_get(&path);
- if (!follow_down(&path))
+ if (!follow_down_one(&path))
goto done;
if (is_autofs4_dentry(path.dentry)) {
@@ -283,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
unsigned long timeout;
struct dentry *root = dget(sb->s_root);
int do_now = how & AUTOFS_EXP_IMMEDIATE;
+ struct autofs_info *ino;
if (!root)
return NULL;
@@ -291,19 +288,21 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
timeout = sbi->exp_timeout;
spin_lock(&sbi->fs_lock);
+ ino = autofs4_dentry_ino(root);
+ /* No point expiring a pending mount */
+ if (ino->flags & AUTOFS_INF_PENDING) {
+ spin_unlock(&sbi->fs_lock);
+ return NULL;
+ }
+ managed_dentry_set_transit(root);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
struct autofs_info *ino = autofs4_dentry_ino(root);
- if (d_mountpoint(root)) {
- ino->flags |= AUTOFS_INF_MOUNTPOINT;
- spin_lock(&root->d_lock);
- root->d_flags &= ~DCACHE_MOUNTED;
- spin_unlock(&root->d_lock);
- }
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
return root;
}
+ managed_dentry_clear_transit(root);
spin_unlock(&sbi->fs_lock);
dput(root);
@@ -340,6 +339,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
while ((dentry = get_next_positive_dentry(dentry, root))) {
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
+ /* No point expiring a pending mount */
+ if (ino->flags & AUTOFS_INF_PENDING)
+ goto cont;
+ managed_dentry_set_transit(dentry);
/*
* Case 1: (i) indirect mount or top level pseudo direct mount
@@ -399,6 +402,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
}
}
next:
+ managed_dentry_clear_transit(dentry);
+cont:
spin_unlock(&sbi->fs_lock);
}
return NULL;
@@ -479,6 +484,8 @@ int autofs4_expire_run(struct super_block *sb,
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
ino->flags &= ~AUTOFS_INF_EXPIRING;
+ if (!d_unhashed(dentry))
+ managed_dentry_clear_transit(dentry);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -504,18 +511,18 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
- spin_lock(&sb->s_root->d_lock);
- /*
- * If we haven't been expired away, then reset
- * mounted status.
- */
- if (mnt->mnt_parent != mnt)
- sb->s_root->d_flags |= DCACHE_MOUNTED;
- spin_unlock(&sb->s_root->d_lock);
- ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
- }
ino->flags &= ~AUTOFS_INF_EXPIRING;
+ spin_lock(&dentry->d_lock);
+ if (ret)
+ __managed_dentry_clear_transit(dentry);
+ else {
+ if ((IS_ROOT(dentry) ||
+ (autofs_type_indirect(sbi->type) &&
+ IS_ROOT(dentry->d_parent))) &&
+ !(dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+ __managed_dentry_set_automount(dentry);
+ }
+ spin_unlock(&dentry->d_lock);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
dput(dentry);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index a7bdb9dcac84..9e1a9dad23e1 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -45,7 +45,6 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
if (!reinit) {
ino->flags = 0;
- ino->inode = NULL;
ino->dentry = NULL;
ino->size = 0;
INIT_LIST_HEAD(&ino->active);
@@ -76,19 +75,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
void autofs4_free_ino(struct autofs_info *ino)
{
- struct autofs_info *p_ino;
-
if (ino->dentry) {
ino->dentry->d_fsdata = NULL;
- if (ino->dentry->d_inode) {
- struct dentry *parent = ino->dentry->d_parent;
- if (atomic_dec_and_test(&ino->count)) {
- p_ino = autofs4_dentry_ino(parent);
- if (p_ino && parent != ino->dentry)
- atomic_dec(&p_ino->count);
- }
- dput(ino->dentry);
- }
ino->dentry = NULL;
}
if (ino->free)
@@ -251,10 +239,6 @@ static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi)
return ino;
}
-static const struct dentry_operations autofs4_sb_dentry_operations = {
- .d_release = autofs4_dentry_release,
-};
-
int autofs4_fill_super(struct super_block *s, void *data, int silent)
{
struct inode * root_inode;
@@ -292,6 +276,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
s->s_blocksize_bits = 10;
s->s_magic = AUTOFS_SUPER_MAGIC;
s->s_op = &autofs4_sops;
+ s->s_d_op = &autofs4_dentry_operations;
s->s_time_gran = 1;
/*
@@ -309,7 +294,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
goto fail_iput;
pipe = NULL;
- d_set_d_op(root, &autofs4_sb_dentry_operations);
root->d_fsdata = ino;
/* Can this call block? */
@@ -320,10 +304,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
goto fail_dput;
}
+ if (autofs_type_trigger(sbi->type))
+ __managed_dentry_set_managed(root);
+
root_inode->i_fop = &autofs4_root_operations;
- root_inode->i_op = autofs_type_trigger(sbi->type) ?
- &autofs4_direct_root_inode_operations :
- &autofs4_indirect_root_inode_operations;
+ root_inode->i_op = &autofs4_dir_inode_operations;
/* Couldn't this be tested earlier? */
if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
@@ -391,7 +376,6 @@ struct inode *autofs4_get_inode(struct super_block *sb,
if (inode == NULL)
return NULL;
- inf->inode = inode;
inode->i_mode = inf->mode;
if (sb->s_root) {
inode->i_uid = sb->s_root->d_inode->i_uid;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 651e4ef563b1..1dba035fc376 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -35,10 +35,8 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
#endif
static int autofs4_dir_open(struct inode *inode, struct file *file);
static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
-static void *autofs4_follow_link(struct dentry *, struct nameidata *);
-
-#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
-#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
+static struct vfsmount *autofs4_d_automount(struct path *);
+static int autofs4_d_manage(struct dentry *, bool, bool);
const struct file_operations autofs4_root_operations = {
.open = dcache_dir_open,
@@ -60,7 +58,7 @@ const struct file_operations autofs4_dir_operations = {
.llseek = dcache_dir_lseek,
};
-const struct inode_operations autofs4_indirect_root_inode_operations = {
+const struct inode_operations autofs4_dir_inode_operations = {
.lookup = autofs4_lookup,
.unlink = autofs4_dir_unlink,
.symlink = autofs4_dir_symlink,
@@ -68,20 +66,10 @@ const struct inode_operations autofs4_indirect_root_inode_operations = {
.rmdir = autofs4_dir_rmdir,
};
-const struct inode_operations autofs4_direct_root_inode_operations = {
- .lookup = autofs4_lookup,
- .unlink = autofs4_dir_unlink,
- .mkdir = autofs4_dir_mkdir,
- .rmdir = autofs4_dir_rmdir,
- .follow_link = autofs4_follow_link,
-};
-
-const struct inode_operations autofs4_dir_inode_operations = {
- .lookup = autofs4_lookup,
- .unlink = autofs4_dir_unlink,
- .symlink = autofs4_dir_symlink,
- .mkdir = autofs4_dir_mkdir,
- .rmdir = autofs4_dir_rmdir,
+const struct dentry_operations autofs4_dentry_operations = {
+ .d_automount = autofs4_d_automount,
+ .d_manage = autofs4_d_manage,
+ .d_release = autofs4_dentry_release,
};
static void autofs4_add_active(struct dentry *dentry)
@@ -116,14 +104,6 @@ static void autofs4_del_active(struct dentry *dentry)
return;
}
-static unsigned int autofs4_need_mount(unsigned int flags)
-{
- unsigned int res = 0;
- if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS))
- res = 1;
- return res;
-}
-
static int autofs4_dir_open(struct inode *inode, struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
@@ -158,239 +138,6 @@ out:
return dcache_dir_open(inode, file);
}
-static int try_to_fill_dentry(struct dentry *dentry, int flags)
-{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
- int status;
-
- DPRINTK("dentry=%p %.*s ino=%p",
- dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
-
- /*
- * Wait for a pending mount, triggering one if there
- * isn't one already
- */
- if (dentry->d_inode == NULL) {
- DPRINTK("waiting for mount name=%.*s",
- dentry->d_name.len, dentry->d_name.name);
-
- status = autofs4_wait(sbi, dentry, NFY_MOUNT);
-
- DPRINTK("mount done status=%d", status);
-
- /* Turn this into a real negative dentry? */
- if (status == -ENOENT) {
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- return status;
- } else if (status) {
- /* Return a negative dentry, but leave it "pending" */
- return status;
- }
- /* Trigger mount for path component or follow link */
- } else if (ino->flags & AUTOFS_INF_PENDING ||
- autofs4_need_mount(flags)) {
- DPRINTK("waiting for mount name=%.*s",
- dentry->d_name.len, dentry->d_name.name);
-
- spin_lock(&sbi->fs_lock);
- ino->flags |= AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- status = autofs4_wait(sbi, dentry, NFY_MOUNT);
-
- DPRINTK("mount done status=%d", status);
-
- if (status) {
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- return status;
- }
- }
-
- /* Initialize expiry counter after successful mount */
- ino->last_used = jiffies;
-
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
-
- return 0;
-}
-
-/* For autofs direct mounts the follow link triggers the mount */
-static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
- int oz_mode = autofs4_oz_mode(sbi);
- unsigned int lookup_type;
- int status;
-
- DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
- dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
- nd->flags);
- /*
- * For an expire of a covered direct or offset mount we need
- * to break out of follow_down() at the autofs mount trigger
- * (d_mounted--), so we can see the expiring flag, and manage
- * the blocking and following here until the expire is completed.
- */
- if (oz_mode) {
- spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_EXPIRING) {
- spin_unlock(&sbi->fs_lock);
- /* Follow down to our covering mount. */
- if (!follow_down(&nd->path))
- goto done;
- goto follow;
- }
- spin_unlock(&sbi->fs_lock);
- goto done;
- }
-
- /* If an expire request is pending everyone must wait. */
- autofs4_expire_wait(dentry);
-
- /* We trigger a mount for almost all flags */
- lookup_type = autofs4_need_mount(nd->flags);
- spin_lock(&sbi->fs_lock);
- spin_lock(&autofs4_lock);
- spin_lock(&dentry->d_lock);
- if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
- spin_unlock(&sbi->fs_lock);
- goto follow;
- }
-
- /*
- * If the dentry contains directories then it is an autofs
- * multi-mount with no root mount offset. So don't try to
- * mount it again.
- */
- if (ino->flags & AUTOFS_INF_PENDING ||
- (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
- spin_unlock(&sbi->fs_lock);
-
- status = try_to_fill_dentry(dentry, nd->flags);
- if (status)
- goto out_error;
-
- goto follow;
- }
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
- spin_unlock(&sbi->fs_lock);
-follow:
- /*
- * If there is no root mount it must be an autofs
- * multi-mount with no root offset so we don't need
- * to follow it.
- */
- if (d_mountpoint(dentry)) {
- if (!autofs4_follow_mount(&nd->path)) {
- status = -ENOENT;
- goto out_error;
- }
- }
-
-done:
- return NULL;
-
-out_error:
- path_put(&nd->path);
- return ERR_PTR(status);
-}
-
-/*
- * Revalidate is called on every cache lookup. Some of those
- * cache lookups may actually happen while the dentry is not
- * yet completely filled in, and revalidate has to delay such
- * lookups..
- */
-static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *dir;
- struct autofs_sb_info *sbi;
- int oz_mode;
- int flags = nd ? nd->flags : 0;
- int status = 1;
-
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- dir = dentry->d_parent->d_inode;
- sbi = autofs4_sbi(dir->i_sb);
- oz_mode = autofs4_oz_mode(sbi);
-
- /* Pending dentry */
- spin_lock(&sbi->fs_lock);
- if (autofs4_ispending(dentry)) {
- /* The daemon never causes a mount to trigger */
- spin_unlock(&sbi->fs_lock);
-
- if (oz_mode)
- return 1;
-
- /*
- * If the directory has gone away due to an expire
- * we have been called as ->d_revalidate() and so
- * we need to return false and proceed to ->lookup().
- */
- if (autofs4_expire_wait(dentry) == -EAGAIN)
- return 0;
-
- /*
- * A zero status is success otherwise we have a
- * negative error code.
- */
- status = try_to_fill_dentry(dentry, flags);
- if (status == 0)
- return 1;
-
- return status;
- }
- spin_unlock(&sbi->fs_lock);
-
- /* Negative dentry.. invalidate if "old" */
- if (dentry->d_inode == NULL)
- return 0;
-
- /* Check for a non-mountpoint directory with no contents */
- spin_lock(&autofs4_lock);
- spin_lock(&dentry->d_lock);
- if (S_ISDIR(dentry->d_inode->i_mode) &&
- !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
- DPRINTK("dentry=%p %.*s, emptydir",
- dentry, dentry->d_name.len, dentry->d_name.name);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
-
- /* The daemon never causes a mount to trigger */
- if (oz_mode)
- return 1;
-
- /*
- * A zero status is success otherwise we have a
- * negative error code.
- */
- status = try_to_fill_dentry(dentry, flags);
- if (status == 0)
- return 1;
-
- return status;
- }
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
-
- return 1;
-}
-
void autofs4_dentry_release(struct dentry *de)
{
struct autofs_info *inf;
@@ -398,11 +145,8 @@ void autofs4_dentry_release(struct dentry *de)
DPRINTK("releasing %p", de);
inf = autofs4_dentry_ino(de);
- de->d_fsdata = NULL;
-
if (inf) {
struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
-
if (sbi) {
spin_lock(&sbi->lookup_lock);
if (!list_empty(&inf->active))
@@ -411,26 +155,10 @@ void autofs4_dentry_release(struct dentry *de)
list_del(&inf->expiring);
spin_unlock(&sbi->lookup_lock);
}
-
- inf->dentry = NULL;
- inf->inode = NULL;
-
autofs4_free_ino(inf);
}
}
-/* For dentries of directories in the root dir */
-static const struct dentry_operations autofs4_root_dentry_operations = {
- .d_revalidate = autofs4_revalidate,
- .d_release = autofs4_dentry_release,
-};
-
-/* For other dentries */
-static const struct dentry_operations autofs4_dentry_operations = {
- .d_revalidate = autofs4_revalidate,
- .d_release = autofs4_dentry_release,
-};
-
static struct dentry *autofs4_lookup_active(struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
@@ -541,50 +269,244 @@ next:
return NULL;
}
+static int autofs4_mount_wait(struct dentry *dentry)
+{
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ int status;
+
+ if (ino->flags & AUTOFS_INF_PENDING) {
+ DPRINTK("waiting for mount name=%.*s",
+ dentry->d_name.len, dentry->d_name.name);
+ status = autofs4_wait(sbi, dentry, NFY_MOUNT);
+ DPRINTK("mount wait done status=%d", status);
+ ino->last_used = jiffies;
+ return status;
+ }
+ return 0;
+}
+
+static int do_expire_wait(struct dentry *dentry)
+{
+ struct dentry *expiring;
+
+ expiring = autofs4_lookup_expiring(dentry);
+ if (!expiring)
+ return autofs4_expire_wait(dentry);
+ else {
+ /*
+ * If we are racing with expire the request might not
+ * be quite complete, but the directory has been removed
+ * so it must have been successful, just wait for it.
+ */
+ autofs4_expire_wait(expiring);
+ autofs4_del_expiring(expiring);
+ dput(expiring);
+ }
+ return 0;
+}
+
+static struct dentry *autofs4_mountpoint_changed(struct path *path)
+{
+ struct dentry *dentry = path->dentry;
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+
+ /*
+ * If this is an indirect mount the dentry could have gone away
+ * as a result of an expire and a new one created.
+ */
+ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
+ struct dentry *parent = dentry->d_parent;
+ struct dentry *new = d_lookup(parent, &dentry->d_name);
+ if (!new)
+ return NULL;
+ dput(path->dentry);
+ path->dentry = new;
+ }
+ return path->dentry;
+}
+
+static struct vfsmount *autofs4_d_automount(struct path *path)
+{
+ struct dentry *dentry = path->dentry;
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ int status;
+
+ DPRINTK("dentry=%p %.*s",
+ dentry, dentry->d_name.len, dentry->d_name.name);
+
+ /*
+ * Someone may have manually umounted this or it was a submount
+ * that has gone away.
+ */
+ spin_lock(&dentry->d_lock);
+ if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
+ if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
+ (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+ __managed_dentry_set_transit(path->dentry);
+ }
+ spin_unlock(&dentry->d_lock);
+
+ /* The daemon never triggers a mount. */
+ if (autofs4_oz_mode(sbi))
+ return NULL;
+
+ /*
+ * If an expire request is pending everyone must wait.
+ * If the expire fails we're still mounted so continue
+ * the follow and return. A return of -EAGAIN (which only
+ * happens with indirect mounts) means the expire completed
+ * and the directory was removed, so just go ahead and try
+ * the mount.
+ */
+ status = do_expire_wait(dentry);
+ if (status && status != -EAGAIN)
+ return NULL;
+
+ /* Callback to the daemon to perform the mount or wait */
+ spin_lock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_PENDING) {
+ spin_unlock(&sbi->fs_lock);
+ status = autofs4_mount_wait(dentry);
+ if (status)
+ return ERR_PTR(status);
+ spin_lock(&sbi->fs_lock);
+ goto done;
+ }
+
+ /*
+ * If the dentry is a symlink it's equivalent to a directory
+ * having d_mountpoint() true, so there's no need to call back
+ * to the daemon.
+ */
+ if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
+ goto done;
+ if (!d_mountpoint(dentry)) {
+ /*
+ * It's possible that user space hasn't removed directories
+ * after umounting a rootless multi-mount, although it
+ * should. For v5 have_submounts() is sufficient to handle
+ * this because the leaves of the directory tree under the
+ * mount never trigger mounts themselves (they have an autofs
+ * trigger mount mounted on them). But v4 pseudo direct mounts
+ * do need the leaves to to trigger mounts. In this case we
+ * have no choice but to use the list_empty() check and
+ * require user space behave.
+ */
+ if (sbi->version > 4) {
+ if (have_submounts(dentry))
+ goto done;
+ } else {
+ spin_lock(&dentry->d_lock);
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&dentry->d_lock);
+ goto done;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ ino->flags |= AUTOFS_INF_PENDING;
+ spin_unlock(&sbi->fs_lock);
+ status = autofs4_mount_wait(dentry);
+ if (status)
+ return ERR_PTR(status);
+ spin_lock(&sbi->fs_lock);
+ ino->flags &= ~AUTOFS_INF_PENDING;
+ }
+done:
+ if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
+ /*
+ * Any needed mounting has been completed and the path updated
+ * so turn this into a normal dentry so we don't continually
+ * call ->d_automount() and ->d_manage().
+ */
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_transit(dentry);
+ /*
+ * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
+ * symlinks as in all other cases the dentry will be covered by
+ * an actual mount so ->d_automount() won't be called during
+ * the follow.
+ */
+ if ((!d_mountpoint(dentry) &&
+ !list_empty(&dentry->d_subdirs)) ||
+ (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
+ __managed_dentry_clear_automount(dentry);
+ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&sbi->fs_lock);
+
+ /* Mount succeeded, check if we ended up with a new dentry */
+ dentry = autofs4_mountpoint_changed(path);
+ if (!dentry)
+ return ERR_PTR(-ENOENT);
+
+ return NULL;
+}
+
+int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk)
+{
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+
+ DPRINTK("dentry=%p %.*s",
+ dentry, dentry->d_name.len, dentry->d_name.name);
+
+ /* The daemon never waits. */
+ if (autofs4_oz_mode(sbi) || mounting_here) {
+ if (!d_mountpoint(dentry))
+ return -EISDIR;
+ return 0;
+ }
+
+ /* We need to sleep, so we need pathwalk to be in ref-mode */
+ if (rcu_walk)
+ return -ECHILD;
+
+ /* Wait for pending expires */
+ do_expire_wait(dentry);
+
+ /*
+ * This dentry may be under construction so wait on mount
+ * completion.
+ */
+ return autofs4_mount_wait(dentry);
+}
+
/* Lookups in the root directory */
static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct autofs_sb_info *sbi;
struct autofs_info *ino;
- struct dentry *expiring, *active;
- int oz_mode;
+ struct dentry *active;
- DPRINTK("name = %.*s",
- dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name);
/* File name too long to exist */
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
sbi = autofs4_sbi(dir->i_sb);
- oz_mode = autofs4_oz_mode(sbi);
DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
- current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
+ current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
active = autofs4_lookup_active(dentry);
if (active) {
- dentry = active;
- ino = autofs4_dentry_ino(dentry);
+ return active;
} else {
/*
- * Mark the dentry incomplete but don't hash it. We do this
- * to serialize our inode creation operations (symlink and
- * mkdir) which prevents deadlock during the callback to
- * the daemon. Subsequent user space lookups for the same
- * dentry are placed on the wait queue while the daemon
- * itself is allowed passage unresticted so the create
- * operation itself can then hash the dentry. Finally,
- * we check for the hashed dentry and return the newly
- * hashed dentry.
+ * A dentry that is not within the root can never trigger a
+ * mount operation, unless the directory already exists, so we
+ * can return fail immediately. The daemon however does need
+ * to create directories within the file system.
*/
- d_set_d_op(dentry, &autofs4_root_dentry_operations);
+ if (!autofs4_oz_mode(sbi) && !IS_ROOT(dentry->d_parent))
+ return ERR_PTR(-ENOENT);
+
+ /* Mark entries in the root as mount triggers */
+ if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent))
+ __managed_dentry_set_managed(dentry);
- /*
- * And we need to ensure that the same dentry is used for
- * all following lookup calls until it is hashed so that
- * the dentry flags are persistent throughout the request.
- */
ino = autofs4_init_ino(NULL, sbi, 0555);
if (!ino)
return ERR_PTR(-ENOMEM);
@@ -596,82 +518,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
d_instantiate(dentry, NULL);
}
-
- if (!oz_mode) {
- mutex_unlock(&dir->i_mutex);
- expiring = autofs4_lookup_expiring(dentry);
- if (expiring) {
- /*
- * If we are racing with expire the request might not
- * be quite complete but the directory has been removed
- * so it must have been successful, so just wait for it.
- */
- autofs4_expire_wait(expiring);
- autofs4_del_expiring(expiring);
- dput(expiring);
- }
-
- spin_lock(&sbi->fs_lock);
- ino->flags |= AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- if (dentry->d_op && dentry->d_op->d_revalidate)
- (dentry->d_op->d_revalidate)(dentry, nd);
- mutex_lock(&dir->i_mutex);
- }
-
- /*
- * If we are still pending, check if we had to handle
- * a signal. If so we can force a restart..
- */
- if (ino->flags & AUTOFS_INF_PENDING) {
- /* See if we were interrupted */
- if (signal_pending(current)) {
- sigset_t *sigset = &current->pending.signal;
- if (sigismember (sigset, SIGKILL) ||
- sigismember (sigset, SIGQUIT) ||
- sigismember (sigset, SIGINT)) {
- if (active)
- dput(active);
- return ERR_PTR(-ERESTARTNOINTR);
- }
- }
- if (!oz_mode) {
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- }
- }
-
- /*
- * If this dentry is unhashed, then we shouldn't honour this
- * lookup. Returning ENOENT here doesn't do the right thing
- * for all system calls, but it should be OK for the operations
- * we permit from an autofs.
- */
- if (!oz_mode && d_unhashed(dentry)) {
- /*
- * A user space application can (and has done in the past)
- * remove and re-create this directory during the callback.
- * This can leave us with an unhashed dentry, but a
- * successful mount! So we need to perform another
- * cached lookup in case the dentry now exists.
- */
- struct dentry *parent = dentry->d_parent;
- struct dentry *new = d_lookup(parent, &dentry->d_name);
- if (new != NULL)
- dentry = new;
- else
- dentry = ERR_PTR(-ENOENT);
-
- if (active)
- dput(active);
-
- return dentry;
- }
-
- if (active)
- return active;
-
return NULL;
}
@@ -716,18 +562,12 @@ static int autofs4_dir_symlink(struct inode *dir,
}
d_add(dentry, inode);
- if (dir == dir->i_sb->s_root->d_inode)
- d_set_d_op(dentry, &autofs4_root_dentry_operations);
- else
- d_set_d_op(dentry, &autofs4_dentry_operations);
-
dentry->d_fsdata = ino;
ino->dentry = dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
atomic_inc(&p_ino->count);
- ino->inode = inode;
ino->u.symlink = cp;
dir->i_mtime = CURRENT_TIME;
@@ -782,6 +622,58 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
return 0;
}
+/*
+ * Version 4 of autofs provides a pseudo direct mount implementation
+ * that relies on directories at the leaves of a directory tree under
+ * an indirect mount to trigger mounts. To allow for this we need to
+ * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves
+ * of the directory tree. There is no need to clear the automount flag
+ * following a mount or restore it after an expire because these mounts
+ * are always covered. However, it is neccessary to ensure that these
+ * flags are clear on non-empty directories to avoid unnecessary calls
+ * during path walks.
+ */
+static void autofs_set_leaf_automount_flags(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ /* root and dentrys in the root are already handled */
+ if (IS_ROOT(dentry->d_parent))
+ return;
+
+ managed_dentry_set_managed(dentry);
+
+ parent = dentry->d_parent;
+ /* only consider parents below dentrys in the root */
+ if (IS_ROOT(parent->d_parent))
+ return;
+ managed_dentry_clear_managed(parent);
+ return;
+}
+
+static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
+{
+ struct list_head *d_child;
+ struct dentry *parent;
+
+ /* flags for dentrys in the root are handled elsewhere */
+ if (IS_ROOT(dentry->d_parent))
+ return;
+
+ managed_dentry_clear_managed(dentry);
+
+ parent = dentry->d_parent;
+ /* only consider parents below dentrys in the root */
+ if (IS_ROOT(parent->d_parent))
+ return;
+ d_child = &dentry->d_u.d_child;
+ /* Set parent managed if it's becoming empty */
+ if (d_child->next == &parent->d_subdirs &&
+ d_child->prev == &parent->d_subdirs)
+ managed_dentry_set_managed(parent);
+ return;
+}
+
static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
@@ -809,6 +701,9 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
spin_unlock(&autofs4_lock);
+ if (sbi->version < 5)
+ autofs_clear_leaf_automount_flags(dentry);
+
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
@@ -851,10 +746,8 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
d_add(dentry, inode);
- if (dir == dir->i_sb->s_root->d_inode)
- d_set_d_op(dentry, &autofs4_root_dentry_operations);
- else
- d_set_d_op(dentry, &autofs4_dentry_operations);
+ if (sbi->version < 5)
+ autofs_set_leaf_automount_flags(dentry);
dentry->d_fsdata = ino;
ino->dentry = dget(dentry);
@@ -862,7 +755,6 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
atomic_inc(&p_ino->count);
- ino->inode = inode;
inc_nlink(dir);
dir->i_mtime = CURRENT_TIME;
@@ -944,8 +836,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
int is_autofs4_dentry(struct dentry *dentry)
{
return dentry && dentry->d_inode &&
- (dentry->d_op == &autofs4_root_dentry_operations ||
- dentry->d_op == &autofs4_dentry_operations) &&
+ dentry->d_op == &autofs4_dentry_operations &&
dentry->d_fsdata != NULL;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index c5f8459c905e..56010056b2e6 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -309,6 +309,9 @@ static int validate_request(struct autofs_wait_queue **wait,
* completed while we waited on the mutex ...
*/
if (notify == NFY_MOUNT) {
+ struct dentry *new = NULL;
+ int valid = 1;
+
/*
* If the dentry was successfully mounted while we slept
* on the wait queue mutex we can return success. If it
@@ -316,8 +319,20 @@ static int validate_request(struct autofs_wait_queue **wait,
* a multi-mount with no mount at it's base) we can
* continue on and create a new request.
*/
+ if (!IS_ROOT(dentry)) {
+ if (dentry->d_inode && d_unhashed(dentry)) {
+ struct dentry *parent = dentry->d_parent;
+ new = d_lookup(parent, &dentry->d_name);
+ if (new)
+ dentry = new;
+ }
+ }
if (have_submounts(dentry))
- return 0;
+ valid = 0;
+
+ if (new)
+ dput(new);
+ return valid;
}
return 1;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index fe3f59c14a02..333a7bb4cb9c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -432,6 +432,9 @@ static void init_once(void *foo)
mutex_init(&bdev->bd_mutex);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
+#ifdef CONFIG_SYSFS
+ INIT_LIST_HEAD(&bdev->bd_holder_disks);
+#endif
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -779,6 +782,23 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
}
#ifdef CONFIG_SYSFS
+struct bd_holder_disk {
+ struct list_head list;
+ struct gendisk *disk;
+ int refcnt;
+};
+
+static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ struct bd_holder_disk *holder;
+
+ list_for_each_entry(holder, &bdev->bd_holder_disks, list)
+ if (holder->disk == disk)
+ return holder;
+ return NULL;
+}
+
static int add_symlink(struct kobject *from, struct kobject *to)
{
return sysfs_create_link(from, to, kobject_name(to));
@@ -794,6 +814,8 @@ static void del_symlink(struct kobject *from, struct kobject *to)
* @bdev: the claimed slave bdev
* @disk: the holding disk
*
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
* This functions creates the following sysfs symlinks.
*
* - from "slaves" directory of the holder @disk to the claimed @bdev
@@ -817,47 +839,83 @@ static void del_symlink(struct kobject *from, struct kobject *to)
*/
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
+ struct bd_holder_disk *holder;
int ret = 0;
mutex_lock(&bdev->bd_mutex);
- WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
+ WARN_ON_ONCE(!bdev->bd_holder);
/* FIXME: remove the following once add_disk() handles errors */
if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
goto out_unlock;
- ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
- if (ret)
+ holder = bd_find_holder_disk(bdev, disk);
+ if (holder) {
+ holder->refcnt++;
goto out_unlock;
+ }
- ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
- if (ret) {
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ holder = kzalloc(sizeof(*holder), GFP_KERNEL);
+ if (!holder) {
+ ret = -ENOMEM;
goto out_unlock;
}
- bdev->bd_holder_disk = disk;
+ INIT_LIST_HEAD(&holder->list);
+ holder->disk = disk;
+ holder->refcnt = 1;
+
+ ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ if (ret)
+ goto out_free;
+
+ ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret)
+ goto out_del;
+
+ list_add(&holder->list, &bdev->bd_holder_disks);
+ goto out_unlock;
+
+out_del:
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+out_free:
+ kfree(holder);
out_unlock:
mutex_unlock(&bdev->bd_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
-static void bd_unlink_disk_holder(struct block_device *bdev)
+/**
+ * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
+ * @bdev: the calimed slave bdev
+ * @disk: the holding disk
+ *
+ * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
- struct gendisk *disk = bdev->bd_holder_disk;
+ struct bd_holder_disk *holder;
- bdev->bd_holder_disk = NULL;
- if (!disk)
- return;
+ mutex_lock(&bdev->bd_mutex);
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
- del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ holder = bd_find_holder_disk(bdev, disk);
+
+ if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(bdev->bd_part->holder_dir,
+ &disk_to_dev(disk)->kobj);
+ list_del_init(&holder->list);
+ kfree(holder);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
}
-#else
-static inline void bd_unlink_disk_holder(struct block_device *bdev)
-{ }
+EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
/**
@@ -1380,7 +1438,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
* unblock evpoll if it was a write holder.
*/
if (bdev_free) {
- bd_unlink_disk_holder(bdev);
if (bdev->bd_write_holder) {
disk_unblock_events(bdev->bd_disk);
bdev->bd_write_holder = false;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a142d204b526..b875d445ea81 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -27,6 +27,7 @@
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/slab.h>
+#include <linux/kobject.h>
#include <asm/kmap_types.h>
#include "extent_io.h"
#include "extent_map.h"
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index c68a056f27fd..7ed36536e754 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -255,35 +255,6 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
}
-static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
- struct list_head *mntlist)
-{
- /* stolen from afs code */
- int err;
-
- mntget(newmnt);
- err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
- switch (err) {
- case 0:
- path_put(&nd->path);
- nd->path.mnt = newmnt;
- nd->path.dentry = dget(newmnt->mnt_root);
- schedule_delayed_work(&cifs_dfs_automount_task,
- cifs_dfs_mountpoint_expiry_timeout);
- break;
- case -EBUSY:
- /* someone else made a mount here whilst we were busy */
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path))
- ;
- err = 0;
- default:
- mntput(newmnt);
- break;
- }
- return err;
-}
-
static void dump_referral(const struct dfs_info3_param *ref)
{
cFYI(1, "DFS: ref path: %s", ref->path_name);
@@ -293,45 +264,43 @@ static void dump_referral(const struct dfs_info3_param *ref)
ref->path_consumed);
}
-
-static void*
-cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
+/*
+ * Create a vfsmount that we can automount
+ */
+static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
{
struct dfs_info3_param *referrals = NULL;
unsigned int num_referrals = 0;
struct cifs_sb_info *cifs_sb;
struct cifsSesInfo *ses;
- char *full_path = NULL;
+ char *full_path;
int xid, i;
- int rc = 0;
- struct vfsmount *mnt = ERR_PTR(-ENOENT);
+ int rc;
+ struct vfsmount *mnt;
struct tcon_link *tlink;
cFYI(1, "in %s", __func__);
- BUG_ON(IS_ROOT(dentry));
+ BUG_ON(IS_ROOT(mntpt));
xid = GetXid();
- dput(nd->path.dentry);
- nd->path.dentry = dget(dentry);
-
/*
* The MSDFS spec states that paths in DFS referral requests and
* responses must be prefixed by a single '\' character instead of
* the double backslashes usually used in the UNC. This function
* gives us the latter, so we must adjust the result.
*/
- full_path = build_path_from_dentry(dentry);
- if (full_path == NULL) {
- rc = -ENOMEM;
- goto out_err;
- }
+ mnt = ERR_PTR(-ENOMEM);
+ full_path = build_path_from_dentry(mntpt);
+ if (full_path == NULL)
+ goto free_xid;
- cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
+ cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
tlink = cifs_sb_tlink(cifs_sb);
+ mnt = ERR_PTR(-EINVAL);
if (IS_ERR(tlink)) {
- rc = PTR_ERR(tlink);
- goto out_err;
+ mnt = ERR_CAST(tlink);
+ goto free_full_path;
}
ses = tlink_tcon(tlink)->ses;
@@ -341,46 +310,63 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
cifs_put_tlink(tlink);
+ mnt = ERR_PTR(-ENOENT);
for (i = 0; i < num_referrals; i++) {
int len;
- dump_referral(referrals+i);
+ dump_referral(referrals + i);
/* connect to a node */
len = strlen(referrals[i].node_name);
if (len < 2) {
cERROR(1, "%s: Net Address path too short: %s",
__func__, referrals[i].node_name);
- rc = -EINVAL;
- goto out_err;
+ mnt = ERR_PTR(-EINVAL);
+ break;
}
mnt = cifs_dfs_do_refmount(cifs_sb,
full_path, referrals + i);
cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
referrals[i].node_name, mnt);
-
- /* complete mount procedure if we accured submount */
if (!IS_ERR(mnt))
- break;
+ goto success;
}
- /* we need it cause for() above could exit without valid submount */
- rc = PTR_ERR(mnt);
- if (IS_ERR(mnt))
- goto out_err;
-
- rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
+ /* no valid submounts were found; return error from get_dfs_path() by
+ * preference */
+ if (rc != 0)
+ mnt = ERR_PTR(rc);
-out:
- FreeXid(xid);
+success:
free_dfs_info_array(referrals, num_referrals);
+free_full_path:
kfree(full_path);
+free_xid:
+ FreeXid(xid);
cFYI(1, "leaving %s" , __func__);
- return ERR_PTR(rc);
-out_err:
- path_put(&nd->path);
- goto out;
+ return mnt;
+}
+
+/*
+ * Attempt to automount the referral
+ */
+struct vfsmount *cifs_dfs_d_automount(struct path *path)
+{
+ struct vfsmount *newmnt;
+
+ cFYI(1, "in %s", __func__);
+
+ newmnt = cifs_dfs_do_automount(path->dentry);
+ if (IS_ERR(newmnt)) {
+ cFYI(1, "leaving %s [automount failed]" , __func__);
+ return newmnt;
+ }
+
+ mntget(newmnt); /* prevent immediate expiration */
+ mnt_set_expiry(newmnt, &cifs_dfs_automount_list);
+ schedule_delayed_work(&cifs_dfs_automount_task,
+ cifs_dfs_mountpoint_expiry_timeout);
+ cFYI(1, "leaving %s [ok]" , __func__);
+ return newmnt;
}
const struct inode_operations cifs_dfs_referral_inode_operations = {
- .follow_link = cifs_dfs_follow_mountpoint,
};
-
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 897b2b2b28b5..851030f74939 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -93,6 +93,12 @@ extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
extern const struct dentry_operations cifs_dentry_ops;
extern const struct dentry_operations cifs_ci_dentry_ops;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
+#else
+#define cifs_dfs_d_automount NULL
+#endif
+
/* Functions related to symlinks */
extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
extern void cifs_put_link(struct dentry *direntry,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1e95dd635632..dd5f22918c33 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -675,6 +675,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
const struct dentry_operations cifs_dentry_ops = {
.d_revalidate = cifs_d_revalidate,
+ .d_automount = cifs_dfs_d_automount,
/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
};
@@ -711,4 +712,5 @@ const struct dentry_operations cifs_ci_dentry_ops = {
.d_revalidate = cifs_d_revalidate,
.d_hash = cifs_ci_hash,
.d_compare = cifs_ci_compare,
+ .d_automount = cifs_dfs_d_automount,
};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b06b60620240..6c9ee8014ff0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -32,7 +32,7 @@
#include "fscache.h"
-static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
+static void cifs_set_ops(struct inode *inode)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -60,7 +60,7 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
break;
case S_IFDIR:
#ifdef CONFIG_CIFS_DFS_UPCALL
- if (is_dfs_referral) {
+ if (IS_AUTOMOUNT(inode)) {
inode->i_op = &cifs_dfs_referral_inode_operations;
} else {
#else /* NO DFS support, treat as a directory */
@@ -167,7 +167,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
}
spin_unlock(&inode->i_lock);
- cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
+ if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
+ inode->i_flags |= S_AUTOMOUNT;
+ cifs_set_ops(inode);
}
void
diff --git a/fs/dcache.c b/fs/dcache.c
index 0c6d5c549d84..9f493ee4dcba 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1357,8 +1357,8 @@ EXPORT_SYMBOL(d_alloc_name);
void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
- BUG_ON(dentry->d_op);
- BUG_ON(dentry->d_flags & (DCACHE_OP_HASH |
+ WARN_ON_ONCE(dentry->d_op);
+ WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
DCACHE_OP_COMPARE |
DCACHE_OP_REVALIDATE |
DCACHE_OP_DELETE ));
@@ -1380,8 +1380,11 @@ EXPORT_SYMBOL(d_set_d_op);
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
spin_lock(&dentry->d_lock);
- if (inode)
+ if (inode) {
+ if (unlikely(IS_AUTOMOUNT(inode)))
+ dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
list_add(&dentry->d_alias, &inode->i_dentry);
+ }
dentry->d_inode = inode;
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 68ca487bedb1..78b519c13536 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -4,6 +4,19 @@
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/fs_struct.h>
+#include "internal.h"
+
+static inline void path_get_longterm(struct path *path)
+{
+ path_get(path);
+ mnt_make_longterm(path->mnt);
+}
+
+static inline void path_put_longterm(struct path *path)
+{
+ mnt_make_shortterm(path->mnt);
+ path_put(path);
+}
/*
* Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq);
old_root = fs->root;
fs->root = *path;
- path_get_long(path);
+ path_get_longterm(path);
write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_root.dentry)
- path_put_long(&old_root);
+ path_put_longterm(&old_root);
}
/*
@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
- path_get_long(path);
+ path_get_longterm(path);
write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_pwd.dentry)
- path_put_long(&old_pwd);
+ path_put_longterm(&old_pwd);
}
void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
write_seqcount_begin(&fs->seq);
if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) {
- path_get_long(new_root);
+ path_get_longterm(new_root);
fs->root = *new_root;
count++;
}
if (fs->pwd.dentry == old_root->dentry
&& fs->pwd.mnt == old_root->mnt) {
- path_get_long(new_root);
+ path_get_longterm(new_root);
fs->pwd = *new_root;
count++;
}
@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
while (count--)
- path_put_long(old_root);
+ path_put_longterm(old_root);
}
void free_fs_struct(struct fs_struct *fs)
{
- path_put_long(&fs->root);
- path_put_long(&fs->pwd);
+ path_put_longterm(&fs->root);
+ path_put_longterm(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
spin_lock(&old->lock);
fs->root = old->root;
- path_get_long(&fs->root);
+ path_get_longterm(&fs->root);
fs->pwd = old->pwd;
- path_get_long(&fs->pwd);
+ path_get_longterm(&fs->pwd);
spin_unlock(&old->lock);
}
return fs;
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index b9f34eaede09..48a18f184d50 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -101,7 +101,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
object->n_ops++;
object->n_exclusive++; /* reads and writes must wait */
- if (object->n_ops > 0) {
+ if (object->n_ops > 1) {
atomic_inc(&op->usage);
list_add_tail(&op->pend_link, &object->pending_ops);
fscache_stat(&fscache_n_op_pend);
diff --git a/fs/internal.h b/fs/internal.h
index 9687c2ee2735..12ccb86edef7 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -70,6 +70,11 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
extern void release_mounts(struct list_head *);
extern void umount_tree(struct vfsmount *, int, struct list_head *);
extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
+extern int do_add_mount(struct vfsmount *, struct path *, int);
+extern void mnt_clear_expiry(struct vfsmount *);
+
+extern void mnt_make_longterm(struct vfsmount *);
+extern void mnt_make_shortterm(struct vfsmount *);
extern void __init mnt_init(void);
diff --git a/fs/locks.c b/fs/locks.c
index 08415b2a6d36..0f3998291f78 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -444,15 +444,9 @@ static void lease_release_private_callback(struct file_lock *fl)
fl->fl_file->f_owner.signum = 0;
}
-static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
-{
- return fl->fl_file == try->fl_file;
-}
-
static const struct lock_manager_operations lease_manager_ops = {
.fl_break = lease_break_callback,
.fl_release_private = lease_release_private_callback,
- .fl_mylease = lease_mylease_callback,
.fl_change = lease_modify,
};
@@ -1405,7 +1399,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
for (before = &inode->i_flock;
((fl = *before) != NULL) && IS_LEASE(fl);
before = &fl->fl_next) {
- if (lease->fl_lmops->fl_mylease(fl, lease))
+ if (fl->fl_file == filp)
my_before = before;
else if (fl->fl_type == (F_INPROGRESS | F_UNLCK))
/*
diff --git a/fs/namei.c b/fs/namei.c
index 86643302079e..8f7b41a14882 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -368,18 +368,6 @@ void path_get(struct path *path)
EXPORT_SYMBOL(path_get);
/**
- * path_get_long - get a long reference to a path
- * @path: path to get the reference to
- *
- * Given a path increment the reference count to the dentry and the vfsmount.
- */
-void path_get_long(struct path *path)
-{
- mntget_long(path->mnt);
- dget(path->dentry);
-}
-
-/**
* path_put - put a reference to a path
* @path: path to put the reference to
*
@@ -393,18 +381,6 @@ void path_put(struct path *path)
EXPORT_SYMBOL(path_put);
/**
- * path_put_long - put a long reference to a path
- * @path: path to put the reference to
- *
- * Given a path decrement the reference count to the dentry and the vfsmount.
- */
-void path_put_long(struct path *path)
-{
- dput(path->dentry);
- mntput_long(path->mnt);
-}
-
-/**
* nameidata_drop_rcu - drop this nameidata out of rcu-walk
* @nd: nameidata pathwalk data to drop
* Returns: 0 on success, -ECHILD on failure
@@ -779,7 +755,8 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
mntput(path->mnt);
}
-static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
+static inline void path_to_nameidata(const struct path *path,
+ struct nameidata *nd)
{
if (!(nd->flags & LOOKUP_RCU)) {
dput(nd->path.dentry);
@@ -791,20 +768,16 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
}
static __always_inline int
-__do_follow_link(struct path *path, struct nameidata *nd, void **p)
+__do_follow_link(const struct path *link, struct nameidata *nd, void **p)
{
int error;
- struct dentry *dentry = path->dentry;
+ struct dentry *dentry = link->dentry;
- touch_atime(path->mnt, dentry);
+ touch_atime(link->mnt, dentry);
nd_set_link(nd, NULL);
- if (path->mnt != nd->path.mnt) {
- path_to_nameidata(path, nd);
- nd->inode = nd->path.dentry->d_inode;
- dget(dentry);
- }
- mntget(path->mnt);
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -895,54 +868,169 @@ int follow_up(struct path *path)
}
/*
- * serialization is taken care of in namespace.c
+ * Perform an automount
+ * - return -EISDIR to tell follow_managed() to stop and return the path we
+ * were called with.
*/
-static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
- struct inode **inode)
+static int follow_automount(struct path *path, unsigned flags,
+ bool *need_mntput)
{
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted;
- mounted = __lookup_mnt(path->mnt, path->dentry, 1);
- if (!mounted)
- return;
- path->mnt = mounted;
- path->dentry = mounted->mnt_root;
- nd->seq = read_seqcount_begin(&path->dentry->d_seq);
- *inode = path->dentry->d_inode;
+ struct vfsmount *mnt;
+ int err;
+
+ if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
+ return -EREMOTE;
+
+ /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
+ * and this is the terminal part of the path.
+ */
+ if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE))
+ return -EISDIR; /* we actually want to stop here */
+
+ /* We want to mount if someone is trying to open/create a file of any
+ * type under the mountpoint, wants to traverse through the mountpoint
+ * or wants to open the mounted directory.
+ *
+ * We don't want to mount if someone's just doing a stat and they've
+ * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
+ * appended a '/' to the name.
+ */
+ if (!(flags & LOOKUP_FOLLOW) &&
+ !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY |
+ LOOKUP_OPEN | LOOKUP_CREATE)))
+ return -EISDIR;
+
+ current->total_link_count++;
+ if (current->total_link_count >= 40)
+ return -ELOOP;
+
+ mnt = path->dentry->d_op->d_automount(path);
+ if (IS_ERR(mnt)) {
+ /*
+ * The filesystem is allowed to return -EISDIR here to indicate
+ * it doesn't want to automount. For instance, autofs would do
+ * this so that its userspace daemon can mount on this dentry.
+ *
+ * However, we can only permit this if it's a terminal point in
+ * the path being looked up; if it wasn't then the remainder of
+ * the path is inaccessible and we should say so.
+ */
+ if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE))
+ return -EREMOTE;
+ return PTR_ERR(mnt);
}
-}
-static int __follow_mount(struct path *path)
-{
- int res = 0;
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted = lookup_mnt(path);
- if (!mounted)
- break;
+ if (!mnt) /* mount collision */
+ return 0;
+
+ /* The new mount record should have at least 2 refs to prevent it being
+ * expired before we get a chance to add it
+ */
+ BUG_ON(mnt_get_count(mnt) < 2);
+
+ if (mnt->mnt_sb == path->mnt->mnt_sb &&
+ mnt->mnt_root == path->dentry) {
+ mnt_clear_expiry(mnt);
+ mntput(mnt);
+ mntput(mnt);
+ return -ELOOP;
+ }
+
+ /* We need to add the mountpoint to the parent. The filesystem may
+ * have placed it on an expiry list, and so we need to make sure it
+ * won't be expired under us if do_add_mount() fails (do_add_mount()
+ * will eat a reference unconditionally).
+ */
+ mntget(mnt);
+ err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
+ switch (err) {
+ case -EBUSY:
+ /* Someone else made a mount here whilst we were busy */
+ err = 0;
+ default:
+ mnt_clear_expiry(mnt);
+ mntput(mnt);
+ mntput(mnt);
+ return err;
+ case 0:
+ mntput(mnt);
dput(path->dentry);
- if (res)
+ if (*need_mntput)
mntput(path->mnt);
- path->mnt = mounted;
- path->dentry = dget(mounted->mnt_root);
- res = 1;
+ path->mnt = mnt;
+ path->dentry = dget(mnt->mnt_root);
+ *need_mntput = true;
+ return 0;
}
- return res;
}
-static void follow_mount(struct path *path)
+/*
+ * Handle a dentry that is managed in some way.
+ * - Flagged for transit management (autofs)
+ * - Flagged as mountpoint
+ * - Flagged as automount point
+ *
+ * This may only be called in refwalk mode.
+ *
+ * Serialization is taken care of in namespace.c
+ */
+static int follow_managed(struct path *path, unsigned flags)
{
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted = lookup_mnt(path);
- if (!mounted)
- break;
- dput(path->dentry);
- mntput(path->mnt);
- path->mnt = mounted;
- path->dentry = dget(mounted->mnt_root);
+ unsigned managed;
+ bool need_mntput = false;
+ int ret;
+
+ /* Given that we're not holding a lock here, we retain the value in a
+ * local variable for each dentry as we look at it so that we don't see
+ * the components of that value change under us */
+ while (managed = ACCESS_ONCE(path->dentry->d_flags),
+ managed &= DCACHE_MANAGED_DENTRY,
+ unlikely(managed != 0)) {
+ /* Allow the filesystem to manage the transit without i_mutex
+ * being held. */
+ if (managed & DCACHE_MANAGE_TRANSIT) {
+ BUG_ON(!path->dentry->d_op);
+ BUG_ON(!path->dentry->d_op->d_manage);
+ ret = path->dentry->d_op->d_manage(path->dentry,
+ false, false);
+ if (ret < 0)
+ return ret == -EISDIR ? 0 : ret;
+ }
+
+ /* Transit to a mounted filesystem. */
+ if (managed & DCACHE_MOUNTED) {
+ struct vfsmount *mounted = lookup_mnt(path);
+ if (mounted) {
+ dput(path->dentry);
+ if (need_mntput)
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
+ need_mntput = true;
+ continue;
+ }
+
+ /* Something is mounted on this dentry in another
+ * namespace and/or whatever was mounted there in this
+ * namespace got unmounted before we managed to get the
+ * vfsmount_lock */
+ }
+
+ /* Handle an automount point */
+ if (managed & DCACHE_NEED_AUTOMOUNT) {
+ ret = follow_automount(path, flags, &need_mntput);
+ if (ret < 0)
+ return ret == -EISDIR ? 0 : ret;
+ continue;
+ }
+
+ /* We didn't change the current path point */
+ break;
}
+ return 0;
}
-int follow_down(struct path *path)
+int follow_down_one(struct path *path)
{
struct vfsmount *mounted;
@@ -957,13 +1045,41 @@ int follow_down(struct path *path)
return 0;
}
+/*
+ * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
+ * meet a managed dentry and we're not walking to "..". True is returned to
+ * continue, false to abort.
+ */
+static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
+ struct inode **inode, bool reverse_transit)
+{
+ while (d_mountpoint(path->dentry)) {
+ struct vfsmount *mounted;
+ if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
+ !reverse_transit &&
+ path->dentry->d_op->d_manage(path->dentry, false, true) < 0)
+ return false;
+ mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+ if (!mounted)
+ break;
+ path->mnt = mounted;
+ path->dentry = mounted->mnt_root;
+ nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+ *inode = path->dentry->d_inode;
+ }
+
+ if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+ return reverse_transit;
+ return true;
+}
+
static int follow_dotdot_rcu(struct nameidata *nd)
{
struct inode *inode = nd->inode;
set_root_rcu(nd);
- while(1) {
+ while (1) {
if (nd->path.dentry == nd->root.dentry &&
nd->path.mnt == nd->root.mnt) {
break;
@@ -986,12 +1102,80 @@ static int follow_dotdot_rcu(struct nameidata *nd)
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
inode = nd->path.dentry->d_inode;
}
- __follow_mount_rcu(nd, &nd->path, &inode);
+ __follow_mount_rcu(nd, &nd->path, &inode, true);
nd->inode = inode;
return 0;
}
+/*
+ * Follow down to the covering mount currently visible to userspace. At each
+ * point, the filesystem owning that dentry may be queried as to whether the
+ * caller is permitted to proceed or not.
+ *
+ * Care must be taken as namespace_sem may be held (indicated by mounting_here
+ * being true).
+ */
+int follow_down(struct path *path, bool mounting_here)
+{
+ unsigned managed;
+ int ret;
+
+ while (managed = ACCESS_ONCE(path->dentry->d_flags),
+ unlikely(managed & DCACHE_MANAGED_DENTRY)) {
+ /* Allow the filesystem to manage the transit without i_mutex
+ * being held.
+ *
+ * We indicate to the filesystem if someone is trying to mount
+ * something here. This gives autofs the chance to deny anyone
+ * other than its daemon the right to mount on its
+ * superstructure.
+ *
+ * The filesystem may sleep at this point.
+ */
+ if (managed & DCACHE_MANAGE_TRANSIT) {
+ BUG_ON(!path->dentry->d_op);
+ BUG_ON(!path->dentry->d_op->d_manage);
+ ret = path->dentry->d_op->d_manage(
+ path->dentry, mounting_here, false);
+ if (ret < 0)
+ return ret == -EISDIR ? 0 : ret;
+ }
+
+ /* Transit to a mounted filesystem. */
+ if (managed & DCACHE_MOUNTED) {
+ struct vfsmount *mounted = lookup_mnt(path);
+ if (!mounted)
+ break;
+ dput(path->dentry);
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
+ continue;
+ }
+
+ /* Don't handle automount points here */
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
+ */
+static void follow_mount(struct path *path)
+{
+ while (d_mountpoint(path->dentry)) {
+ struct vfsmount *mounted = lookup_mnt(path);
+ if (!mounted)
+ break;
+ dput(path->dentry);
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
+ }
+}
+
static void follow_dotdot(struct nameidata *nd)
{
set_root(nd);
@@ -1056,12 +1240,14 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
struct inode *dir;
+ int err;
+
/*
* See if the low-level filesystem might want
* to use its own hash..
*/
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- int err = parent->d_op->d_hash(parent, nd->inode, name);
+ err = parent->d_op->d_hash(parent, nd->inode, name);
if (err < 0)
return err;
}
@@ -1088,22 +1274,28 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
nd->seq = seq;
if (dentry->d_flags & DCACHE_OP_REVALIDATE)
goto need_revalidate;
+done2:
path->mnt = mnt;
path->dentry = dentry;
- __follow_mount_rcu(nd, path, inode);
- } else {
- dentry = __d_lookup(parent, name);
- if (!dentry)
- goto need_lookup;
+ if (likely(__follow_mount_rcu(nd, path, inode, false)))
+ return 0;
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ /* fallthru */
+ }
+ dentry = __d_lookup(parent, name);
+ if (!dentry)
+ goto need_lookup;
found:
- if (dentry->d_flags & DCACHE_OP_REVALIDATE)
- goto need_revalidate;
+ if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+ goto need_revalidate;
done:
- path->mnt = mnt;
- path->dentry = dentry;
- __follow_mount(path);
- *inode = path->dentry->d_inode;
- }
+ path->mnt = mnt;
+ path->dentry = dentry;
+ err = follow_managed(path, nd->flags);
+ if (unlikely(err < 0))
+ return err;
+ *inode = path->dentry->d_inode;
return 0;
need_lookup:
@@ -1142,6 +1334,8 @@ need_revalidate:
goto need_lookup;
if (IS_ERR(dentry))
goto fail;
+ if (nd->flags & LOOKUP_RCU)
+ goto done2;
goto done;
fail:
@@ -1149,17 +1343,6 @@ fail:
}
/*
- * This is a temporary kludge to deal with "automount" symlinks; proper
- * solution is to trigger them on follow_mount(), so that do_lookup()
- * would DTRT. To be killed before 2.6.34-final.
- */
-static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
-{
- return inode && unlikely(inode->i_op->follow_link) &&
- ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
-}
-
-/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
* the final dentry. We expect 'base' to be positive and a directory.
@@ -1297,7 +1480,8 @@ last_component:
err = do_lookup(nd, &this, &next, &inode);
if (err)
break;
- if (follow_on_final(inode, lookup_flags)) {
+ if (inode && unlikely(inode->i_op->follow_link) &&
+ (lookup_flags & LOOKUP_FOLLOW)) {
if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
return -ECHILD;
BUG_ON(inode != next.dentry->d_inode);
@@ -2199,11 +2383,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (open_flag & O_EXCL)
goto exit_dput;
- if (__follow_mount(path)) {
- error = -ELOOP;
- if (open_flag & O_NOFOLLOW)
- goto exit_dput;
- }
+ error = follow_managed(path, nd->flags);
+ if (error < 0)
+ goto exit_dput;
error = -ENOENT;
if (!path->dentry->d_inode)
@@ -2348,11 +2530,11 @@ reval:
nd.flags = flags;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
while (unlikely(!filp)) { /* trailing symlink */
- struct path holder;
+ struct path link = path;
+ struct inode *linki = link.dentry->d_inode;
void *cookie;
error = -ELOOP;
- /* S_ISDIR part is a temporary automount kludge */
- if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
+ if (!(nd.flags & LOOKUP_FOLLOW))
goto exit_dput;
if (count++ == 32)
goto exit_dput;
@@ -2368,23 +2550,22 @@ reval:
* just set LAST_BIND.
*/
nd.flags |= LOOKUP_PARENT;
- error = security_inode_follow_link(path.dentry, &nd);
+ error = security_inode_follow_link(link.dentry, &nd);
if (error)
goto exit_dput;
- error = __do_follow_link(&path, &nd, &cookie);
+ error = __do_follow_link(&link, &nd, &cookie);
if (unlikely(error)) {
- if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
- nd.inode->i_op->put_link(path.dentry, &nd, cookie);
+ if (!IS_ERR(cookie) && linki->i_op->put_link)
+ linki->i_op->put_link(link.dentry, &nd, cookie);
/* nd.path had been dropped */
- nd.path = path;
+ nd.path = link;
goto out_path;
}
- holder = path;
nd.flags &= ~LOOKUP_PARENT;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
- if (nd.inode->i_op->put_link)
- nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
- path_put(&holder);
+ if (linki->i_op->put_link)
+ linki->i_op->put_link(link.dentry, &nd, cookie);
+ path_put(&link);
}
out:
if (nd.root.mnt)
@@ -3412,6 +3593,7 @@ const struct inode_operations page_symlink_inode_operations = {
};
EXPORT_SYMBOL(user_path_at);
+EXPORT_SYMBOL(follow_down_one);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
diff --git a/fs/namespace.c b/fs/namespace.c
index 3ddfd9046c44..9f544f35ed34 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt)
unsigned int mnt_get_count(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- unsigned int count = atomic_read(&mnt->mnt_longrefs);
+ unsigned int count = 0;
int cpu;
for_each_possible_cpu(cpu) {
@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
if (!mnt->mnt_pcp)
goto out_free_devname;
- atomic_set(&mnt->mnt_longrefs, 1);
+ this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
mnt->mnt_count = 1;
mnt->mnt_writers = 0;
@@ -611,6 +611,21 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
}
+static inline void __mnt_make_longterm(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ atomic_inc(&mnt->mnt_longterm);
+#endif
+}
+
+/* needs vfsmount lock for write */
+static inline void __mnt_make_shortterm(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ atomic_dec(&mnt->mnt_longterm);
+#endif
+}
+
/*
* vfsmount lock must be held for write
*/
@@ -624,8 +639,11 @@ static void commit_tree(struct vfsmount *mnt)
BUG_ON(parent == mnt);
list_add_tail(&head, &mnt->mnt_list);
- list_for_each_entry(m, &head, mnt_list)
+ list_for_each_entry(m, &head, mnt_list) {
m->mnt_ns = n;
+ __mnt_make_longterm(m);
+ }
+
list_splice(&head, n->list.prev);
list_add_tail(&mnt->mnt_hash, mount_hashtable +
@@ -734,51 +752,30 @@ static inline void mntfree(struct vfsmount *mnt)
deactivate_super(sb);
}
-#ifdef CONFIG_SMP
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
+static void mntput_no_expire(struct vfsmount *mnt)
{
- if (!longrefs) {
put_again:
- br_read_lock(vfsmount_lock);
- if (likely(atomic_read(&mnt->mnt_longrefs))) {
- mnt_dec_count(mnt);
- br_read_unlock(vfsmount_lock);
- return;
- }
+#ifdef CONFIG_SMP
+ br_read_lock(vfsmount_lock);
+ if (likely(atomic_read(&mnt->mnt_longterm))) {
+ mnt_dec_count(mnt);
br_read_unlock(vfsmount_lock);
- } else {
- BUG_ON(!atomic_read(&mnt->mnt_longrefs));
- if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
- return;
+ return;
}
+ br_read_unlock(vfsmount_lock);
br_write_lock(vfsmount_lock);
- if (!longrefs)
- mnt_dec_count(mnt);
- else
- atomic_dec(&mnt->mnt_longrefs);
+ mnt_dec_count(mnt);
if (mnt_get_count(mnt)) {
br_write_unlock(vfsmount_lock);
return;
}
- if (unlikely(mnt->mnt_pinned)) {
- mnt_add_count(mnt, mnt->mnt_pinned + 1);
- mnt->mnt_pinned = 0;
- br_write_unlock(vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto put_again;
- }
- br_write_unlock(vfsmount_lock);
- mntfree(mnt);
-}
#else
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
-{
-put_again:
mnt_dec_count(mnt);
if (likely(mnt_get_count(mnt)))
return;
br_write_lock(vfsmount_lock);
+#endif
if (unlikely(mnt->mnt_pinned)) {
mnt_add_count(mnt, mnt->mnt_pinned + 1);
mnt->mnt_pinned = 0;
@@ -789,12 +786,6 @@ put_again:
br_write_unlock(vfsmount_lock);
mntfree(mnt);
}
-#endif
-
-static void mntput_no_expire(struct vfsmount *mnt)
-{
- __mntput(mnt, 0);
-}
void mntput(struct vfsmount *mnt)
{
@@ -802,7 +793,7 @@ void mntput(struct vfsmount *mnt)
/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
if (unlikely(mnt->mnt_expiry_mark))
mnt->mnt_expiry_mark = 0;
- __mntput(mnt, 0);
+ mntput_no_expire(mnt);
}
}
EXPORT_SYMBOL(mntput);
@@ -815,33 +806,6 @@ struct vfsmount *mntget(struct vfsmount *mnt)
}
EXPORT_SYMBOL(mntget);
-void mntput_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- if (mnt) {
- /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
- if (unlikely(mnt->mnt_expiry_mark))
- mnt->mnt_expiry_mark = 0;
- __mntput(mnt, 1);
- }
-#else
- mntput(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntput_long);
-
-struct vfsmount *mntget_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- if (mnt)
- atomic_inc(&mnt->mnt_longrefs);
- return mnt;
-#else
- return mntget(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntget_long);
-
void mnt_pin(struct vfsmount *mnt)
{
br_write_lock(vfsmount_lock);
@@ -1216,7 +1180,7 @@ void release_mounts(struct list_head *head)
dput(dentry);
mntput(m);
}
- mntput_long(mnt);
+ mntput(mnt);
}
}
@@ -1226,19 +1190,21 @@ void release_mounts(struct list_head *head)
*/
void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
+ LIST_HEAD(tmp_list);
struct vfsmount *p;
for (p = mnt; p; p = next_mnt(p, mnt))
- list_move(&p->mnt_hash, kill);
+ list_move(&p->mnt_hash, &tmp_list);
if (propagate)
- propagate_umount(kill);
+ propagate_umount(&tmp_list);
- list_for_each_entry(p, kill, mnt_hash) {
+ list_for_each_entry(p, &tmp_list, mnt_hash) {
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
+ __mnt_make_shortterm(p);
list_del_init(&p->mnt_child);
if (p->mnt_parent != p) {
p->mnt_parent->mnt_ghosts++;
@@ -1246,6 +1212,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
}
change_mnt_propagation(p, MS_PRIVATE);
}
+ list_splice(&tmp_list, kill);
}
static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);
@@ -1844,9 +1811,10 @@ static int do_move_mount(struct path *path, char *old_name)
return err;
down_write(&namespace_sem);
- while (d_mountpoint(path->dentry) &&
- follow_down(path))
- ;
+ err = follow_down(path, true);
+ if (err < 0)
+ goto out;
+
err = -EINVAL;
if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
goto out;
@@ -1924,15 +1892,14 @@ static int do_new_mount(struct path *path, char *type, int flags,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- return do_add_mount(mnt, path, mnt_flags, NULL);
+ return do_add_mount(mnt, path, mnt_flags);
}
/*
* add a mount into a namespace's mount tree
- * - provide the option of adding the new mount to an expiration list
+ * - this unconditionally eats one of the caller's references to newmnt.
*/
-int do_add_mount(struct vfsmount *newmnt, struct path *path,
- int mnt_flags, struct list_head *fslist)
+int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
{
int err;
@@ -1940,9 +1907,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
down_write(&namespace_sem);
/* Something was mounted here while we slept */
- while (d_mountpoint(path->dentry) &&
- follow_down(path))
- ;
+ err = follow_down(path, true);
+ if (err < 0)
+ goto unlock;
+
err = -EINVAL;
if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
goto unlock;
@@ -1961,19 +1929,45 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
if ((err = graft_tree(newmnt, path)))
goto unlock;
- if (fslist) /* add to the specified expiration list */
- list_add_tail(&newmnt->mnt_expire, fslist);
-
up_write(&namespace_sem);
return 0;
unlock:
up_write(&namespace_sem);
- mntput_long(newmnt);
+ mntput(newmnt);
return err;
}
-EXPORT_SYMBOL_GPL(do_add_mount);
+/**
+ * mnt_set_expiry - Put a mount on an expiration list
+ * @mnt: The mount to list.
+ * @expiry_list: The list to add the mount to.
+ */
+void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
+{
+ down_write(&namespace_sem);
+ br_write_lock(vfsmount_lock);
+
+ list_add_tail(&mnt->mnt_expire, expiry_list);
+
+ br_write_unlock(vfsmount_lock);
+ up_write(&namespace_sem);
+}
+EXPORT_SYMBOL(mnt_set_expiry);
+
+/*
+ * Remove a vfsmount from any expiration list it may be on
+ */
+void mnt_clear_expiry(struct vfsmount *mnt)
+{
+ if (!list_empty(&mnt->mnt_expire)) {
+ down_write(&namespace_sem);
+ br_write_lock(vfsmount_lock);
+ list_del_init(&mnt->mnt_expire);
+ br_write_unlock(vfsmount_lock);
+ up_write(&namespace_sem);
+ }
+}
/*
* process a list of expirable mountpoints with the intent of discarding any
@@ -2262,6 +2256,22 @@ static struct mnt_namespace *alloc_mnt_ns(void)
return new_ns;
}
+void mnt_make_longterm(struct vfsmount *mnt)
+{
+ __mnt_make_longterm(mnt);
+}
+
+void mnt_make_shortterm(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
+ return;
+ br_write_lock(vfsmount_lock);
+ atomic_dec(&mnt->mnt_longterm);
+ br_write_unlock(vfsmount_lock);
+#endif
+}
+
/*
* Allocate a new namespace structure and populate it with contents
* copied from the namespace of the passed in task structure.
@@ -2299,14 +2309,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
q = new_ns->root;
while (p) {
q->mnt_ns = new_ns;
+ __mnt_make_longterm(q);
if (fs) {
if (p == fs->root.mnt) {
+ fs->root.mnt = mntget(q);
+ __mnt_make_longterm(q);
+ mnt_make_shortterm(p);
rootmnt = p;
- fs->root.mnt = mntget_long(q);
}
if (p == fs->pwd.mnt) {
+ fs->pwd.mnt = mntget(q);
+ __mnt_make_longterm(q);
+ mnt_make_shortterm(p);
pwdmnt = p;
- fs->pwd.mnt = mntget_long(q);
}
}
p = next_mnt(p, mnt_ns->root);
@@ -2315,9 +2330,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
up_write(&namespace_sem);
if (rootmnt)
- mntput_long(rootmnt);
+ mntput(rootmnt);
if (pwdmnt)
- mntput_long(pwdmnt);
+ mntput(pwdmnt);
return new_ns;
}
@@ -2350,6 +2365,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
new_ns = alloc_mnt_ns();
if (!IS_ERR(new_ns)) {
mnt->mnt_ns = new_ns;
+ __mnt_make_longterm(mnt);
new_ns->root = mnt;
list_add(&new_ns->list, &new_ns->root->mnt_list);
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index df8c03a02161..2c3eb33b904d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -970,7 +970,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
{
struct nfs_server *server = NFS_SERVER(inode);
- if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags))
+ if (IS_AUTOMOUNT(inode))
return 0;
if (nd != NULL) {
/* VFS wants an on-the-wire revalidation */
@@ -1173,6 +1173,7 @@ const struct dentry_operations nfs_dentry_operations = {
.d_revalidate = nfs_lookup_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
+ .d_automount = nfs_d_automount,
};
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1246,6 +1247,7 @@ const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs_open_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
+ .d_automount = nfs_d_automount,
};
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ce00b704452c..d8512423ba72 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -300,7 +300,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
else
inode->i_op = &nfs_mountpoint_inode_operations;
inode->i_fop = NULL;
- set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags);
+ inode->i_flags |= S_AUTOMOUNT;
}
} else if (S_ISLNK(inode->i_mode))
inode->i_op = &nfs_symlink_inode_operations;
@@ -1208,7 +1208,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/* Update the fsid? */
if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
- !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
+ !IS_AUTOMOUNT(inode))
server->fsid = fattr->fsid;
/*
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bfa3a34af801..4644f04b4b46 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -252,6 +252,7 @@ extern char *nfs_path(const char *base,
const struct dentry *droot,
const struct dentry *dentry,
char *buffer, ssize_t buflen);
+extern struct vfsmount *nfs_d_automount(struct path *path);
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 74aaf3963c10..f32b8603dca8 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -97,9 +97,8 @@ Elong:
}
/*
- * nfs_follow_mountpoint - handle crossing a mountpoint on the server
- * @dentry - dentry of mountpoint
- * @nd - nameidata info
+ * nfs_d_automount - Handle crossing a mountpoint on the server
+ * @path - The mountpoint
*
* When we encounter a mountpoint on the server, we want to set up
* a mountpoint on the client too, to prevent inode numbers from
@@ -109,87 +108,65 @@ Elong:
* situation, and that different filesystems may want to use
* different security flavours.
*/
-static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
+struct vfsmount *nfs_d_automount(struct path *path)
{
struct vfsmount *mnt;
- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+ struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
struct dentry *parent;
struct nfs_fh *fh = NULL;
struct nfs_fattr *fattr = NULL;
int err;
- dprintk("--> nfs_follow_mountpoint()\n");
+ dprintk("--> nfs_d_automount()\n");
- err = -ESTALE;
- if (IS_ROOT(dentry))
- goto out_err;
+ mnt = ERR_PTR(-ESTALE);
+ if (IS_ROOT(path->dentry))
+ goto out_nofree;
- err = -ENOMEM;
+ mnt = ERR_PTR(-ENOMEM);
fh = nfs_alloc_fhandle();
fattr = nfs_alloc_fattr();
if (fh == NULL || fattr == NULL)
- goto out_err;
+ goto out;
dprintk("%s: enter\n", __func__);
- dput(nd->path.dentry);
- nd->path.dentry = dget(dentry);
- /* Look it up again */
- parent = dget_parent(nd->path.dentry);
+ /* Look it up again to get its attributes */
+ parent = dget_parent(path->dentry);
err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
- &nd->path.dentry->d_name,
+ &path->dentry->d_name,
fh, fattr);
dput(parent);
- if (err != 0)
- goto out_err;
+ if (err != 0) {
+ mnt = ERR_PTR(err);
+ goto out;
+ }
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
- mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
+ mnt = nfs_do_refmount(path->mnt, path->dentry);
else
- mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh,
- fattr);
- err = PTR_ERR(mnt);
+ mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr);
if (IS_ERR(mnt))
- goto out_err;
+ goto out;
- mntget(mnt);
- err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
- &nfs_automount_list);
- if (err < 0) {
- mntput(mnt);
- if (err == -EBUSY)
- goto out_follow;
- goto out_err;
- }
- path_put(&nd->path);
- nd->path.mnt = mnt;
- nd->path.dentry = dget(mnt->mnt_root);
+ dprintk("%s: done, success\n", __func__);
+ mntget(mnt); /* prevent immediate expiration */
+ mnt_set_expiry(mnt, &nfs_automount_list);
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+
out:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
- dprintk("%s: done, returned %d\n", __func__, err);
-
- dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
- return ERR_PTR(err);
-out_err:
- path_put(&nd->path);
- goto out;
-out_follow:
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path))
- ;
- err = 0;
- goto out;
+out_nofree:
+ dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt);
+ return mnt;
}
const struct inode_operations nfs_mountpoint_inode_operations = {
- .follow_link = nfs_follow_mountpoint,
.getattr = nfs_getattr,
};
const struct inode_operations nfs_referral_inode_operations = {
- .follow_link = nfs_follow_mountpoint,
};
static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
new file mode 100644
index 000000000000..34e5c40af5ef
--- /dev/null
+++ b/fs/nfsd/acl.h
@@ -0,0 +1,59 @@
+/*
+ * Common NFSv4 ACL handling definitions.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LINUX_NFS4_ACL_H
+#define LINUX_NFS4_ACL_H
+
+#include <linux/posix_acl.h>
+
+/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to
+ * fit in a page: */
+#define NFS4_ACL_MAX 170
+
+struct nfs4_acl *nfs4_acl_new(int);
+int nfs4_acl_get_whotype(char *, u32);
+int nfs4_acl_write_who(int who, char *p);
+int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group,
+ uid_t who, u32 mask);
+
+#define NFS4_ACL_TYPE_DEFAULT 0x01
+#define NFS4_ACL_DIR 0x02
+#define NFS4_ACL_OWNER 0x04
+
+struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *,
+ struct posix_acl *, unsigned int flags);
+int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **,
+ struct posix_acl **, unsigned int flags);
+
+#endif /* LINUX_NFS4_ACL_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c0fcb7ab7f6d..8b31e5f8795d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1,4 +1,3 @@
-#define MSNFS /* HACK HACK */
/*
* NFS exporting and validation.
*
@@ -1444,9 +1443,6 @@ static struct flags {
{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
{ NFSEXP_V4ROOT, {"v4root", ""}},
-#ifdef MSNFS
- { NFSEXP_MSNFS, {"msnfs", ""}},
-#endif
{ 0, {"", ""}}
};
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
new file mode 100644
index 000000000000..2f3be1321534
--- /dev/null
+++ b/fs/nfsd/idmap.h
@@ -0,0 +1,62 @@
+/*
+ * Mapping of UID to name and vice versa.
+ *
+ * Copyright (c) 2002, 2003 The Regents of the University of
+ * Michigan. All rights reserved.
+> *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LINUX_NFSD_IDMAP_H
+#define LINUX_NFSD_IDMAP_H
+
+#include <linux/in.h>
+#include <linux/sunrpc/svc.h>
+
+/* XXX from linux/nfs_idmap.h */
+#define IDMAP_NAMESZ 128
+
+#ifdef CONFIG_NFSD_V4
+int nfsd_idmap_init(void);
+void nfsd_idmap_shutdown(void);
+#else
+static inline int nfsd_idmap_init(void)
+{
+ return 0;
+}
+static inline void nfsd_idmap_shutdown(void)
+{
+}
+#endif
+
+__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *);
+__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *);
+int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *);
+int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *);
+
+#endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 5b7e3021e06b..2247fc91d5e9 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -151,10 +151,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
__be32 nfserr;
u32 max_blocksize = svc_max_payload(rqstp);
- dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
+ dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
- (unsigned long) argp->offset);
+ (unsigned long long) argp->offset);
/* Obtain buffer pointer for payload.
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
@@ -191,10 +191,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
__be32 nfserr;
unsigned long cnt = argp->len;
- dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n",
+ dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
SVCFH_fmt(&argp->fh),
argp->len,
- (unsigned long) argp->offset,
+ (unsigned long long) argp->offset,
argp->stable? " stable" : "");
fh_copy(&resp->fh, &argp->fh);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index e48052615159..ad88f1c0a4c3 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -36,7 +36,7 @@
#include <linux/slab.h>
#include <linux/nfs_fs.h>
-#include <linux/nfs4_acl.h>
+#include "acl.h"
/* mode bit translations: */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 21a63da305ff..3be975e18919 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -628,10 +628,8 @@ static int max_cb_time(void)
return max(nfsd4_lease/10, (time_t)1) * HZ;
}
-/* Reference counting, callback cleanup, etc., all look racy as heck.
- * And why is cl_cb_set an atomic? */
-int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
+static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
struct rpc_timeout timeparms = {
.to_initval = max_cb_time(),
@@ -641,6 +639,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
.net = &init_net,
.address = (struct sockaddr *) &conn->cb_addr,
.addrsize = conn->cb_addrlen,
+ .saddress = (struct sockaddr *) &conn->cb_saddr,
.timeout = &timeparms,
.program = &cb_program,
.version = 0,
@@ -657,6 +656,10 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
args.protocol = XPRT_TRANSPORT_TCP;
clp->cl_cb_ident = conn->cb_ident;
} else {
+ if (!conn->cb_xprt)
+ return -EINVAL;
+ clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
+ clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
args.protocol = XPRT_TRANSPORT_BC_TCP;
@@ -679,14 +682,20 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
(int)clp->cl_name.len, clp->cl_name.data, reason);
}
+static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
+{
+ clp->cl_cb_state = NFSD4_CB_DOWN;
+ warn_no_callback_path(clp, reason);
+}
+
static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
{
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status)
- warn_no_callback_path(clp, task->tk_status);
+ nfsd4_mark_cb_down(clp, task->tk_status);
else
- atomic_set(&clp->cl_cb_set, 1);
+ clp->cl_cb_state = NFSD4_CB_UP;
}
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
@@ -709,6 +718,11 @@ int set_callback_cred(void)
static struct workqueue_struct *callback_wq;
+static void run_nfsd4_cb(struct nfsd4_callback *cb)
+{
+ queue_work(callback_wq, &cb->cb_work);
+}
+
static void do_probe_callback(struct nfs4_client *clp)
{
struct nfsd4_callback *cb = &clp->cl_cb_null;
@@ -723,7 +737,7 @@ static void do_probe_callback(struct nfs4_client *clp)
cb->cb_ops = &nfsd4_cb_probe_ops;
- queue_work(callback_wq, &cb->cb_work);
+ run_nfsd4_cb(cb);
}
/*
@@ -732,14 +746,21 @@ static void do_probe_callback(struct nfs4_client *clp)
*/
void nfsd4_probe_callback(struct nfs4_client *clp)
{
+ /* XXX: atomicity? Also, should we be using cl_cb_flags? */
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
do_probe_callback(clp);
}
-void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
+void nfsd4_probe_callback_sync(struct nfs4_client *clp)
{
- BUG_ON(atomic_read(&clp->cl_cb_set));
+ nfsd4_probe_callback(clp);
+ flush_workqueue(callback_wq);
+}
+void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
+{
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
spin_lock(&clp->cl_lock);
memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
spin_unlock(&clp->cl_lock);
@@ -750,24 +771,14 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
* If the slot is available, then mark it busy. Otherwise, set the
* thread for sleeping on the callback RPC wait queue.
*/
-static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
- struct rpc_task *task)
+static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
{
- u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data;
- int status = 0;
-
- dprintk("%s: %u:%u:%u:%u\n", __func__,
- ptr[0], ptr[1], ptr[2], ptr[3]);
-
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
dprintk("%s slot is busy\n", __func__);
- status = -EAGAIN;
- goto out;
+ return false;
}
-out:
- dprintk("%s status=%d\n", __func__, status);
- return status;
+ return true;
}
/*
@@ -780,20 +791,19 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
struct nfs4_client *clp = dp->dl_client;
u32 minorversion = clp->cl_minorversion;
- int status = 0;
cb->cb_minorversion = minorversion;
if (minorversion) {
- status = nfsd41_cb_setup_sequence(clp, task);
- if (status) {
- if (status != -EAGAIN) {
- /* terminate rpc task */
- task->tk_status = status;
- task->tk_action = NULL;
- }
+ if (!nfsd41_cb_get_slot(clp, task))
return;
- }
}
+ spin_lock(&clp->cl_lock);
+ if (list_empty(&cb->cb_per_client)) {
+ /* This is the first call, not a restart */
+ cb->cb_done = false;
+ list_add(&cb->cb_per_client, &clp->cl_callbacks);
+ }
+ spin_unlock(&clp->cl_lock);
rpc_call_start(task);
}
@@ -829,15 +839,18 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
nfsd4_cb_done(task, calldata);
- if (current_rpc_client == NULL) {
- /* We're shutting down; give up. */
- /* XXX: err, or is it ok just to fall through
- * and rpc_restart_call? */
+ if (current_rpc_client != task->tk_client) {
+ /* We're shutting down or changing cl_cb_client; leave
+ * it to nfsd4_process_cb_update to restart the call if
+ * necessary. */
return;
}
+ if (cb->cb_done)
+ return;
switch (task->tk_status) {
case 0:
+ cb->cb_done = true;
return;
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
@@ -846,32 +859,30 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
break;
default:
/* Network partition? */
- atomic_set(&clp->cl_cb_set, 0);
- warn_no_callback_path(clp, task->tk_status);
- if (current_rpc_client != task->tk_client) {
- /* queue a callback on the new connection: */
- atomic_inc(&dp->dl_count);
- nfsd4_cb_recall(dp);
- return;
- }
+ nfsd4_mark_cb_down(clp, task->tk_status);
}
if (dp->dl_retries--) {
rpc_delay(task, 2*HZ);
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
- } else {
- atomic_set(&clp->cl_cb_set, 0);
- warn_no_callback_path(clp, task->tk_status);
}
+ nfsd4_mark_cb_down(clp, task->tk_status);
+ cb->cb_done = true;
}
static void nfsd4_cb_recall_release(void *calldata)
{
struct nfsd4_callback *cb = calldata;
+ struct nfs4_client *clp = cb->cb_clp;
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
- nfs4_put_delegation(dp);
+ if (cb->cb_done) {
+ spin_lock(&clp->cl_lock);
+ list_del(&cb->cb_per_client);
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_delegation(dp);
+ }
}
static const struct rpc_call_ops nfsd4_cb_recall_ops = {
@@ -906,16 +917,33 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
flush_workqueue(callback_wq);
}
-void nfsd4_release_cb(struct nfsd4_callback *cb)
+static void nfsd4_release_cb(struct nfsd4_callback *cb)
{
if (cb->cb_ops->rpc_release)
cb->cb_ops->rpc_release(cb);
}
-void nfsd4_process_cb_update(struct nfsd4_callback *cb)
+/* requires cl_lock: */
+static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
+{
+ struct nfsd4_session *s;
+ struct nfsd4_conn *c;
+
+ list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
+ list_for_each_entry(c, &s->se_conns, cn_persession) {
+ if (c->cn_flags & NFS4_CDFC4_BACK)
+ return c;
+ }
+ }
+ return NULL;
+}
+
+static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
{
struct nfs4_cb_conn conn;
struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = NULL;
+ struct nfsd4_conn *c;
int err;
/*
@@ -926,6 +954,10 @@ void nfsd4_process_cb_update(struct nfsd4_callback *cb)
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
}
+ if (clp->cl_cb_conn.cb_xprt) {
+ svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+ clp->cl_cb_conn.cb_xprt = NULL;
+ }
if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
return;
spin_lock(&clp->cl_lock);
@@ -936,11 +968,22 @@ void nfsd4_process_cb_update(struct nfsd4_callback *cb)
BUG_ON(!clp->cl_cb_flags);
clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
+ c = __nfsd4_find_backchannel(clp);
+ if (c) {
+ svc_xprt_get(c->cn_xprt);
+ conn.cb_xprt = c->cn_xprt;
+ ses = c->cn_session;
+ }
spin_unlock(&clp->cl_lock);
- err = setup_callback_client(clp, &conn);
- if (err)
+ err = setup_callback_client(clp, &conn, ses);
+ if (err) {
warn_no_callback_path(clp, err);
+ return;
+ }
+ /* Yay, the callback channel's back! Restart any callbacks: */
+ list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
+ run_nfsd4_cb(cb);
}
void nfsd4_do_callback_rpc(struct work_struct *w)
@@ -965,10 +1008,11 @@ void nfsd4_do_callback_rpc(struct work_struct *w)
void nfsd4_cb_recall(struct nfs4_delegation *dp)
{
struct nfsd4_callback *cb = &dp->dl_recall;
+ struct nfs4_client *clp = dp->dl_client;
dp->dl_retries = 1;
cb->cb_op = dp;
- cb->cb_clp = dp->dl_client;
+ cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
@@ -977,5 +1021,8 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp)
cb->cb_ops = &nfsd4_cb_recall_ops;
dp->dl_retries = 1;
- queue_work(callback_wq, &dp->dl_recall.cb_work);
+ INIT_LIST_HEAD(&cb->cb_per_client);
+ cb->cb_done = true;
+
+ run_nfsd4_cb(&dp->dl_recall);
}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index f0695e815f0e..6d2c397d458b 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -33,10 +33,11 @@
*/
#include <linux/module.h>
-#include <linux/nfsd_idmap.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include "idmap.h"
+#include "nfsd.h"
/*
* Cache entry
@@ -514,7 +515,7 @@ rqst_authname(struct svc_rqst *rqstp)
return clp->name;
}
-static int
+static __be32
idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
uid_t *id)
{
@@ -524,15 +525,15 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
int ret;
if (namelen + 1 > sizeof(key.name))
- return -EINVAL;
+ return nfserr_badowner;
memcpy(key.name, name, namelen);
key.name[namelen] = '\0';
strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item);
if (ret == -ENOENT)
- ret = -ESRCH; /* nfserr_badname */
+ return nfserr_badowner;
if (ret)
- return ret;
+ return nfserrno(ret);
*id = item->id;
cache_put(&item->h, &nametoid_cache);
return 0;
@@ -560,14 +561,14 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
return ret;
}
-int
+__be32
nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
__u32 *id)
{
return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
}
-int
+__be32
nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
__u32 *id)
{
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0cdfd022bb7b..db52546143d1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -604,9 +604,7 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
-static __be32
-nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
- void *arg)
+static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
{
struct svc_fh tmp_fh;
__be32 ret;
@@ -615,13 +613,19 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
ret = exp_pseudoroot(rqstp, &tmp_fh);
if (ret)
return ret;
- if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) {
+ if (tmp_fh.fh_dentry == fh->fh_dentry) {
fh_put(&tmp_fh);
return nfserr_noent;
}
fh_put(&tmp_fh);
- return nfsd_lookup(rqstp, &cstate->current_fh,
- "..", 2, &cstate->current_fh);
+ return nfsd_lookup(rqstp, fh, "..", 2, fh);
+}
+
+static __be32
+nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ void *arg)
+{
+ return nfsd4_do_lookupp(rqstp, &cstate->current_fh);
}
static __be32
@@ -769,10 +773,36 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
} else
secinfo->si_exp = exp;
dput(dentry);
+ if (cstate->minorversion)
+ /* See rfc 5661 section 2.6.3.1.1.8 */
+ fh_put(&cstate->current_fh);
return err;
}
static __be32
+nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_secinfo_no_name *sin)
+{
+ __be32 err;
+
+ switch (sin->sin_style) {
+ case NFS4_SECINFO_STYLE4_CURRENT_FH:
+ break;
+ case NFS4_SECINFO_STYLE4_PARENT:
+ err = nfsd4_do_lookupp(rqstp, &cstate->current_fh);
+ if (err)
+ return err;
+ break;
+ default:
+ return nfserr_inval;
+ }
+ exp_get(cstate->current_fh.fh_export);
+ sin->sin_exp = cstate->current_fh.fh_export;
+ fh_put(&cstate->current_fh);
+ return nfs_ok;
+}
+
+static __be32
nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setattr *setattr)
{
@@ -974,8 +1004,8 @@ static const char *nfsd4_op_name(unsigned opnum);
* Also note, enforced elsewhere:
* - SEQUENCE other than as first op results in
* NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
- * - BIND_CONN_TO_SESSION must be the only op in its compound
- * (Will be enforced in nfsd4_bind_conn_to_session().)
+ * - BIND_CONN_TO_SESSION must be the only op in its compound.
+ * (Enforced in nfsd4_bind_conn_to_session().)
* - DESTROY_SESSION must be the final operation in a compound, if
* sessionid's in SEQUENCE and DESTROY_SESSION are the same.
* (Enforced in nfsd4_destroy_session().)
@@ -1126,10 +1156,6 @@ encode_op:
nfsd4_increment_op_stats(op->opnum);
}
- if (!rqstp->rq_usedeferral && status == nfserr_dropit) {
- dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__);
- status = nfserr_jukebox;
- }
resp->cstate.status = status;
fh_put(&resp->cstate.current_fh);
@@ -1300,6 +1326,11 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_EXCHANGE_ID",
},
+ [OP_BIND_CONN_TO_SESSION] = {
+ .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ .op_name = "OP_BIND_CONN_TO_SESSION",
+ },
[OP_CREATE_SESSION] = {
.op_func = (nfsd4op_func)nfsd4_create_session,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
@@ -1320,6 +1351,10 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_RECLAIM_COMPLETE",
},
+ [OP_SECINFO_NO_NAME] = {
+ .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
+ .op_name = "OP_SECINFO_NO_NAME",
+ },
};
static const char *nfsd4_op_name(unsigned opnum)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 7e26caab2a26..ffb59ef6f82f 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -302,7 +302,6 @@ purge_old(struct dentry *parent, struct dentry *child)
{
int status;
- /* note: we currently use this path only for minorversion 0 */
if (nfs4_has_reclaimed_state(child->d_name.name, false))
return 0;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fbd18c3074bb..d98d0213285d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,7 +230,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
dp->dl_client = clp;
get_nfs4_file(fp);
dp->dl_file = fp;
- nfs4_file_get_access(fp, O_RDONLY);
+ dp->dl_vfs_file = find_readable_file(fp);
+ get_file(dp->dl_vfs_file);
dp->dl_flock = NULL;
dp->dl_type = type;
dp->dl_stateid.si_boot = boot_time;
@@ -252,6 +253,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
if (atomic_dec_and_test(&dp->dl_count)) {
dprintk("NFSD: freeing dp %p\n",dp);
put_nfs4_file(dp->dl_file);
+ fput(dp->dl_vfs_file);
kmem_cache_free(deleg_slab, dp);
num_delegations--;
}
@@ -265,12 +267,10 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
static void
nfs4_close_delegation(struct nfs4_delegation *dp)
{
- struct file *filp = find_readable_file(dp->dl_file);
-
dprintk("NFSD: close_delegation dp %p\n",dp);
+ /* XXX: do we even need this check?: */
if (dp->dl_flock)
- vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
- nfs4_file_put_access(dp->dl_file, O_RDONLY);
+ vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock);
}
/* Called under the state lock. */
@@ -642,6 +642,7 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u)
free_conn(c);
}
spin_unlock(&clp->cl_lock);
+ nfsd4_probe_callback(clp);
}
static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
@@ -679,15 +680,12 @@ static int nfsd4_register_conn(struct nfsd4_conn *conn)
return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
}
-static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
+static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, u32 dir)
{
struct nfsd4_conn *conn;
- u32 flags = NFS4_CDFC4_FORE;
int ret;
- if (ses->se_flags & SESSION4_BACK_CHAN)
- flags |= NFS4_CDFC4_BACK;
- conn = alloc_conn(rqstp, flags);
+ conn = alloc_conn(rqstp, dir);
if (!conn)
return nfserr_jukebox;
nfsd4_hash_conn(conn, ses);
@@ -698,6 +696,17 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
return nfs_ok;
}
+static __be32 nfsd4_new_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_session *ses)
+{
+ u32 dir = NFS4_CDFC4_FORE;
+
+ if (ses->se_flags & SESSION4_BACK_CHAN)
+ dir |= NFS4_CDFC4_BACK;
+
+ return nfsd4_new_conn(rqstp, ses, dir);
+}
+
+/* must be called under client_lock */
static void nfsd4_del_conns(struct nfsd4_session *s)
{
struct nfs4_client *clp = s->se_client;
@@ -749,6 +758,8 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n
*/
slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
+ if (numslots < 1)
+ return NULL;
new = alloc_session(slotsize, numslots);
if (!new) {
@@ -769,25 +780,30 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n
idx = hash_sessionid(&new->se_sessionid);
spin_lock(&client_lock);
list_add(&new->se_hash, &sessionid_hashtbl[idx]);
+ spin_lock(&clp->cl_lock);
list_add(&new->se_perclnt, &clp->cl_sessions);
+ spin_unlock(&clp->cl_lock);
spin_unlock(&client_lock);
- status = nfsd4_new_conn(rqstp, new);
+ status = nfsd4_new_conn_from_crses(rqstp, new);
/* whoops: benny points out, status is ignored! (err, or bogus) */
if (status) {
free_session(&new->se_ref);
return NULL;
}
- if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) {
+ if (cses->flags & SESSION4_BACK_CHAN) {
struct sockaddr *sa = svc_addr(rqstp);
-
- clp->cl_cb_session = new;
- clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
- svc_xprt_get(rqstp->rq_xprt);
+ /*
+ * This is a little silly; with sessions there's no real
+ * use for the callback address. Use the peer address
+ * as a reasonable default for now, but consider fixing
+ * the rpc client not to require an address in the
+ * future:
+ */
rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
- nfsd4_probe_callback(clp);
}
+ nfsd4_probe_callback(clp);
return new;
}
@@ -817,7 +833,9 @@ static void
unhash_session(struct nfsd4_session *ses)
{
list_del(&ses->se_hash);
+ spin_lock(&ses->se_client->cl_lock);
list_del(&ses->se_perclnt);
+ spin_unlock(&ses->se_client->cl_lock);
}
/* must be called under the client_lock */
@@ -923,8 +941,10 @@ unhash_client_locked(struct nfs4_client *clp)
mark_client_expired(clp);
list_del(&clp->cl_lru);
+ spin_lock(&clp->cl_lock);
list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
list_del_init(&ses->se_hash);
+ spin_unlock(&clp->cl_lock);
}
static void
@@ -1051,12 +1071,13 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
atomic_set(&clp->cl_refcount, 0);
- atomic_set(&clp->cl_cb_set, 0);
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_strhash);
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
INIT_LIST_HEAD(&clp->cl_lru);
+ INIT_LIST_HEAD(&clp->cl_callbacks);
spin_lock_init(&clp->cl_lock);
INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
clp->cl_time = get_seconds();
@@ -1132,54 +1153,55 @@ find_unconfirmed_client(clientid_t *clid)
return NULL;
}
-/*
- * Return 1 iff clp's clientid establishment method matches the use_exchange_id
- * parameter. Matching is based on the fact the at least one of the
- * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1
- *
- * FIXME: we need to unify the clientid namespaces for nfsv4.x
- * and correctly deal with client upgrade/downgrade in EXCHANGE_ID
- * and SET_CLIENTID{,_CONFIRM}
- */
-static inline int
-match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id)
+static bool clp_used_exchangeid(struct nfs4_client *clp)
{
- bool has_exchange_flags = (clp->cl_exchange_flags != 0);
- return use_exchange_id == has_exchange_flags;
-}
+ return clp->cl_exchange_flags != 0;
+}
static struct nfs4_client *
-find_confirmed_client_by_str(const char *dname, unsigned int hashval,
- bool use_exchange_id)
+find_confirmed_client_by_str(const char *dname, unsigned int hashval)
{
struct nfs4_client *clp;
list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
- if (same_name(clp->cl_recdir, dname) &&
- match_clientid_establishment(clp, use_exchange_id))
+ if (same_name(clp->cl_recdir, dname))
return clp;
}
return NULL;
}
static struct nfs4_client *
-find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
- bool use_exchange_id)
+find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
{
struct nfs4_client *clp;
list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
- if (same_name(clp->cl_recdir, dname) &&
- match_clientid_establishment(clp, use_exchange_id))
+ if (same_name(clp->cl_recdir, dname))
return clp;
}
return NULL;
}
+static void rpc_svcaddr2sockaddr(struct sockaddr *sa, unsigned short family, union svc_addr_u *svcaddr)
+{
+ switch (family) {
+ case AF_INET:
+ ((struct sockaddr_in *)sa)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sa)->sin_addr = svcaddr->addr;
+ return;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)sa)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *)sa)->sin6_addr = svcaddr->addr6;
+ return;
+ }
+}
+
static void
-gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
+gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp)
{
struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
+ struct sockaddr *sa = svc_addr(rqstp);
+ u32 scopeid = rpc_get_scope_id(sa);
unsigned short expected_family;
/* Currently, we only support tcp and tcp6 for the callback channel */
@@ -1205,6 +1227,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
conn->cb_prog = se->se_callback_prog;
conn->cb_ident = se->se_callback_ident;
+ rpc_svcaddr2sockaddr((struct sockaddr *)&conn->cb_saddr, expected_family, &rqstp->rq_daddr);
return;
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
@@ -1344,7 +1367,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
case SP4_NONE:
break;
case SP4_SSV:
- return nfserr_encr_alg_unsupp;
+ return nfserr_serverfault;
default:
BUG(); /* checked by xdr code */
case SP4_MACH_CRED:
@@ -1361,8 +1384,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
nfs4_lock_state();
status = nfs_ok;
- conf = find_confirmed_client_by_str(dname, strhashval, true);
+ conf = find_confirmed_client_by_str(dname, strhashval);
if (conf) {
+ if (!clp_used_exchangeid(conf)) {
+ status = nfserr_clid_inuse; /* XXX: ? */
+ goto out;
+ }
if (!same_verf(&verf, &conf->cl_verifier)) {
/* 18.35.4 case 8 */
if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
@@ -1403,7 +1430,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
goto out;
}
- unconf = find_unconfirmed_client_by_str(dname, strhashval, true);
+ unconf = find_unconfirmed_client_by_str(dname, strhashval);
if (unconf) {
/*
* Possible retry or client restart. Per 18.35.4 case 4,
@@ -1560,6 +1587,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
status = nfs_ok;
memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
+ memcpy(&cr_ses->fore_channel, &new->se_fchannel,
+ sizeof(struct nfsd4_channel_attrs));
cs_slot->sl_seqid++;
cr_ses->seqid = cs_slot->sl_seqid;
@@ -1581,6 +1610,45 @@ static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
return argp->opcnt == resp->opcnt;
}
+static __be32 nfsd4_map_bcts_dir(u32 *dir)
+{
+ switch (*dir) {
+ case NFS4_CDFC4_FORE:
+ case NFS4_CDFC4_BACK:
+ return nfs_ok;
+ case NFS4_CDFC4_FORE_OR_BOTH:
+ case NFS4_CDFC4_BACK_OR_BOTH:
+ *dir = NFS4_CDFC4_BOTH;
+ return nfs_ok;
+ };
+ return nfserr_inval;
+}
+
+__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_bind_conn_to_session *bcts)
+{
+ __be32 status;
+
+ if (!nfsd4_last_compound_op(rqstp))
+ return nfserr_not_only_op;
+ spin_lock(&client_lock);
+ cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid);
+ /* Sorta weird: we only need the refcnt'ing because new_conn acquires
+ * client_lock iself: */
+ if (cstate->session) {
+ nfsd4_get_session(cstate->session);
+ atomic_inc(&cstate->session->se_client->cl_refcount);
+ }
+ spin_unlock(&client_lock);
+ if (!cstate->session)
+ return nfserr_badsession;
+
+ status = nfsd4_map_bcts_dir(&bcts->dir);
+ nfsd4_new_conn(rqstp, cstate->session, bcts->dir);
+ return nfs_ok;
+}
+
static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
{
if (!session)
@@ -1619,8 +1687,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
spin_unlock(&client_lock);
nfs4_lock_state();
- /* wait for callbacks */
- nfsd4_shutdown_callback(ses->se_client);
+ nfsd4_probe_callback_sync(ses->se_client);
nfs4_unlock_state();
nfsd4_del_conns(ses);
@@ -1733,8 +1800,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
out:
/* Hold a session reference until done processing the compound. */
if (cstate->session) {
+ struct nfs4_client *clp = session->se_client;
+
nfsd4_get_session(cstate->session);
- atomic_inc(&session->se_client->cl_refcount);
+ atomic_inc(&clp->cl_refcount);
+ if (clp->cl_cb_state == NFSD4_CB_DOWN)
+ seq->status_flags |= SEQ4_STATUS_CB_PATH_DOWN;
}
kfree(conn);
spin_unlock(&client_lock);
@@ -1775,7 +1846,6 @@ __be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid *setclid)
{
- struct sockaddr *sa = svc_addr(rqstp);
struct xdr_netobj clname = {
.len = setclid->se_namelen,
.data = setclid->se_name,
@@ -1801,10 +1871,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
strhashval = clientstr_hashval(dname);
nfs4_lock_state();
- conf = find_confirmed_client_by_str(dname, strhashval, false);
+ conf = find_confirmed_client_by_str(dname, strhashval);
if (conf) {
/* RFC 3530 14.2.33 CASE 0: */
status = nfserr_clid_inuse;
+ if (clp_used_exchangeid(conf))
+ goto out;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
char addr_str[INET6_ADDRSTRLEN];
rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
@@ -1819,7 +1891,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* has a description of SETCLIENTID request processing consisting
* of 5 bullet points, labeled as CASE0 - CASE4 below.
*/
- unconf = find_unconfirmed_client_by_str(dname, strhashval, false);
+ unconf = find_unconfirmed_client_by_str(dname, strhashval);
status = nfserr_resource;
if (!conf) {
/*
@@ -1876,7 +1948,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* for consistent minorversion use throughout:
*/
new->cl_minorversion = 0;
- gen_callback(new, setclid, rpc_get_scope_id(sa));
+ gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new, strhashval);
setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
@@ -1935,7 +2007,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
status = nfserr_clid_inuse;
else {
- atomic_set(&conf->cl_cb_set, 0);
nfsd4_change_callback(conf, &unconf->cl_cb_conn);
nfsd4_probe_callback(conf);
expire_client(unconf);
@@ -1964,7 +2035,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
unsigned int hash =
clientstr_hashval(unconf->cl_recdir);
conf = find_confirmed_client_by_str(unconf->cl_recdir,
- hash, false);
+ hash);
if (conf) {
nfsd4_remove_clid_dir(conf);
expire_client(conf);
@@ -2300,41 +2371,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
nfsd4_cb_recall(dp);
}
-/*
- * The file_lock is being reapd.
- *
- * Called by locks_free_lock() with lock_flocks() held.
- */
-static
-void nfsd_release_deleg_cb(struct file_lock *fl)
-{
- struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
-
- dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count));
-
- if (!(fl->fl_flags & FL_LEASE) || !dp)
- return;
- dp->dl_flock = NULL;
-}
-
-/*
- * Called from setlease() with lock_flocks() held
- */
-static
-int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
-{
- struct nfs4_delegation *onlistd =
- (struct nfs4_delegation *)onlist->fl_owner;
- struct nfs4_delegation *tryd =
- (struct nfs4_delegation *)try->fl_owner;
-
- if (onlist->fl_lmops != try->fl_lmops)
- return 0;
-
- return onlistd->dl_client == tryd->dl_client;
-}
-
-
static
int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
{
@@ -2346,8 +2382,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
static const struct lock_manager_operations nfsd_lease_mng_ops = {
.fl_break = nfsd_break_deleg_cb,
- .fl_release_private = nfsd_release_deleg_cb,
- .fl_mylease = nfsd_same_client_deleg_cb,
.fl_change = nfsd_change_deleg_cb,
};
@@ -2514,8 +2548,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file
if (!fp->fi_fds[oflag]) {
status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
&fp->fi_fds[oflag]);
- if (status == nfserr_dropit)
- status = nfserr_jukebox;
if (status)
return status;
}
@@ -2596,6 +2628,19 @@ nfs4_set_claim_prev(struct nfsd4_open *open)
open->op_stateowner->so_client->cl_firststate = 1;
}
+/* Should we give out recallable state?: */
+static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
+{
+ if (clp->cl_cb_state == NFSD4_CB_UP)
+ return true;
+ /*
+ * In the sessions case, since we don't have to establish a
+ * separate connection for callbacks, we assume it's OK
+ * until we hear otherwise:
+ */
+ return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
+}
+
/*
* Attempt to hand out a delegation.
*/
@@ -2604,10 +2649,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
{
struct nfs4_delegation *dp;
struct nfs4_stateowner *sop = stp->st_stateowner;
- int cb_up = atomic_read(&sop->so_client->cl_cb_set);
+ int cb_up;
struct file_lock *fl;
int status, flag = 0;
+ cb_up = nfsd4_cb_channel_good(sop->so_client);
flag = NFS4_OPEN_DELEGATE_NONE;
open->op_recall = 0;
switch (open->op_claim_type) {
@@ -2655,7 +2701,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
dp->dl_flock = fl;
/* vfs_setlease checks to see if delegation should be handed out.
- * the lock_manager callbacks fl_mylease and fl_change are used
+ * the lock_manager callback fl_change is used
*/
if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
dprintk("NFSD: setlease failed [%d], no delegation\n", status);
@@ -2794,7 +2840,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
renew_client(clp);
status = nfserr_cb_path_down;
if (!list_empty(&clp->cl_delegations)
- && !atomic_read(&clp->cl_cb_set))
+ && clp->cl_cb_state != NFSD4_CB_UP)
goto out;
status = nfs_ok;
out:
@@ -3081,9 +3127,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
if (status)
goto out;
renew_client(dp->dl_client);
- if (filpp)
+ if (filpp) {
*filpp = find_readable_file(dp->dl_file);
- BUG_ON(!*filpp);
+ BUG_ON(!*filpp);
+ }
} else { /* open or lock stateid */
stp = find_stateid(stateid, flags);
if (!stp)
@@ -4107,7 +4154,7 @@ nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
unsigned int strhashval = clientstr_hashval(name);
struct nfs4_client *clp;
- clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id);
+ clp = find_confirmed_client_by_str(name, strhashval);
return clp ? 1 : 0;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f35a94a04026..956629b9cdc9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -44,13 +44,14 @@
#include <linux/namei.h>
#include <linux/statfs.h>
#include <linux/utsname.h>
-#include <linux/nfsd_idmap.h>
-#include <linux/nfs4_acl.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include "idmap.h"
+#include "acl.h"
#include "xdr4.h"
#include "vfs.h"
+
#define NFSDDBG_FACILITY NFSDDBG_XDR
/*
@@ -288,17 +289,17 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
len += XDR_QUADLEN(dummy32) << 2;
READMEM(buf, dummy32);
ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
- host_err = 0;
+ status = nfs_ok;
if (ace->whotype != NFS4_ACL_WHO_NAMED)
ace->who = 0;
else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
- host_err = nfsd_map_name_to_gid(argp->rqstp,
+ status = nfsd_map_name_to_gid(argp->rqstp,
buf, dummy32, &ace->who);
else
- host_err = nfsd_map_name_to_uid(argp->rqstp,
+ status = nfsd_map_name_to_uid(argp->rqstp,
buf, dummy32, &ace->who);
- if (host_err)
- goto out_nfserr;
+ if (status)
+ return status;
}
} else
*acl = NULL;
@@ -420,6 +421,21 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
DECODE_TAIL;
}
+static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
+{
+ DECODE_HEAD;
+ u32 dummy;
+
+ READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
+ COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ READ32(bcts->dir);
+ /* XXX: Perhaps Tom Tucker could help us figure out how we
+ * should be using ctsa_use_conn_in_rdma_mode: */
+ READ32(dummy);
+
+ DECODE_TAIL;
+}
+
static __be32
nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
{
@@ -847,6 +863,17 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
}
static __be32
+nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+ struct nfsd4_secinfo_no_name *sin)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(sin->sin_style);
+ DECODE_TAIL;
+}
+
+static __be32
nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
{
__be32 status;
@@ -1005,7 +1032,7 @@ static __be32
nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
struct nfsd4_exchange_id *exid)
{
- int dummy;
+ int dummy, tmp;
DECODE_HEAD;
READ_BUF(NFS4_VERIFIER_SIZE);
@@ -1053,15 +1080,23 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
/* ssp_hash_algs<> */
READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ READ32(tmp);
+ while (tmp--) {
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy);
+ p += XDR_QUADLEN(dummy);
+ }
/* ssp_encr_algs<> */
READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ READ32(tmp);
+ while (tmp--) {
+ READ_BUF(4);
+ READ32(dummy);
+ READ_BUF(dummy);
+ p += XDR_QUADLEN(dummy);
+ }
/* ssp_window and ssp_num_gss_handles */
READ_BUF(8);
@@ -1339,7 +1374,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
/* new operations for NFSv4.1 */
[OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
[OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id,
[OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session,
[OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
@@ -1350,7 +1385,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
[OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
[OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
[OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2309,8 +2344,6 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
case nfserr_resource:
nfserr = nfserr_toosmall;
goto fail;
- case nfserr_dropit:
- goto fail;
case nfserr_noent:
goto skip_entry;
default:
@@ -2365,6 +2398,21 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
return nfserr;
}
+static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8);
+ WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ WRITE32(bcts->dir);
+ /* XXX: ? */
+ WRITE32(0);
+ ADJUST_ARGS();
+ }
+ return nfserr;
+}
+
static __be32
nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
{
@@ -2826,11 +2874,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
}
static __be32
-nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_secinfo *secinfo)
+nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
+ __be32 nfserr,struct svc_export *exp)
{
int i = 0;
- struct svc_export *exp = secinfo->si_exp;
u32 nflavs;
struct exp_flavor_info *flavs;
struct exp_flavor_info def_flavs[2];
@@ -2892,6 +2939,20 @@ out:
return nfserr;
}
+static __be32
+nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_secinfo *secinfo)
+{
+ return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp);
+}
+
+static __be32
+nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_secinfo_no_name *secinfo)
+{
+ return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp);
+}
+
/*
* The SETATTR encode routine is special -- it always encodes a bitmap,
* regardless of the error status.
@@ -3076,13 +3137,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
WRITE32(seq->seqid);
WRITE32(seq->slotid);
WRITE32(seq->maxslots);
- /*
- * FIXME: for now:
- * target_maxslots = maxslots
- * status_flags = 0
- */
+ /* For now: target_maxslots = maxslots */
WRITE32(seq->maxslots);
- WRITE32(0);
+ WRITE32(seq->status_flags);
ADJUST_ARGS();
resp->cstate.datap = p; /* DRC cache data pointer */
@@ -3143,7 +3200,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
/* NFSv4.1 operations */
[OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
[OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
[OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
[OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session,
@@ -3154,7 +3211,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
[OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
[OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
[OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 4514ebbee4d6..33b3e2b06779 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -8,12 +8,12 @@
#include <linux/namei.h>
#include <linux/ctype.h>
-#include <linux/nfsd_idmap.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/nfsd/syscall.h>
#include <linux/lockd/lockd.h>
#include <linux/sunrpc/clnt.h>
+#include "idmap.h"
#include "nfsd.h"
#include "cache.h"
@@ -127,6 +127,7 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
{
+#ifdef CONFIG_NFSD_DEPRECATED
static int warned;
if (file->f_dentry->d_name.name[0] == '.' && !warned) {
printk(KERN_INFO
@@ -135,6 +136,7 @@ static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size
current->comm, file->f_dentry->d_name.name);
warned = 1;
}
+#endif
if (! file->private_data) {
/* An attempt to read a transaction file without writing
* causes a 0-byte write so that the file can return
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 6b641cf2c19a..7ecfa2420307 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -158,6 +158,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
+#define nfserr_badowner cpu_to_be32(NFSERR_BADOWNER)
#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 08e17264784b..e15dc45fc5ec 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -735,9 +735,9 @@ nfserrno (int errno)
{ nfserr_stale, -ESTALE },
{ nfserr_jukebox, -ETIMEDOUT },
{ nfserr_jukebox, -ERESTARTSYS },
- { nfserr_dropit, -EAGAIN },
- { nfserr_dropit, -ENOMEM },
- { nfserr_badname, -ESRCH },
+ { nfserr_jukebox, -EAGAIN },
+ { nfserr_jukebox, -EWOULDBLOCK },
+ { nfserr_jukebox, -ENOMEM },
{ nfserr_io, -ETXTBSY },
{ nfserr_notsupp, -EOPNOTSUPP },
{ nfserr_toosmall, -ETOOSMALL },
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2bae1d86f5f2..18743c4d8bca 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -608,7 +608,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
/* Now call the procedure handler, and encode NFS status. */
nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
nfserr = map_new_errors(rqstp->rq_vers, nfserr);
- if (nfserr == nfserr_dropit) {
+ if (nfserr == nfserr_dropit || rqstp->rq_dropme) {
dprintk("nfsd: Dropping request; may be revisited later\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 39adc27b0685..3074656ba7bf 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,10 +68,12 @@ typedef struct {
struct nfsd4_callback {
void *cb_op;
struct nfs4_client *cb_clp;
+ struct list_head cb_per_client;
u32 cb_minorversion;
struct rpc_message cb_msg;
const struct rpc_call_ops *cb_ops;
struct work_struct cb_work;
+ bool cb_done;
};
struct nfs4_delegation {
@@ -81,6 +83,7 @@ struct nfs4_delegation {
atomic_t dl_count; /* ref count */
struct nfs4_client *dl_client;
struct nfs4_file *dl_file;
+ struct file *dl_vfs_file;
struct file_lock *dl_flock;
u32 dl_type;
time_t dl_time;
@@ -95,6 +98,7 @@ struct nfs4_delegation {
struct nfs4_cb_conn {
/* SETCLIENTID info */
struct sockaddr_storage cb_addr;
+ struct sockaddr_storage cb_saddr;
size_t cb_addrlen;
u32 cb_prog; /* used only in 4.0 case;
per-session otherwise */
@@ -146,6 +150,11 @@ struct nfsd4_create_session {
u32 gid;
};
+struct nfsd4_bind_conn_to_session {
+ struct nfs4_sessionid sessionid;
+ u32 dir;
+};
+
/* The single slot clientid cache structure */
struct nfsd4_clid_slot {
u32 sl_seqid;
@@ -235,9 +244,13 @@ struct nfs4_client {
unsigned long cl_cb_flags;
struct rpc_clnt *cl_cb_client;
u32 cl_cb_ident;
- atomic_t cl_cb_set;
+#define NFSD4_CB_UP 0
+#define NFSD4_CB_UNKNOWN 1
+#define NFSD4_CB_DOWN 2
+ int cl_cb_state;
struct nfsd4_callback cl_cb_null;
struct nfsd4_session *cl_cb_session;
+ struct list_head cl_callbacks; /* list of in-progress callbacks */
/* for all client information that callback code might need: */
spinlock_t cl_lock;
@@ -454,6 +467,7 @@ extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
extern void nfs4_free_stateowner(struct kref *kref);
extern int set_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
extern void nfsd4_do_callback_rpc(struct work_struct *);
extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 230b79fbf005..641117f2188d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1,4 +1,3 @@
-#define MSNFS /* HACK HACK */
/*
* File operations used by nfsd. Some of these have been ripped from
* other parts of the kernel because they weren't exported, others
@@ -35,8 +34,8 @@
#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
-#include <linux/nfs4_acl.h>
-#include <linux/nfsd_idmap.h>
+#include "acl.h"
+#include "idmap.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
@@ -88,8 +87,9 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
.dentry = dget(dentry)};
int err = 0;
- while (d_mountpoint(path.dentry) && follow_down(&path))
- ;
+ err = follow_down(&path, false);
+ if (err < 0)
+ goto out;
exp2 = rqst_exp_get_by_name(rqstp, &path);
if (IS_ERR(exp2)) {
@@ -273,6 +273,13 @@ out:
return err;
}
+static int nfsd_break_lease(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+ return break_lease(inode, O_WRONLY | O_NONBLOCK);
+}
+
/*
* Commit metadata changes to stable storage.
*/
@@ -375,16 +382,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
goto out;
}
- /*
- * If we are changing the size of the file, then
- * we need to break all leases.
- */
- host_err = break_lease(inode, O_WRONLY | O_NONBLOCK);
- if (host_err == -EWOULDBLOCK)
- host_err = -ETIMEDOUT;
- if (host_err) /* ENOMEM or EWOULDBLOCK */
- goto out_nfserr;
-
host_err = get_write_access(inode);
if (host_err)
goto out_nfserr;
@@ -425,7 +422,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
err = nfserr_notsync;
if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
+ host_err = nfsd_break_lease(inode);
+ if (host_err)
+ goto out_nfserr;
fh_lock(fhp);
+
host_err = notify_change(dentry, iap);
err = nfserrno(host_err);
fh_unlock(fhp);
@@ -752,8 +753,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
*/
if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
- if (host_err == -EWOULDBLOCK)
- host_err = -ETIMEDOUT;
if (host_err) /* NOMEM or WOULDBLOCK */
goto out_nfserr;
@@ -874,15 +873,6 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
-static inline int svc_msnfs(struct svc_fh *ffhp)
-{
-#ifdef MSNFS
- return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS);
-#else
- return 0;
-#endif
-}
-
static __be32
nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -895,9 +885,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
err = nfserr_perm;
inode = file->f_path.dentry->d_inode;
- if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
- goto out;
-
if (file->f_op->splice_read && rqstp->rq_splice_ok) {
struct splice_desc sd = {
.len = 0,
@@ -922,7 +909,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
fsnotify_access(file);
} else
err = nfserrno(host_err);
-out:
return err;
}
@@ -987,14 +973,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int stable = *stablep;
int use_wgather;
-#ifdef MSNFS
- err = nfserr_perm;
-
- if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
- goto out;
-#endif
-
dentry = file->f_path.dentry;
inode = dentry->d_inode;
exp = fhp->fh_export;
@@ -1045,7 +1023,6 @@ out_nfserr:
err = 0;
else
err = nfserrno(host_err);
-out:
return err;
}
@@ -1665,6 +1642,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
err = nfserrno(host_err);
goto out_dput;
}
+ err = nfserr_noent;
+ if (!dold->d_inode)
+ goto out_drop_write;
+ host_err = nfsd_break_lease(dold->d_inode);
+ if (host_err)
+ goto out_drop_write;
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
err = nfserrno(commit_metadata(ffhp));
@@ -1676,6 +1659,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
else
err = nfserrno(host_err);
}
+out_drop_write:
mnt_drop_write(tfhp->fh_export->ex_path.mnt);
out_dput:
dput(dnew);
@@ -1750,12 +1734,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (ndentry == trap)
goto out_dput_new;
- if (svc_msnfs(ffhp) &&
- ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
- host_err = -EPERM;
- goto out_dput_new;
- }
-
host_err = -EXDEV;
if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
goto out_dput_new;
@@ -1763,15 +1741,17 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (host_err)
goto out_dput_new;
+ host_err = nfsd_break_lease(odentry->d_inode);
+ if (host_err)
+ goto out_drop_write;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!host_err) {
host_err = commit_metadata(tfhp);
if (!host_err)
host_err = commit_metadata(ffhp);
}
-
+out_drop_write:
mnt_drop_write(ffhp->fh_export->ex_path.mnt);
-
out_dput_new:
dput(ndentry);
out_dput_old:
@@ -1834,18 +1814,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (host_err)
goto out_nfserr;
- if (type != S_IFDIR) { /* It's UNLINK */
-#ifdef MSNFS
- if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (rdentry->d_count > 1)) {
- host_err = -EPERM;
- } else
-#endif
+ host_err = nfsd_break_lease(rdentry->d_inode);
+ if (host_err)
+ goto out_put;
+ if (type != S_IFDIR)
host_err = vfs_unlink(dirp, rdentry);
- } else { /* It's RMDIR */
+ else
host_err = vfs_rmdir(dirp, rdentry);
- }
-
+out_put:
dput(rdentry);
if (!host_err)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 60fce3dc5cb5..366401e1a536 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -311,6 +311,11 @@ struct nfsd4_secinfo {
struct svc_export *si_exp; /* response */
};
+struct nfsd4_secinfo_no_name {
+ u32 sin_style; /* request */
+ struct svc_export *sin_exp; /* response */
+};
+
struct nfsd4_setattr {
stateid_t sa_stateid; /* request */
u32 sa_bmval[3]; /* request */
@@ -373,8 +378,8 @@ struct nfsd4_sequence {
u32 cachethis; /* request */
#if 0
u32 target_maxslots; /* response */
- u32 status_flags; /* response */
#endif /* not yet */
+ u32 status_flags; /* response */
};
struct nfsd4_destroy_session {
@@ -422,6 +427,7 @@ struct nfsd4_op {
/* NFSv4.1 */
struct nfsd4_exchange_id exchange_id;
+ struct nfsd4_bind_conn_to_session bind_conn_to_session;
struct nfsd4_create_session create_session;
struct nfsd4_destroy_session destroy_session;
struct nfsd4_sequence sequence;
@@ -518,6 +524,7 @@ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq);
extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
+extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *);
extern __be32 nfsd4_create_session(struct svc_rqst *,
struct nfsd4_compound_state *,
struct nfsd4_create_session *);
diff --git a/fs/pipe.c b/fs/pipe.c
index e2e95fb46a1e..89e9e19b1b2e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
static void __exit exit_pipe_fs(void)
{
unregister_filesystem(&pipe_fs_type);
- mntput_long(pipe_mnt);
+ mntput(pipe_mnt);
}
fs_initcall(init_pipe_fs);
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index e5f63da64d04..aa68a8a31518 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -29,7 +29,6 @@ config SQUASHFS
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
- default n
help
Saying Y here includes support for extended attributes (xattrs).
Xattrs are name:value pairs associated with inodes by
@@ -40,7 +39,6 @@ config SQUASHFS_XATTR
config SQUASHFS_LZO
bool "Include support for LZO compressed file systems"
depends on SQUASHFS
- default n
select LZO_DECOMPRESS
help
Saying Y here includes support for reading Squashfs file systems
@@ -53,10 +51,24 @@ config SQUASHFS_LZO
If unsure, say N.
+config SQUASHFS_XZ
+ bool "Include support for XZ compressed file systems"
+ depends on SQUASHFS
+ select XZ_DEC
+ help
+ Saying Y here includes support for reading Squashfs file systems
+ compressed with XZ compresssion. XZ gives better compression than
+ the default zlib compression, at the expense of greater CPU and
+ memory overhead.
+
+ XZ is not the standard compression used in Squashfs and so most
+ file systems will be readable without selecting this option.
+
+ If unsure, say N.
+
config SQUASHFS_EMBEDDED
bool "Additional option for memory-constrained systems"
depends on SQUASHFS
- default n
help
Saying Y here allows you to specify cache size.
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 7672bac8d328..cecf2bea07af 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -7,3 +7,4 @@ squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
+squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 653c030eb840..2fb2882f0fa7 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -34,7 +34,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "decompressor.h"
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 57314bee9059..26b15ae34d6f 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -55,7 +55,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
/*
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 24af9ce9722f..a5940e54c4dd 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -27,7 +27,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "decompressor.h"
#include "squashfs.h"
@@ -41,23 +40,26 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
};
#ifndef CONFIG_SQUASHFS_LZO
-static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = {
+static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
};
#endif
+#ifndef CONFIG_SQUASHFS_XZ
+static const struct squashfs_decompressor squashfs_xz_comp_ops = {
+ NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+};
+#endif
+
static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
NULL, NULL, NULL, 0, "unknown", 0
};
static const struct squashfs_decompressor *decompressor[] = {
&squashfs_zlib_comp_ops,
- &squashfs_lzma_unsupported_comp_ops,
-#ifdef CONFIG_SQUASHFS_LZO
&squashfs_lzo_comp_ops,
-#else
- &squashfs_lzo_unsupported_comp_ops,
-#endif
+ &squashfs_xz_comp_ops,
+ &squashfs_lzma_unsupported_comp_ops,
&squashfs_unknown_comp_ops
};
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 7425f80783f6..3b305a70f7aa 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -52,4 +52,13 @@ static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
length, srclength, pages);
}
+
+#ifdef CONFIG_SQUASHFS_XZ
+extern const struct squashfs_decompressor squashfs_xz_comp_ops;
+#endif
+
+#ifdef CONFIG_SQUASHFS_LZO
+extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
+#endif
+
#endif
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 7c90bbd6879d..7eef571443c6 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -39,7 +39,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
/*
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index b7f64bcd2b70..d8f32452638e 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -37,7 +37,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
/*
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 5d87789bf1c1..7da759e34c52 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -29,7 +29,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "decompressor.h"
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 5d45569d5f72..ba729d808876 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -27,11 +27,6 @@
#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
-static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
-{
- return list_entry(inode, struct squashfs_inode_info, vfs_inode);
-}
-
/* block.c */
extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
int, int);
@@ -104,6 +99,3 @@ extern const struct xattr_handler *squashfs_xattr_handlers[];
/* zlib_wrapper.c */
extern const struct squashfs_decompressor squashfs_zlib_comp_ops;
-
-/* lzo_wrapper.c */
-extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index c5137fc9ab11..39533feffd6d 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -238,6 +238,7 @@ struct meta_index {
#define ZLIB_COMPRESSION 1
#define LZMA_COMPRESSION 2
#define LZO_COMPRESSION 3
+#define XZ_COMPRESSION 4
struct squashfs_super_block {
__le32 s_magic;
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
index d3e3a37f28a1..359baefc01fc 100644
--- a/fs/squashfs/squashfs_fs_i.h
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -45,4 +45,10 @@ struct squashfs_inode_info {
};
struct inode vfs_inode;
};
+
+
+static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
+{
+ return list_entry(inode, struct squashfs_inode_info, vfs_inode);
+}
#endif
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index d33be5dd6c32..05385dbe1465 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -32,7 +32,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "xattr.h"
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
new file mode 100644
index 000000000000..856756ca5ee4
--- /dev/null
+++ b/fs/squashfs/xz_wrapper.c
@@ -0,0 +1,153 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * xz_wrapper.c
+ */
+
+
+#include <linux/mutex.h>
+#include <linux/buffer_head.h>
+#include <linux/slab.h>
+#include <linux/xz.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "decompressor.h"
+
+struct squashfs_xz {
+ struct xz_dec *state;
+ struct xz_buf buf;
+};
+
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk)
+{
+ int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+
+ struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+ if (stream == NULL)
+ goto failed;
+
+ stream->state = xz_dec_init(XZ_PREALLOC, block_size);
+ if (stream->state == NULL)
+ goto failed;
+
+ return stream;
+
+failed:
+ ERROR("Failed to allocate xz workspace\n");
+ kfree(stream);
+ return NULL;
+}
+
+
+static void squashfs_xz_free(void *strm)
+{
+ struct squashfs_xz *stream = strm;
+
+ if (stream) {
+ xz_dec_end(stream->state);
+ kfree(stream);
+ }
+}
+
+
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
+ struct buffer_head **bh, int b, int offset, int length, int srclength,
+ int pages)
+{
+ enum xz_ret xz_err;
+ int avail, total = 0, k = 0, page = 0;
+ struct squashfs_xz *stream = msblk->stream;
+
+ mutex_lock(&msblk->read_data_mutex);
+
+ xz_dec_reset(stream->state);
+ stream->buf.in_pos = 0;
+ stream->buf.in_size = 0;
+ stream->buf.out_pos = 0;
+ stream->buf.out_size = PAGE_CACHE_SIZE;
+ stream->buf.out = buffer[page++];
+
+ do {
+ if (stream->buf.in_pos == stream->buf.in_size && k < b) {
+ avail = min(length, msblk->devblksize - offset);
+ length -= avail;
+ wait_on_buffer(bh[k]);
+ if (!buffer_uptodate(bh[k]))
+ goto release_mutex;
+
+ if (avail == 0) {
+ offset = 0;
+ put_bh(bh[k++]);
+ continue;
+ }
+
+ stream->buf.in = bh[k]->b_data + offset;
+ stream->buf.in_size = avail;
+ stream->buf.in_pos = 0;
+ offset = 0;
+ }
+
+ if (stream->buf.out_pos == stream->buf.out_size
+ && page < pages) {
+ stream->buf.out = buffer[page++];
+ stream->buf.out_pos = 0;
+ total += PAGE_CACHE_SIZE;
+ }
+
+ xz_err = xz_dec_run(stream->state, &stream->buf);
+
+ if (stream->buf.in_pos == stream->buf.in_size && k < b)
+ put_bh(bh[k++]);
+ } while (xz_err == XZ_OK);
+
+ if (xz_err != XZ_STREAM_END) {
+ ERROR("xz_dec_run error, data probably corrupt\n");
+ goto release_mutex;
+ }
+
+ if (k < b) {
+ ERROR("xz_uncompress error, input remaining\n");
+ goto release_mutex;
+ }
+
+ total += stream->buf.out_pos;
+ mutex_unlock(&msblk->read_data_mutex);
+ return total;
+
+release_mutex:
+ mutex_unlock(&msblk->read_data_mutex);
+
+ for (; k < b; k++)
+ put_bh(bh[k]);
+
+ return -EIO;
+}
+
+const struct squashfs_decompressor squashfs_xz_comp_ops = {
+ .init = squashfs_xz_init,
+ .free = squashfs_xz_free,
+ .decompress = squashfs_xz_uncompress,
+ .id = XZ_COMPRESSION,
+ .name = "xz",
+ .supported = 1
+};
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 7a603874e483..818a5e063faf 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -29,7 +29,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "decompressor.h"
@@ -66,8 +65,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
struct buffer_head **bh, int b, int offset, int length, int srclength,
int pages)
{
- int zlib_err = 0, zlib_init = 0;
- int avail, bytes, k = 0, page = 0;
+ int zlib_err, zlib_init = 0;
+ int k = 0, page = 0;
z_stream *stream = msblk->stream;
mutex_lock(&msblk->read_data_mutex);
@@ -75,11 +74,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
stream->avail_out = 0;
stream->avail_in = 0;
- bytes = length;
do {
if (stream->avail_in == 0 && k < b) {
- avail = min(bytes, msblk->devblksize - offset);
- bytes -= avail;
+ int avail = min(length, msblk->devblksize - offset);
+ length -= avail;
wait_on_buffer(bh[k]);
if (!buffer_uptodate(bh[k]))
goto release_mutex;
@@ -128,6 +126,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
goto release_mutex;
}
+ if (k < b) {
+ ERROR("zlib_uncompress error, data remaining\n");
+ goto release_mutex;
+ }
+
length = stream->total_out;
mutex_unlock(&msblk->read_data_mutex);
return length;
diff --git a/fs/stat.c b/fs/stat.c
index 12e90e213900..d5c61cf2b703 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,11 +75,13 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
int error = -EINVAL;
int lookup_flags = 0;
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0)
goto out;
if (!(flag & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
+ if (flag & AT_NO_AUTOMOUNT)
+ lookup_flags |= LOOKUP_NO_AUTOMOUNT;
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
diff --git a/fs/super.c b/fs/super.c
index 4f6a3571a634..74e149efed81 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return mnt;
err:
- mntput_long(mnt);
+ mntput(mnt);
return ERR_PTR(err);
}
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0dce969d6cad..faca44997099 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -98,6 +98,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
kmem.o \
xfs_aops.o \
xfs_buf.o \
+ xfs_discard.o \
xfs_export.o \
xfs_file.o \
xfs_fs_subr.o \
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 92f1f2acc6ab..ac1c7e8378dd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -896,7 +896,6 @@ xfs_buf_rele(
trace_xfs_buf_rele(bp, _RET_IP_);
if (!pag) {
- ASSERT(!bp->b_relse);
ASSERT(list_empty(&bp->b_lru));
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
@@ -908,11 +907,7 @@ xfs_buf_rele(
ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
- if (bp->b_relse) {
- atomic_inc(&bp->b_hold);
- spin_unlock(&pag->pag_buf_lock);
- bp->b_relse(bp);
- } else if (!(bp->b_flags & XBF_STALE) &&
+ if (!(bp->b_flags & XBF_STALE) &&
atomic_read(&bp->b_lru_ref)) {
xfs_buf_lru_add(bp);
spin_unlock(&pag->pag_buf_lock);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a76c2428faff..cbe65950e524 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -152,8 +152,6 @@ typedef struct xfs_buftarg {
struct xfs_buf;
typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
-typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
-typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
#define XB_PAGES 2
@@ -183,7 +181,6 @@ typedef struct xfs_buf {
void *b_addr; /* virtual address of buffer */
struct work_struct b_iodone_work;
xfs_buf_iodone_t b_iodone; /* I/O completion function */
- xfs_buf_relse_t b_relse; /* releasing function */
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
@@ -323,7 +320,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
#define XFS_BUF_SET_START(bp) do { } while (0)
-#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
@@ -360,8 +356,7 @@ xfs_buf_set_ref(
static inline void xfs_buf_relse(xfs_buf_t *bp)
{
- if (!bp->b_relse)
- xfs_buf_unlock(bp);
+ xfs_buf_unlock(bp);
xfs_buf_rele(bp);
}
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
new file mode 100644
index 000000000000..05201ae719e5
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_discard.h"
+#include "xfs_trace.h"
+
+STATIC int
+xfs_trim_extents(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_fsblock_t start,
+ xfs_fsblock_t len,
+ xfs_fsblock_t minlen,
+ __uint64_t *blocks_trimmed)
+{
+ struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp;
+ struct xfs_perag *pag;
+ int error;
+ int i;
+
+ pag = xfs_perag_get(mp, agno);
+
+ error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ if (error || !agbp)
+ goto out_put_perag;
+
+ cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
+ /*
+ * Force out the log. This means any transactions that might have freed
+ * space before we took the AGF buffer lock are now on disk, and the
+ * volatile disk cache is flushed.
+ */
+ xfs_log_force(mp, XFS_LOG_SYNC);
+
+ /*
+ * Look up the longest btree in the AGF and start with it.
+ */
+ error = xfs_alloc_lookup_le(cur, 0,
+ XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+ if (error)
+ goto out_del_cursor;
+
+ /*
+ * Loop until we are done with all extents that are large
+ * enough to be worth discarding.
+ */
+ while (i) {
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+
+ error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
+ if (error)
+ goto out_del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+ ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+
+ /*
+ * Too small? Give up.
+ */
+ if (flen < minlen) {
+ trace_xfs_discard_toosmall(mp, agno, fbno, flen);
+ goto out_del_cursor;
+ }
+
+ /*
+ * If the extent is entirely outside of the range we are
+ * supposed to discard skip it. Do not bother to trim
+ * down partially overlapping ranges for now.
+ */
+ if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
+ XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
+ trace_xfs_discard_exclude(mp, agno, fbno, flen);
+ goto next_extent;
+ }
+
+ /*
+ * If any blocks in the range are still busy, skip the
+ * discard and try again the next time.
+ */
+ if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
+ trace_xfs_discard_busy(mp, agno, fbno, flen);
+ goto next_extent;
+ }
+
+ trace_xfs_discard_extent(mp, agno, fbno, flen);
+ error = -blkdev_issue_discard(bdev,
+ XFS_AGB_TO_DADDR(mp, agno, fbno),
+ XFS_FSB_TO_BB(mp, flen),
+ GFP_NOFS, 0);
+ if (error)
+ goto out_del_cursor;
+ *blocks_trimmed += flen;
+
+next_extent:
+ error = xfs_btree_decrement(cur, 0, &i);
+ if (error)
+ goto out_del_cursor;
+ }
+
+out_del_cursor:
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ xfs_buf_relse(agbp);
+out_put_perag:
+ xfs_perag_put(pag);
+ return error;
+}
+
+int
+xfs_ioc_trim(
+ struct xfs_mount *mp,
+ struct fstrim_range __user *urange)
+{
+ struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+ unsigned int granularity = q->limits.discard_granularity;
+ struct fstrim_range range;
+ xfs_fsblock_t start, len, minlen;
+ xfs_agnumber_t start_agno, end_agno, agno;
+ __uint64_t blocks_trimmed = 0;
+ int error, last_error = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&range, urange, sizeof(range)))
+ return -XFS_ERROR(EFAULT);
+
+ /*
+ * Truncating down the len isn't actually quite correct, but using
+ * XFS_B_TO_FSB would mean we trivially get overflows for values
+ * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
+ * used by the fstrim application. In the end it really doesn't
+ * matter as trimming blocks is an advisory interface.
+ */
+ start = XFS_B_TO_FSBT(mp, range.start);
+ len = XFS_B_TO_FSBT(mp, range.len);
+ minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
+
+ start_agno = XFS_FSB_TO_AGNO(mp, start);
+ if (start_agno >= mp->m_sb.sb_agcount)
+ return -XFS_ERROR(EINVAL);
+
+ end_agno = XFS_FSB_TO_AGNO(mp, start + len);
+ if (end_agno >= mp->m_sb.sb_agcount)
+ end_agno = mp->m_sb.sb_agcount - 1;
+
+ for (agno = start_agno; agno <= end_agno; agno++) {
+ error = -xfs_trim_extents(mp, agno, start, len, minlen,
+ &blocks_trimmed);
+ if (error)
+ last_error = error;
+ }
+
+ if (last_error)
+ return last_error;
+
+ range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
+ if (copy_to_user(urange, &range, sizeof(range)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+}
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h
new file mode 100644
index 000000000000..e82b6dd3e127
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_discard.h
@@ -0,0 +1,8 @@
+#ifndef XFS_DISCARD_H
+#define XFS_DISCARD_H 1
+
+struct fstrim_range;
+
+extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
+
+#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ba8ad422a165..ef51eb43e137 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -41,6 +41,40 @@
static const struct vm_operations_struct xfs_file_vm_ops;
/*
+ * Locking primitives for read and write IO paths to ensure we consistently use
+ * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
+ */
+static inline void
+xfs_rw_ilock(
+ struct xfs_inode *ip,
+ int type)
+{
+ if (type & XFS_IOLOCK_EXCL)
+ mutex_lock(&VFS_I(ip)->i_mutex);
+ xfs_ilock(ip, type);
+}
+
+static inline void
+xfs_rw_iunlock(
+ struct xfs_inode *ip,
+ int type)
+{
+ xfs_iunlock(ip, type);
+ if (type & XFS_IOLOCK_EXCL)
+ mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+static inline void
+xfs_rw_ilock_demote(
+ struct xfs_inode *ip,
+ int type)
+{
+ xfs_ilock_demote(ip, type);
+ if (type & XFS_IOLOCK_EXCL)
+ mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+/*
* xfs_iozero
*
* xfs_iozero clears the specified range of buffer supplied,
@@ -262,22 +296,21 @@ xfs_file_aio_read(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_lock(&inode->i_mutex);
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
if (unlikely(ioflags & IO_ISDIRECT)) {
+ xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+
if (inode->i_mapping->nrpages) {
ret = -xfs_flushinval_pages(ip,
(iocb->ki_pos & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
+ if (ret) {
+ xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+ return ret;
+ }
}
- mutex_unlock(&inode->i_mutex);
- if (ret) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return ret;
- }
- }
+ xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ } else
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
@@ -285,7 +318,7 @@ xfs_file_aio_read(
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
@@ -309,7 +342,7 @@ xfs_file_splice_read(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
@@ -317,10 +350,61 @@ xfs_file_splice_read(
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
+STATIC void
+xfs_aio_write_isize_update(
+ struct inode *inode,
+ loff_t *ppos,
+ ssize_t bytes_written)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ xfs_fsize_t isize = i_size_read(inode);
+
+ if (bytes_written > 0)
+ XFS_STATS_ADD(xs_write_bytes, bytes_written);
+
+ if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
+ *ppos > isize))
+ *ppos = isize;
+
+ if (*ppos > ip->i_size) {
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+ if (*ppos > ip->i_size)
+ ip->i_size = *ppos;
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+}
+
+/*
+ * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
+ * part of the I/O may have been written to disk before the error occured. In
+ * this case the on-disk file size may have been adjusted beyond the in-memory
+ * file size and now needs to be truncated back.
+ */
+STATIC void
+xfs_aio_write_newsize_update(
+ struct xfs_inode *ip)
+{
+ if (ip->i_new_size) {
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+ ip->i_new_size = 0;
+ if (ip->i_d.di_size > ip->i_size)
+ ip->i_d.di_size = ip->i_size;
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+}
+
+/*
+ * xfs_file_splice_write() does not use xfs_rw_ilock() because
+ * generic_file_splice_write() takes the i_mutex itself. This, in theory,
+ * couuld cause lock inversions between the aio_write path and the splice path
+ * if someone is doing concurrent splice(2) based writes and write(2) based
+ * writes to the same inode. The only real way to fix this is to re-implement
+ * the generic code here with correct locking orders.
+ */
STATIC ssize_t
xfs_file_splice_write(
struct pipe_inode_info *pipe,
@@ -331,7 +415,7 @@ xfs_file_splice_write(
{
struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- xfs_fsize_t isize, new_size;
+ xfs_fsize_t new_size;
int ioflags = 0;
ssize_t ret;
@@ -355,27 +439,9 @@ xfs_file_splice_write(
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- isize = i_size_read(inode);
- if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
- *ppos = isize;
- if (*ppos > ip->i_size) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (*ppos > ip->i_size)
- ip->i_size = *ppos;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
-
- if (ip->i_new_size) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- ip->i_new_size = 0;
- if (ip->i_d.di_size > ip->i_size)
- ip->i_d.di_size = ip->i_size;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
+ xfs_aio_write_isize_update(inode, ppos, ret);
+ xfs_aio_write_newsize_update(ip);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
@@ -562,245 +628,258 @@ out_lock:
return error;
}
+/*
+ * Common pre-write limit and setup checks.
+ *
+ * Returns with iolock held according to @iolock.
+ */
STATIC ssize_t
-xfs_file_aio_write(
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos)
+xfs_file_aio_write_checks(
+ struct file *file,
+ loff_t *pos,
+ size_t *count,
+ int *iolock)
{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- ssize_t ret = 0, error = 0;
- int ioflags = 0;
- xfs_fsize_t isize, new_size;
- int iolock;
- size_t ocount = 0, count;
- int need_i_mutex;
+ xfs_fsize_t new_size;
+ int error = 0;
- XFS_STATS_INC(xs_write_calls);
+ error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
+ if (error) {
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
+ *iolock = 0;
+ return error;
+ }
- BUG_ON(iocb->ki_pos != pos);
+ new_size = *pos + *count;
+ if (new_size > ip->i_size)
+ ip->i_new_size = new_size;
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- if (file->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+ file_update_time(file);
- error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
+ /*
+ * If the offset is beyond the size of the file, we need to zero any
+ * blocks that fall between the existing EOF and the start of this
+ * write.
+ */
+ if (*pos > ip->i_size)
+ error = -xfs_zero_eof(ip, *pos, ip->i_size);
+
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
- count = ocount;
- if (count == 0)
- return 0;
-
- xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
-relock:
- if (ioflags & IO_ISDIRECT) {
- iolock = XFS_IOLOCK_SHARED;
- need_i_mutex = 0;
- } else {
- iolock = XFS_IOLOCK_EXCL;
- need_i_mutex = 1;
- mutex_lock(&inode->i_mutex);
- }
+ /*
+ * If we're writing the file then make sure to clear the setuid and
+ * setgid bits if the process is not being run by root. This keeps
+ * people from modifying setuid and setgid binaries.
+ */
+ return file_remove_suid(file);
- xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
+}
-start:
- error = -generic_write_checks(file, &pos, &count,
- S_ISBLK(inode->i_mode));
- if (error) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
- goto out_unlock_mutex;
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the tricky to
+ * follow locking changes and looping.
+ *
+ * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
+ * until we're sure the bytes at the new EOF have been zeroed and/or the cached
+ * pages are flushed out.
+ *
+ * In most cases the direct IO writes will be done holding IOLOCK_SHARED
+ * allowing them to be done in parallel with reads and other direct IO writes.
+ * However, if the IO is not aligned to filesystem blocks, the direct IO layer
+ * needs to do sub-block zeroing and that requires serialisation against other
+ * direct IOs to the same block. In this case we need to serialise the
+ * submission of the unaligned IOs so that we don't get racing block zeroing in
+ * the dio layer. To avoid the problem with aio, we also need to wait for
+ * outstanding IOs to complete so that unwritten extent conversion is completed
+ * before we try to map the overlapping block. This is currently implemented by
+ * hitting it with a big hammer (i.e. xfs_ioend_wait()).
+ *
+ * Returns with locks held indicated by @iolock and errors indicated by
+ * negative return values.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+ struct kiocb *iocb,
+ const struct iovec *iovp,
+ unsigned long nr_segs,
+ loff_t pos,
+ size_t ocount,
+ int *iolock)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t ret = 0;
+ size_t count = ocount;
+ int unaligned_io = 0;
+ struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+
+ *iolock = 0;
+ if ((pos & target->bt_smask) || (count & target->bt_smask))
+ return -XFS_ERROR(EINVAL);
+
+ if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
+ unaligned_io = 1;
+
+ if (unaligned_io || mapping->nrpages || pos > ip->i_size)
+ *iolock = XFS_IOLOCK_EXCL;
+ else
+ *iolock = XFS_IOLOCK_SHARED;
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+ ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+ if (ret)
+ return ret;
+
+ if (mapping->nrpages) {
+ WARN_ON(*iolock != XFS_IOLOCK_EXCL);
+ ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+ FI_REMAPF_LOCKED);
+ if (ret)
+ return ret;
}
- if (ioflags & IO_ISDIRECT) {
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- if ((pos & target->bt_smask) || (count & target->bt_smask)) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
- return XFS_ERROR(-EINVAL);
- }
-
- if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
- iolock = XFS_IOLOCK_EXCL;
- need_i_mutex = 1;
- mutex_lock(&inode->i_mutex);
- xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
- goto start;
- }
+ /*
+ * If we are doing unaligned IO, wait for all other IO to drain,
+ * otherwise demote the lock if we had to flush cached pages
+ */
+ if (unaligned_io)
+ xfs_ioend_wait(ip);
+ else if (*iolock == XFS_IOLOCK_EXCL) {
+ xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ *iolock = XFS_IOLOCK_SHARED;
}
- new_size = pos + count;
- if (new_size > ip->i_size)
- ip->i_new_size = new_size;
+ trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+ ret = generic_file_direct_write(iocb, iovp,
+ &nr_segs, pos, &iocb->ki_pos, count, ocount);
- if (likely(!(ioflags & IO_INVIS)))
- file_update_time(file);
+ /* No fallback to buffered IO on errors for XFS. */
+ ASSERT(ret < 0 || ret == count);
+ return ret;
+}
- /*
- * If the offset is beyond the size of the file, we have a couple
- * of things to do. First, if there is already space allocated
- * we need to either create holes or zero the disk or ...
- *
- * If there is a page where the previous size lands, we need
- * to zero it out up to the new size.
- */
+STATIC ssize_t
+xfs_file_buffered_aio_write(
+ struct kiocb *iocb,
+ const struct iovec *iovp,
+ unsigned long nr_segs,
+ loff_t pos,
+ size_t ocount,
+ int *iolock)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ ssize_t ret;
+ int enospc = 0;
+ size_t count = ocount;
- if (pos > ip->i_size) {
- error = xfs_zero_eof(ip, pos, ip->i_size);
- if (error) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- goto out_unlock_internal;
- }
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ *iolock = XFS_IOLOCK_EXCL;
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
- /*
- * If we're writing the file then make sure to clear the
- * setuid and setgid bits if the process is not being run
- * by root. This keeps people from modifying setuid and
- * setgid binaries.
- */
- error = -file_remove_suid(file);
- if (unlikely(error))
- goto out_unlock_internal;
+ ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+ if (ret)
+ return ret;
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
- if ((ioflags & IO_ISDIRECT)) {
- if (mapping->nrpages) {
- WARN_ON(need_i_mutex == 0);
- error = xfs_flushinval_pages(ip,
- (pos & PAGE_CACHE_MASK),
- -1, FI_REMAPF_LOCKED);
- if (error)
- goto out_unlock_internal;
- }
-
- if (need_i_mutex) {
- /* demote the lock now the cached pages are gone */
- xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
- mutex_unlock(&inode->i_mutex);
+write_retry:
+ trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
+ ret = generic_file_buffered_write(iocb, iovp, nr_segs,
+ pos, &iocb->ki_pos, count, ret);
+ /*
+ * if we just got an ENOSPC, flush the inode now we aren't holding any
+ * page locks and retry *once*
+ */
+ if (ret == -ENOSPC && !enospc) {
+ ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
+ if (ret)
+ return ret;
+ enospc = 1;
+ goto write_retry;
+ }
+ current->backing_dev_info = NULL;
+ return ret;
+}
- iolock = XFS_IOLOCK_SHARED;
- need_i_mutex = 0;
- }
+STATIC ssize_t
+xfs_file_aio_write(
+ struct kiocb *iocb,
+ const struct iovec *iovp,
+ unsigned long nr_segs,
+ loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ ssize_t ret;
+ int iolock;
+ size_t ocount = 0;
- trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
- ret = generic_file_direct_write(iocb, iovp,
- &nr_segs, pos, &iocb->ki_pos, count, ocount);
+ XFS_STATS_INC(xs_write_calls);
- /*
- * direct-io write to a hole: fall through to buffered I/O
- * for completing the rest of the request.
- */
- if (ret >= 0 && ret != count) {
- XFS_STATS_ADD(xs_write_bytes, ret);
+ BUG_ON(iocb->ki_pos != pos);
- pos += ret;
- count -= ret;
+ ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
+ if (ret)
+ return ret;
- ioflags &= ~IO_ISDIRECT;
- xfs_iunlock(ip, iolock);
- goto relock;
- }
- } else {
- int enospc = 0;
- ssize_t ret2 = 0;
+ if (ocount == 0)
+ return 0;
-write_retry:
- trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
- ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
- pos, &iocb->ki_pos, count, ret);
- /*
- * if we just got an ENOSPC, flush the inode now we
- * aren't holding any page locks and retry *once*
- */
- if (ret2 == -ENOSPC && !enospc) {
- error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
- if (error)
- goto out_unlock_internal;
- enospc = 1;
- goto write_retry;
- }
- ret = ret2;
- }
+ xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
- current->backing_dev_info = NULL;
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
- isize = i_size_read(inode);
- if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
- iocb->ki_pos = isize;
+ if (unlikely(file->f_flags & O_DIRECT))
+ ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+ ocount, &iolock);
+ else
+ ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
+ ocount, &iolock);
- if (iocb->ki_pos > ip->i_size) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (iocb->ki_pos > ip->i_size)
- ip->i_size = iocb->ki_pos;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
+ xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
- error = -ret;
if (ret <= 0)
- goto out_unlock_internal;
-
- XFS_STATS_ADD(xs_write_bytes, ret);
+ goto out_unlock;
/* Handle various SYNC-type writes */
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
- int error2;
-
- xfs_iunlock(ip, iolock);
- if (need_i_mutex)
- mutex_unlock(&inode->i_mutex);
+ int error, error2;
- error2 = filemap_write_and_wait_range(mapping, pos, end);
- if (!error)
- error = error2;
- if (need_i_mutex)
- mutex_lock(&inode->i_mutex);
- xfs_ilock(ip, iolock);
+ xfs_rw_iunlock(ip, iolock);
+ error = filemap_write_and_wait_range(mapping, pos, end);
+ xfs_rw_ilock(ip, iolock);
error2 = -xfs_file_fsync(file,
(file->f_flags & __O_SYNC) ? 0 : 1);
- if (!error)
- error = error2;
+ if (error)
+ ret = error;
+ else if (error2)
+ ret = error2;
}
- out_unlock_internal:
- if (ip->i_new_size) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- ip->i_new_size = 0;
- /*
- * If this was a direct or synchronous I/O that failed (such
- * as ENOSPC) then part of the I/O may have been written to
- * disk before the error occured. In this case the on-disk
- * file size may have been adjusted beyond the in-memory file
- * size and now needs to be truncated back.
- */
- if (ip->i_d.di_size > ip->i_size)
- ip->i_d.di_size = ip->i_size;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
- xfs_iunlock(ip, iolock);
- out_unlock_mutex:
- if (need_i_mutex)
- mutex_unlock(&inode->i_mutex);
- return -error;
+out_unlock:
+ xfs_aio_write_newsize_update(ip);
+ xfs_rw_iunlock(ip, iolock);
+ return ret;
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ad442d9e392e..b06ede1d0bed 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -39,6 +39,7 @@
#include "xfs_dfrag.h"
#include "xfs_fsops.h"
#include "xfs_vnodeops.h"
+#include "xfs_discard.h"
#include "xfs_quota.h"
#include "xfs_inode_item.h"
#include "xfs_export.h"
@@ -1294,6 +1295,8 @@ xfs_file_ioctl(
trace_xfs_file_ioctl(ip);
switch (cmd) {
+ case FITRIM:
+ return xfs_ioc_trim(mp, arg);
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_RESVSP:
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bd07f7339366..9731898083ae 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1414,7 +1414,7 @@ xfs_fs_freeze(
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
- return -xfs_fs_log_dummy(mp, SYNC_WAIT);
+ return -xfs_fs_log_dummy(mp);
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a02480de9759..e22f0057d21f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -362,7 +362,7 @@ xfs_quiesce_data(
/* mark the log as covered if needed */
if (xfs_log_need_covered(mp))
- error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
+ error2 = xfs_fs_log_dummy(mp);
/* flush data-only devices */
if (mp->m_rtdev_targp)
@@ -503,13 +503,14 @@ xfs_sync_worker(
int error;
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
- xfs_log_force(mp, 0);
- xfs_reclaim_inodes(mp, 0);
/* dgc: errors ignored here */
- error = xfs_qm_sync(mp, SYNC_TRYLOCK);
if (mp->m_super->s_frozen == SB_UNFROZEN &&
xfs_log_need_covered(mp))
- error = xfs_fs_log_dummy(mp, 0);
+ error = xfs_fs_log_dummy(mp);
+ else
+ xfs_log_force(mp, 0);
+ xfs_reclaim_inodes(mp, 0);
+ error = xfs_qm_sync(mp, SYNC_TRYLOCK);
}
mp->m_sync_seq++;
wake_up(&mp->m_wait_single_sync_task);
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7bb5092d6ae4..ee3cee097e7e 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -18,6 +18,7 @@
#include "xfs.h"
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
+#include "xfs_error.h"
static struct ctl_table_header *xfs_table_header;
@@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler(
return ret;
}
+
+STATIC int
+xfs_panic_mask_proc_handler(
+ ctl_table *ctl,
+ int write,
+ void __user *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ int ret, *valp = ctl->data;
+
+ ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+ if (!ret && write) {
+ xfs_panic_mask = *valp;
+#ifdef DEBUG
+ xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
+#endif
+ }
+ return ret;
+}
#endif /* CONFIG_PROC_FS */
static ctl_table xfs_table[] = {
@@ -77,7 +98,7 @@ static ctl_table xfs_table[] = {
.data = &xfs_params.panic_mask.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = xfs_panic_mask_proc_handler,
.extra1 = &xfs_params.panic_mask.min,
.extra2 = &xfs_params.panic_mask.max
},
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 647af2a2e7aa..2d0bcb479075 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1759,6 +1759,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+DECLARE_EVENT_CLASS(xfs_discard_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t agbno, xfs_extlen_t len),
+ TP_ARGS(mp, agno, agbno, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agbno = agbno;
+ __entry->len = len;
+ ),
+ TP_printk("dev %d:%d agno %u agbno %u len %u\n",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->len)
+)
+
+#define DEFINE_DISCARD_EVENT(name) \
+DEFINE_EVENT(xfs_discard_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+ xfs_agblock_t agbno, xfs_extlen_t len), \
+ TP_ARGS(mp, agno, agbno, len))
+DEFINE_DISCARD_EVENT(xfs_discard_extent);
+DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
+DEFINE_DISCARD_EVENT(xfs_discard_exclude);
+DEFINE_DISCARD_EVENT(xfs_discard_busy);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 975aa10e1a47..e6cf955ec0fc 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -25,86 +25,78 @@
#include "xfs_mount.h"
#include "xfs_error.h"
-static char message[1024]; /* keep it off the stack */
-static DEFINE_SPINLOCK(xfs_err_lock);
-
-/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
-#define XFS_MAX_ERR_LEVEL 7
-#define XFS_ERR_MASK ((1 << 3) - 1)
-static const char * const err_level[XFS_MAX_ERR_LEVEL+1] =
- {KERN_EMERG, KERN_ALERT, KERN_CRIT,
- KERN_ERR, KERN_WARNING, KERN_NOTICE,
- KERN_INFO, KERN_DEBUG};
-
void
-cmn_err(register int level, char *fmt, ...)
+cmn_err(
+ const char *lvl,
+ const char *fmt,
+ ...)
{
- char *fp = fmt;
- int len;
- ulong flags;
- va_list ap;
-
- level &= XFS_ERR_MASK;
- if (level > XFS_MAX_ERR_LEVEL)
- level = XFS_MAX_ERR_LEVEL;
- spin_lock_irqsave(&xfs_err_lock,flags);
- va_start(ap, fmt);
- if (*fmt == '!') fp++;
- len = vsnprintf(message, sizeof(message), fp, ap);
- if (len >= sizeof(message))
- len = sizeof(message) - 1;
- if (message[len-1] == '\n')
- message[len-1] = 0;
- printk("%s%s\n", err_level[level], message);
- va_end(ap);
- spin_unlock_irqrestore(&xfs_err_lock,flags);
- BUG_ON(level == CE_PANIC);
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%s%pV", lvl, &vaf);
+ va_end(args);
+
+ BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
}
void
-xfs_fs_vcmn_err(
- int level,
+xfs_fs_cmn_err(
+ const char *lvl,
struct xfs_mount *mp,
- char *fmt,
- va_list ap)
+ const char *fmt,
+ ...)
{
- unsigned long flags;
- int len = 0;
+ struct va_format vaf;
+ va_list args;
- level &= XFS_ERR_MASK;
- if (level > XFS_MAX_ERR_LEVEL)
- level = XFS_MAX_ERR_LEVEL;
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
- spin_lock_irqsave(&xfs_err_lock,flags);
+ printk("%sFilesystem %s: %pV", lvl, mp->m_fsname, &vaf);
+ va_end(args);
- if (mp) {
- len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname);
+ BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
+}
+
+/* All callers to xfs_cmn_err use CE_ALERT, so don't bother testing lvl */
+void
+xfs_cmn_err(
+ int panic_tag,
+ const char *lvl,
+ struct xfs_mount *mp,
+ const char *fmt,
+ ...)
+{
+ struct va_format vaf;
+ va_list args;
+ int panic = 0;
- /*
- * Skip the printk if we can't print anything useful
- * due to an over-long device name.
- */
- if (len >= sizeof(message))
- goto out;
+ if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
+ printk(KERN_ALERT "XFS: Transforming an alert into a BUG.");
+ panic = 1;
}
- len = vsnprintf(message + len, sizeof(message) - len, fmt, ap);
- if (len >= sizeof(message))
- len = sizeof(message) - 1;
- if (message[len-1] == '\n')
- message[len-1] = 0;
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
- printk("%s%s\n", err_level[level], message);
- out:
- spin_unlock_irqrestore(&xfs_err_lock,flags);
+ printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf);
+ va_end(args);
- BUG_ON(level == CE_PANIC);
+ BUG_ON(panic);
}
void
assfail(char *expr, char *file, int line)
{
- printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
+ printk(KERN_CRIT "Assertion failed: %s, file: %s, line: %d\n", expr,
+ file, line);
BUG();
}
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index d2d20462fd4f..05699f67d475 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -20,15 +20,22 @@
#include <stdarg.h>
-#define CE_DEBUG 7 /* debug */
-#define CE_CONT 6 /* continuation */
-#define CE_NOTE 5 /* notice */
-#define CE_WARN 4 /* warning */
-#define CE_ALERT 1 /* alert */
-#define CE_PANIC 0 /* panic */
-
-extern void cmn_err(int, char *, ...)
- __attribute__ ((format (printf, 2, 3)));
+struct xfs_mount;
+
+#define CE_DEBUG KERN_DEBUG
+#define CE_CONT KERN_INFO
+#define CE_NOTE KERN_NOTICE
+#define CE_WARN KERN_WARNING
+#define CE_ALERT KERN_ALERT
+#define CE_PANIC KERN_EMERG
+
+void cmn_err(const char *lvl, const char *fmt, ...)
+ __attribute__ ((format (printf, 2, 3)));
+void xfs_fs_cmn_err( const char *lvl, struct xfs_mount *mp,
+ const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
+void xfs_cmn_err( int panic_tag, const char *lvl, struct xfs_mount *mp,
+ const char *fmt, ...) __attribute__ ((format (printf, 4, 5)));
+
extern void assfail(char *expr, char *f, int l);
#define ASSERT_ALWAYS(expr) \
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index fa8723f5870a..f3227984a9bf 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,10 +41,6 @@
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
-static int
-xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t bno, xfs_extlen_t len);
-
/*
* Prototypes for per-ag allocation routines
*/
@@ -94,7 +90,7 @@ xfs_alloc_lookup_ge(
* Lookup the first record less than or equal to [bno, len]
* in the btree given by cur.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_lookup_le(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
@@ -127,7 +123,7 @@ xfs_alloc_update(
/*
* Get the data from the pointed-to record.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_get_rec(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t *bno, /* output: starting block of extent */
@@ -2615,7 +2611,7 @@ restart:
* will require a synchronous transaction, but it can still be
* used to distinguish between a partial or exact match.
*/
-static int
+int
xfs_alloc_busy_search(
struct xfs_mount *mp,
xfs_agnumber_t agno,
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 895009a97271..0ab56b32c7eb 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -19,6 +19,7 @@
#define __XFS_ALLOC_H__
struct xfs_buf;
+struct xfs_btree_cur;
struct xfs_mount;
struct xfs_perag;
struct xfs_trans;
@@ -118,16 +119,16 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp,
struct xfs_perag *pag);
#ifdef __KERNEL__
-
void
-xfs_alloc_busy_insert(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len);
+xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
+ xfs_agblock_t bno, xfs_extlen_t len);
void
xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
+int
+xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t bno, xfs_extlen_t len);
#endif /* __KERNEL__ */
/*
@@ -205,4 +206,18 @@ xfs_free_extent(
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len); /* length of extent */
+int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat); /* success/failure */
+
+int /* error */
+xfs_alloc_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t *bno, /* output: starting block of extent */
+ xfs_extlen_t *len, /* output: length of extent */
+ int *stat); /* output: success/failure */
+
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index ed2b65f3f8b9..98c6f73b6752 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -141,7 +141,6 @@ xfs_buf_item_log_check(
#define xfs_buf_item_log_check(x)
#endif
-STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
/*
@@ -959,128 +958,76 @@ xfs_buf_do_callbacks(
*/
void
xfs_buf_iodone_callbacks(
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
- xfs_log_item_t *lip;
- static ulong lasttime;
- static xfs_buftarg_t *lasttarg;
- xfs_mount_t *mp;
+ struct xfs_log_item *lip = bp->b_fspriv;
+ struct xfs_mount *mp = lip->li_mountp;
+ static ulong lasttime;
+ static xfs_buftarg_t *lasttarg;
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ if (likely(!XFS_BUF_GETERROR(bp)))
+ goto do_callbacks;
- if (XFS_BUF_GETERROR(bp) != 0) {
- /*
- * If we've already decided to shutdown the filesystem
- * because of IO errors, there's no point in giving this
- * a retry.
- */
- mp = lip->li_mountp;
- if (XFS_FORCED_SHUTDOWN(mp)) {
- ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
- XFS_BUF_SUPER_STALE(bp);
- trace_xfs_buf_item_iodone(bp, _RET_IP_);
- xfs_buf_do_callbacks(bp);
- XFS_BUF_SET_FSPRIVATE(bp, NULL);
- XFS_BUF_CLR_IODONE_FUNC(bp);
- xfs_buf_ioend(bp, 0);
- return;
- }
+ /*
+ * If we've already decided to shutdown the filesystem because of
+ * I/O errors, there's no point in giving this a retry.
+ */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ XFS_BUF_SUPER_STALE(bp);
+ trace_xfs_buf_item_iodone(bp, _RET_IP_);
+ goto do_callbacks;
+ }
- if ((XFS_BUF_TARGET(bp) != lasttarg) ||
- (time_after(jiffies, (lasttime + 5*HZ)))) {
- lasttime = jiffies;
- cmn_err(CE_ALERT, "Device %s, XFS metadata write error"
- " block 0x%llx in %s",
- XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
- (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname);
- }
- lasttarg = XFS_BUF_TARGET(bp);
+ if (XFS_BUF_TARGET(bp) != lasttarg ||
+ time_after(jiffies, (lasttime + 5*HZ))) {
+ lasttime = jiffies;
+ cmn_err(CE_ALERT, "Device %s, XFS metadata write error"
+ " block 0x%llx in %s",
+ XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+ (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname);
+ }
+ lasttarg = XFS_BUF_TARGET(bp);
- if (XFS_BUF_ISASYNC(bp)) {
- /*
- * If the write was asynchronous then noone will be
- * looking for the error. Clear the error state
- * and write the buffer out again delayed write.
- *
- * XXXsup This is OK, so long as we catch these
- * before we start the umount; we don't want these
- * DELWRI metadata bufs to be hanging around.
- */
- XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */
-
- if (!(XFS_BUF_ISSTALE(bp))) {
- XFS_BUF_DELAYWRITE(bp);
- XFS_BUF_DONE(bp);
- XFS_BUF_SET_START(bp);
- }
- ASSERT(XFS_BUF_IODONE_FUNC(bp));
- trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
- xfs_buf_relse(bp);
- } else {
- /*
- * If the write of the buffer was not asynchronous,
- * then we want to make sure to return the error
- * to the caller of bwrite(). Because of this we
- * cannot clear the B_ERROR state at this point.
- * Instead we install a callback function that
- * will be called when the buffer is released, and
- * that routine will clear the error state and
- * set the buffer to be written out again after
- * some delay.
- */
- /* We actually overwrite the existing b-relse
- function at times, but we're gonna be shutting down
- anyway. */
- XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);
+ /*
+ * If the write was asynchronous then noone will be looking for the
+ * error. Clear the error state and write the buffer out again.
+ *
+ * During sync or umount we'll write all pending buffers again
+ * synchronous, which will catch these errors if they keep hanging
+ * around.
+ */
+ if (XFS_BUF_ISASYNC(bp)) {
+ XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */
+
+ if (!XFS_BUF_ISSTALE(bp)) {
+ XFS_BUF_DELAYWRITE(bp);
XFS_BUF_DONE(bp);
- XFS_BUF_FINISH_IOWAIT(bp);
+ XFS_BUF_SET_START(bp);
}
+ ASSERT(XFS_BUF_IODONE_FUNC(bp));
+ trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+ xfs_buf_relse(bp);
return;
}
- xfs_buf_do_callbacks(bp);
- XFS_BUF_SET_FSPRIVATE(bp, NULL);
- XFS_BUF_CLR_IODONE_FUNC(bp);
- xfs_buf_ioend(bp, 0);
-}
-
-/*
- * This is a callback routine attached to a buffer which gets an error
- * when being written out synchronously.
- */
-STATIC void
-xfs_buf_error_relse(
- xfs_buf_t *bp)
-{
- xfs_log_item_t *lip;
- xfs_mount_t *mp;
-
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- mp = (xfs_mount_t *)lip->li_mountp;
- ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
-
+ /*
+ * If the write of the buffer was synchronous, we want to make
+ * sure to return the error to the caller of xfs_bwrite().
+ */
XFS_BUF_STALE(bp);
XFS_BUF_DONE(bp);
XFS_BUF_UNDELAYWRITE(bp);
- XFS_BUF_ERROR(bp,0);
trace_xfs_buf_error_relse(bp, _RET_IP_);
+ xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
- if (! XFS_FORCED_SHUTDOWN(mp))
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
- /*
- * We have to unpin the pinned buffers so do the
- * callbacks.
- */
+do_callbacks:
xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
- XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
- xfs_buf_relse(bp);
+ xfs_buf_ioend(bp, 0);
}
-
/*
* This is the iodone() function for buffers which have been
* logged. It is called when they are eventually flushed out.
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index c78cc6a3d87c..4c7db74a05f7 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -152,37 +152,6 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
}
#endif /* DEBUG */
-
-void
-xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- xfs_fs_vcmn_err(level, mp, fmt, ap);
- va_end(ap);
-}
-
-void
-xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...)
-{
- va_list ap;
-
-#ifdef DEBUG
- xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
-#endif
-
- if (xfs_panic_mask && (xfs_panic_mask & panic_tag)
- && (level & CE_ALERT)) {
- level &= ~CE_ALERT;
- level |= CE_PANIC;
- cmn_err(CE_ALERT, "XFS: Transforming an alert into a BUG.");
- }
- va_start(ap, fmt);
- xfs_fs_vcmn_err(level, mp, fmt, ap);
- va_end(ap);
-}
-
void
xfs_error_report(
const char *tag,
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index f338847f80b8..10dce5475f02 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -136,8 +136,8 @@ extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
(rf))))
-extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
-extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
+extern int xfs_errortag_add(int error_tag, struct xfs_mount *mp);
+extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
#else
#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr)
#define xfs_errortag_add(tag, mp) (ENOSYS)
@@ -162,21 +162,15 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
struct xfs_mount;
-extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp,
- char *fmt, va_list ap)
- __attribute__ ((format (printf, 3, 0)));
-extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
- char *fmt, ...)
- __attribute__ ((format (printf, 4, 5)));
-extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...)
- __attribute__ ((format (printf, 3, 4)));
-
extern void xfs_hex_dump(void *p, int length);
#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
#define xfs_fs_mount_cmn_err(f, fmt, args...) \
- ((f & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args))
+ do { \
+ if (!(f & XFS_MFSI_QUIET)) \
+ cmn_err(CE_WARN, "XFS: " fmt, ## args); \
+ } while (0)
#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index f56d30e8040c..cec89dd5d7d2 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -612,12 +612,13 @@ out:
*
* We cannot use an inode here for this - that will push dirty state back up
* into the VFS and then periodic inode flushing will prevent log covering from
- * making progress. Hence we log a field in the superblock instead.
+ * making progress. Hence we log a field in the superblock instead and use a
+ * synchronous transaction to ensure the superblock is immediately unpinned
+ * and can be written back.
*/
int
xfs_fs_log_dummy(
- xfs_mount_t *mp,
- int flags)
+ xfs_mount_t *mp)
{
xfs_trans_t *tp;
int error;
@@ -632,8 +633,7 @@ xfs_fs_log_dummy(
/* log the UUID because it is an unchanging field */
xfs_mod_sb(tp, XFS_SB_UUID);
- if (flags & SYNC_WAIT)
- xfs_trans_set_sync(tp);
+ xfs_trans_set_sync(tp);
return xfs_trans_commit(tp, 0);
}
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index a786c5212c1e..1b6a98b66886 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
xfs_fsop_resblks_t *outval);
extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
+extern int xfs_fs_log_dummy(struct xfs_mount *mp);
#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0bf24b11d0c4..ae6fef1ff563 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -377,7 +377,7 @@ xfs_log_mount(
cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname);
else {
cmn_err(CE_NOTE,
- "!Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.",
+ "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.",
mp->m_fsname);
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 204d8e5fa7fa..aa0ebb776903 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3800,7 +3800,7 @@ xlog_recover_finish(
log->l_flags &= ~XLOG_RECOVERY_NEEDED;
} else {
cmn_err(CE_DEBUG,
- "!Ending clean XFS mount for filesystem: %s\n",
+ "Ending clean XFS mount for filesystem: %s\n",
log->l_mp->m_fsname);
}
return 0;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f80a067a4658..33dbc4e0ad62 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1137,7 +1137,7 @@ out_undo_fdblocks:
if (blkdelta)
xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
out:
- ASSERT(error = 0);
+ ASSERT(error == 0);
return;
}