From 0b9bf3812ad1f0d937584e300826285694f53e2b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:50 +0100
Subject: afs: Split wait from afs_make_call()

Split the call to afs_wait_for_call_to_complete() from afs_make_call() to
make it easier to handle asynchronous calls and to make it easier to
convert a synchronous call to an asynchronous one in future, for instance
when someone tries to interrupt an operation by pressing Ctrl-C.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fs_probe.c  | 13 ++++-----
 fs/afs/fsclient.c  | 78 ++++++++++++++++++++++++++++++++++--------------------
 fs/afs/internal.h  | 12 +++++----
 fs/afs/rxrpc.c     | 33 +++++++++++------------
 fs/afs/vl_probe.c  | 14 +++++-----
 fs/afs/vlclient.c  | 26 ++++++++++--------
 fs/afs/yfsclient.c | 54 ++++++++++++++++++++++++-------------
 7 files changed, 139 insertions(+), 91 deletions(-)

diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index 3a9eaec06756..5d3abde52a0f 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -141,8 +141,8 @@ static int afs_do_probe_fileserver(struct afs_net *net,
 	struct afs_addr_cursor ac = {
 		.index = 0,
 	};
+	struct afs_call *call;
 	bool in_progress = false;
-	int err;
 
 	_enter("%pU", &server->uuid);
 
@@ -156,12 +156,13 @@ static int afs_do_probe_fileserver(struct afs_net *net,
 	server->probe.rtt = UINT_MAX;
 
 	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
-		err = afs_fs_get_capabilities(net, server, &ac, key, server_index,
-					      true);
-		if (err == -EINPROGRESS)
+		call = afs_fs_get_capabilities(net, server, &ac, key, server_index);
+		if (!IS_ERR(call)) {
+			afs_put_call(call);
 			in_progress = true;
-		else
-			afs_prioritise_error(_e, err, ac.abort_code);
+		} else {
+			afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
+		}
 	}
 
 	if (!in_progress)
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 0b37867b5c20..5eefcf35d73d 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -468,7 +468,9 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -660,7 +662,8 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -707,7 +710,8 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -827,7 +831,8 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -922,7 +927,8 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &dvnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1014,7 +1020,8 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1128,7 +1135,8 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1246,7 +1254,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &orig_dvnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1350,7 +1359,8 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 	*bp++ = htonl((u32) i_size);
 
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1426,7 +1436,8 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1524,7 +1535,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1570,7 +1582,8 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1614,7 +1627,8 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1798,7 +1812,8 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1885,7 +1900,8 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1919,7 +1935,8 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1953,7 +1970,8 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1998,7 +2016,8 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
 	*bp++ = htonl(FSGIVEUPALLCALLBACKS);
 
 	/* Can't take a ref on server */
-	return afs_make_call(ac, call, GFP_NOFS, false);
+	afs_make_call(ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, ac);
 }
 
 /*
@@ -2068,12 +2087,11 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
  * Probe a fileserver for the capabilities that it supports.  This can
  * return up to 196 words.
  */
-int afs_fs_get_capabilities(struct afs_net *net,
-			    struct afs_server *server,
-			    struct afs_addr_cursor *ac,
-			    struct key *key,
-			    unsigned int server_index,
-			    bool async)
+struct afs_call *afs_fs_get_capabilities(struct afs_net *net,
+					 struct afs_server *server,
+					 struct afs_addr_cursor *ac,
+					 struct key *key,
+					 unsigned int server_index)
 {
 	struct afs_call *call;
 	__be32 *bp;
@@ -2082,13 +2100,14 @@ int afs_fs_get_capabilities(struct afs_net *net,
 
 	call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4);
 	if (!call)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	call->key = key;
 	call->reply[0] = afs_get_server(server);
 	call->reply[1] = (void *)(long)server_index;
 	call->upgrade = true;
 	call->want_reply_time = true;
+	call->async = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -2096,7 +2115,8 @@ int afs_fs_get_capabilities(struct afs_net *net,
 
 	/* Can't take a ref on server */
 	trace_afs_make_fs_call(call, NULL);
-	return afs_make_call(ac, call, GFP_NOFS, async);
+	afs_make_call(ac, call, GFP_NOFS);
+	return call;
 }
 
 /*
@@ -2183,7 +2203,8 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -2363,5 +2384,6 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &fids[0]);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 3904ab0b9563..61e24551574d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -939,8 +939,9 @@ extern int afs_fs_extend_lock(struct afs_fs_cursor *);
 extern int afs_fs_release_lock(struct afs_fs_cursor *);
 extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
 					struct afs_addr_cursor *, struct key *);
-extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
-				   struct afs_addr_cursor *, struct key *, unsigned int, bool);
+extern struct afs_call *afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
+						struct afs_addr_cursor *, struct key *,
+						unsigned int);
 extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
 				     struct afs_fid *, struct afs_file_status *,
 				     struct afs_callback *, unsigned int,
@@ -1073,7 +1074,8 @@ extern int __net_init afs_open_socket(struct afs_net *);
 extern void __net_exit afs_close_socket(struct afs_net *);
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
-extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
+extern void afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t);
+extern long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 					    const struct afs_call_type *,
 					    size_t, size_t);
@@ -1218,8 +1220,8 @@ extern void afs_fs_exit(void);
 extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
 							 const char *, int);
 extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
-extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
-				   struct afs_vlserver *, unsigned int, bool);
+extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *,
+						struct key *, struct afs_vlserver *, unsigned int);
 extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
 
 /*
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 15c7e82d80cb..bc5aa590f348 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -21,7 +21,6 @@
 struct workqueue_struct *afs_async_calls;
 
 static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
-static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
 static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
 static void afs_delete_async_call(struct work_struct *);
 static void afs_process_async_call(struct work_struct *);
@@ -361,10 +360,10 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 }
 
 /*
- * initiate a call
+ * Initiate a call and synchronously queue up the parameters for dispatch.  Any
+ * error is stored into the call struct, which the caller must check for.
  */
-long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
-		   gfp_t gfp, bool async)
+void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
 {
 	struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
 	struct rxrpc_call *rxcall;
@@ -382,7 +381,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	       call, call->type->name, key_serial(call->key),
 	       atomic_read(&call->net->nr_outstanding_calls));
 
-	call->async = async;
 	call->addr_ix = ac->index;
 	call->alist = afs_get_addrlist(ac->alist);
 
@@ -415,7 +413,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
 					 (unsigned long)call,
 					 tx_total_len, gfp,
-					 (async ?
+					 (call->async ?
 					  afs_wake_up_async_call :
 					  afs_wake_up_call_waiter),
 					 call->upgrade,
@@ -453,13 +451,11 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 
 	/* Note that at this point, we may have received the reply or an abort
 	 * - and an asynchronous call may already have completed.
+	 *
+	 * afs_wait_for_call_to_complete(call, ac)
+	 * must be called to synchronously clean up.
 	 */
-	if (call->async) {
-		afs_put_call(call);
-		return -EINPROGRESS;
-	}
-
-	return afs_wait_for_call_to_complete(call, ac);
+	return;
 
 error_do_abort:
 	if (ret != -ECONNABORTED) {
@@ -495,9 +491,7 @@ error_kill_call:
 
 	ac->error = ret;
 	call->state = AFS_CALL_COMPLETE;
-	afs_put_call(call);
 	_leave(" = %d", ret);
-	return ret;
 }
 
 /*
@@ -604,10 +598,10 @@ call_complete:
 }
 
 /*
- * wait synchronously for a call to complete
+ * Wait synchronously for a call to complete and clean up the call struct.
  */
-static long afs_wait_for_call_to_complete(struct afs_call *call,
-					  struct afs_addr_cursor *ac)
+long afs_wait_for_call_to_complete(struct afs_call *call,
+				   struct afs_addr_cursor *ac)
 {
 	signed long rtt2, timeout;
 	long ret;
@@ -620,6 +614,10 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 
 	_enter("");
 
+	ret = call->error;
+	if (ret < 0)
+		goto out;
+
 	rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
 	rtt2 = nsecs_to_jiffies64(rtt) * 2;
 	if (rtt2 < 2)
@@ -703,6 +701,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 		break;
 	}
 
+out:
 	_debug("call complete");
 	afs_put_call(call);
 	_leave(" = %p", (void *)ret);
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index f402ee8171a1..b05e0de04f42 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -141,8 +141,8 @@ static bool afs_do_probe_vlserver(struct afs_net *net,
 	struct afs_addr_cursor ac = {
 		.index = 0,
 	};
+	struct afs_call *call;
 	bool in_progress = false;
-	int err;
 
 	_enter("%s", server->name);
 
@@ -156,12 +156,14 @@ static bool afs_do_probe_vlserver(struct afs_net *net,
 	server->probe.rtt = UINT_MAX;
 
 	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
-		err = afs_vl_get_capabilities(net, &ac, key, server,
-					      server_index, true);
-		if (err == -EINPROGRESS)
+		call = afs_vl_get_capabilities(net, &ac, key, server,
+					       server_index);
+		if (!IS_ERR(call)) {
+			afs_put_call(call);
 			in_progress = true;
-		else
-			afs_prioritise_error(_e, err, ac.abort_code);
+		} else {
+			afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
+		}
 	}
 
 	if (!in_progress)
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index c3d9e5a5f67e..e98ffe6df1c1 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -167,7 +167,8 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
 		memset((void *)bp + volnamesz, 0, padsz);
 
 	trace_afs_make_vl_call(call);
-	return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
+	afs_make_call(&vc->ac, call, GFP_KERNEL);
+	return (struct afs_vldb_entry *)afs_wait_for_call_to_complete(call, &vc->ac);
 }
 
 /*
@@ -304,7 +305,8 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
 		r->uuid.node[i] = htonl(u->node[i]);
 
 	trace_afs_make_vl_call(call);
-	return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
+	afs_make_call(&vc->ac, call, GFP_KERNEL);
+	return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
 }
 
 /*
@@ -378,12 +380,11 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
  * We use this to probe for service upgrade to determine what the server at the
  * other end supports.
  */
-int afs_vl_get_capabilities(struct afs_net *net,
-			    struct afs_addr_cursor *ac,
-			    struct key *key,
-			    struct afs_vlserver *server,
-			    unsigned int server_index,
-			    bool async)
+struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
+					 struct afs_addr_cursor *ac,
+					 struct key *key,
+					 struct afs_vlserver *server,
+					 unsigned int server_index)
 {
 	struct afs_call *call;
 	__be32 *bp;
@@ -392,13 +393,14 @@ int afs_vl_get_capabilities(struct afs_net *net,
 
 	call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
 	if (!call)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	call->key = key;
 	call->reply[0] = afs_get_vlserver(server);
 	call->reply[1] = (void *)(long)server_index;
 	call->upgrade = true;
 	call->want_reply_time = true;
+	call->async = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -406,7 +408,8 @@ int afs_vl_get_capabilities(struct afs_net *net,
 
 	/* Can't take a ref on server */
 	trace_afs_make_vl_call(call);
-	return afs_make_call(ac, call, GFP_KERNEL, async);
+	afs_make_call(ac, call, GFP_KERNEL);
+	return call;
 }
 
 /*
@@ -647,5 +650,6 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
 	memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
 
 	trace_afs_make_vl_call(call);
-	return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
+	afs_make_call(&vc->ac, call, GFP_KERNEL);
+	return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
 }
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 6e97a42d24d1..95b04cb76c8c 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -519,7 +519,8 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -708,7 +709,8 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -808,7 +810,8 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 static const struct afs_call_type yfs_RXFSMakeDir = {
@@ -871,7 +874,8 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -961,7 +965,8 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &dvnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1048,7 +1053,8 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &dvnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1133,7 +1139,8 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1229,7 +1236,8 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &dvnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1327,7 +1335,8 @@ int yfs_fs_rename(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &orig_dvnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1433,7 +1442,8 @@ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1521,7 +1531,8 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1564,7 +1575,8 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1751,7 +1763,8 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc,
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1845,7 +1858,8 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1880,7 +1894,8 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -1915,7 +1930,8 @@ int yfs_fs_release_lock(struct afs_fs_cursor *fc)
 
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -2003,7 +2019,8 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, fid);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
 
 /*
@@ -2180,5 +2197,6 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &fids[0]);
-	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+	afs_make_call(&fc->ac, call, GFP_NOFS);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
-- 
cgit v1.2.3


From a690f60a2ba3125a2f08cdde176376f5ec1d8a84 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:50 +0100
Subject: afs: Calculate lock extend timer from set/extend reply reception

Record the timestamp on the first reply DATA packet received in response to
a set- or extend-lock operation, then use this to calculate the time
remaining till the lock expires rather than using whatever time the
requesting process wakes up and finishes processing the operation as a
base.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c     | 30 ++++++++++++++++++++++++++++--
 fs/afs/fsclient.c  |  4 ++++
 fs/afs/internal.h  |  2 ++
 fs/afs/yfsclient.c |  4 ++++
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index e432bd27a2e7..8b02f0056d54 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -40,8 +40,34 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
  */
 static void afs_schedule_lock_extension(struct afs_vnode *vnode)
 {
-	queue_delayed_work(afs_lock_manager, &vnode->lock_work,
-			   AFS_LOCKWAIT * HZ / 2);
+	ktime_t expires_at, now, duration;
+	u64 duration_j;
+
+	expires_at = ktime_add_ms(vnode->locked_at, AFS_LOCKWAIT * 1000 / 2);
+	now = ktime_get_real();
+	duration = ktime_sub(expires_at, now);
+	if (duration <= 0)
+		duration_j = 0;
+	else
+		duration_j = nsecs_to_jiffies(ktime_to_ns(duration));
+
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work, duration_j);
+}
+
+/*
+ * In the case of successful completion of a lock operation, record the time
+ * the reply appeared and start the lock extension timer.
+ */
+void afs_lock_op_done(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+
+	if (call->error == 0) {
+		spin_lock(&vnode->lock);
+		vnode->locked_at = call->reply_time;
+		afs_schedule_lock_extension(vnode);
+		spin_unlock(&vnode->lock);
+	}
 }
 
 /*
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 5eefcf35d73d..9d405f96cc91 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1845,6 +1845,7 @@ static const struct afs_call_type afs_RXFSSetLock = {
 	.name		= "FS.SetLock",
 	.op		= afs_FS_SetLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
+	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1855,6 +1856,7 @@ static const struct afs_call_type afs_RXFSExtendLock = {
 	.name		= "FS.ExtendLock",
 	.op		= afs_FS_ExtendLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
+	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1889,6 +1891,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1925,6 +1928,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 61e24551574d..6e680783f59f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -638,6 +638,7 @@ struct afs_vnode {
 	struct list_head	granted_locks;	/* locks granted on this file */
 	struct delayed_work	lock_work;	/* work to be done in locking */
 	struct key		*lock_key;	/* Key to be used in lock ops */
+	ktime_t			locked_at;	/* Time at which lock obtained */
 	enum afs_lock_state	lock_state : 8;
 	afs_lock_type_t		lock_type : 8;
 
@@ -905,6 +906,7 @@ extern void afs_put_read(struct afs_read *);
  */
 extern struct workqueue_struct *afs_lock_manager;
 
+extern void afs_lock_op_done(struct afs_call *);
 extern void afs_lock_work(struct work_struct *);
 extern void afs_lock_may_be_available(struct afs_vnode *);
 extern int afs_lock(struct file *, int, struct file_lock *);
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 95b04cb76c8c..73546c2d89ee 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1801,6 +1801,7 @@ static const struct afs_call_type yfs_RXYFSSetLock = {
 	.name		= "YFS.SetLock",
 	.op		= yfs_FS_SetLock,
 	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1811,6 +1812,7 @@ static const struct afs_call_type yfs_RXYFSExtendLock = {
 	.name		= "YFS.ExtendLock",
 	.op		= yfs_FS_ExtendLock,
 	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1847,6 +1849,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1884,6 +1887,7 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
-- 
cgit v1.2.3


From 68ce801ffd82e72d5005ab5458e8b9e59f24d9cc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:50 +0100
Subject: afs: Fix AFS file locking to allow fine grained locks

Fix AFS file locking to allow fine grained locks as some applications, such
as firefox, won't work if they can't take such locks on certain state files
- thereby preventing the use of kAFS to distribute a home directory.

Note that this cannot be made completely functional as the protocol only
has provision for whole-file locks, so there exists the possibility of a
process deadlocking itself by getting a partial read-lock on a file first
and then trying to get a non-overlapping write-lock - but we got the
server's read lock with the first lock, so we're now stuck.

OpenAFS solves this by just granting any partial-range lock directly
without consulting the server - and hoping there's no remote collision.  I
want to implement that in a separate patch and it requires a bit more
thought.

Fixes: 8d6c554126b8 ("AFS: implement file locking")
Reported-by: Jonathan Billings <jsbillings@jsbillings.org>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 8b02f0056d54..6919f53ed4ad 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -458,10 +458,6 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 
 	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
-	/* only whole-file locks are supported */
-	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
-		return -EINVAL;
-
 	fl->fl_ops = &afs_lock_ops;
 	INIT_LIST_HEAD(&fl->fl_u.afs.link);
 	fl->fl_u.afs.state = AFS_LOCK_PENDING;
@@ -613,10 +609,6 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
 	/* Flush all pending writes before doing anything with locks. */
 	vfs_fsync(file, 0);
 
-	/* only whole-file unlocks are supported */
-	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
-		return -EINVAL;
-
 	ret = posix_lock_file(file, fl, NULL);
 	_leave(" = %d [%u]", ret, vnode->lock_state);
 	return ret;
@@ -644,12 +636,15 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 			goto error;
 
 		lock_count = READ_ONCE(vnode->status.lock_count);
-		if (lock_count > 0)
-			fl->fl_type = F_RDLCK;
-		else
-			fl->fl_type = F_WRLCK;
-		fl->fl_start = 0;
-		fl->fl_end = OFFSET_MAX;
+		if (lock_count != 0) {
+			if (lock_count > 0)
+				fl->fl_type = F_RDLCK;
+			else
+				fl->fl_type = F_WRLCK;
+			fl->fl_start = 0;
+			fl->fl_end = OFFSET_MAX;
+			fl->fl_pid = 0;
+		}
 	}
 
 	ret = 0;
-- 
cgit v1.2.3


From 4be5975aea154e164696128d049dec9ed341585c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:50 +0100
Subject: afs: Further fix file locking

Further fix the file locking in the afs filesystem client in a number of
ways, including:

 (1) Don't submit the operation to obtain a lock from the server in a work
     queue context, but rather do it in the process context of whoever
     issued the requesting system call.

 (2) The owner of the file_lock struct at the front of the pending_locks
     queue now owns right to talk to the server.

 (3) Write locks can be instantly granted if they don't overlap with any
     other locks *and* we have a write lock on the server.

 (4) In the event of an authentication/permission error, all other matching
     pending locks requests are also immediately aborted.

 (5) Properly use VFS core locks_lock_file_wait() to distribute the server
     lock amongst local client locks, including waiting for the lock to
     become available.

Test with:

	sqlite3 /afs/.../scratch/billings.sqlite <<EOF
	CREATE TABLE hosts (
	    hostname varchar(80),
	    shorthost varchar(80),
	    room varchar(30),
	    building varchar(30),
	    PRIMARY KEY(shorthost)
	    );
	EOF

With the version of sqlite3 that I have, this should fail consistently with
EAGAIN, whether or not the program is straced (which introduces some delays
between lock syscalls).

Fixes: 0fafdc9f888b ("afs: Fix file locking")
Reported-by: Jonathan Billings <jsbillin@umich.edu>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c | 390 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 195 insertions(+), 195 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 6919f53ed4ad..827d5a711088 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -13,9 +13,11 @@
 
 #define AFS_LOCK_GRANTED	0
 #define AFS_LOCK_PENDING	1
+#define AFS_LOCK_YOUR_TRY	2
 
 struct workqueue_struct *afs_lock_manager;
 
+static void afs_next_locker(struct afs_vnode *vnode, int error);
 static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
 static void afs_fl_release_private(struct file_lock *fl);
 
@@ -24,6 +26,12 @@ static const struct file_lock_operations afs_lock_ops = {
 	.fl_release_private	= afs_fl_release_private,
 };
 
+static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_state state)
+{
+	_debug("STATE %u -> %u", vnode->lock_state, state);
+	vnode->lock_state = state;
+}
+
 /*
  * if the callback is broken on this vnode, then the lock may now be available
  */
@@ -31,7 +39,13 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
 {
 	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
-	queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
+	if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB)
+		return;
+
+	spin_lock(&vnode->lock);
+	if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
+		afs_next_locker(vnode, 0);
+	spin_unlock(&vnode->lock);
 }
 
 /*
@@ -75,22 +89,65 @@ void afs_lock_op_done(struct afs_call *call)
  * first lock in the queue is itself a readlock)
  * - the caller must hold the vnode lock
  */
-static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
+static void afs_grant_locks(struct afs_vnode *vnode)
 {
 	struct file_lock *p, *_p;
+	bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE);
 
-	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
-	if (fl->fl_type == F_RDLCK) {
-		list_for_each_entry_safe(p, _p, &vnode->pending_locks,
-					 fl_u.afs.link) {
-			if (p->fl_type == F_RDLCK) {
-				p->fl_u.afs.state = AFS_LOCK_GRANTED;
-				list_move_tail(&p->fl_u.afs.link,
-					       &vnode->granted_locks);
-				wake_up(&p->fl_wait);
-			}
+	list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+		if (!exclusive && p->fl_type == F_WRLCK)
+			continue;
+
+		list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
+		p->fl_u.afs.state = AFS_LOCK_GRANTED;
+		wake_up(&p->fl_wait);
+	}
+}
+
+/*
+ * If an error is specified, reject every pending lock that matches the
+ * authentication and type of the lock we failed to get.  If there are any
+ * remaining lockers, try to wake up one of them to have a go.
+ */
+static void afs_next_locker(struct afs_vnode *vnode, int error)
+{
+	struct file_lock *p, *_p, *next = NULL;
+	struct key *key = vnode->lock_key;
+	unsigned int fl_type = F_RDLCK;
+
+	_enter("");
+
+	if (vnode->lock_type == AFS_LOCK_WRITE)
+		fl_type = F_WRLCK;
+
+	list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+		if (error &&
+		    p->fl_type == fl_type &&
+		    afs_file_key(p->fl_file) == key) {
+			list_del_init(&p->fl_u.afs.link);
+			p->fl_u.afs.state = error;
+			wake_up(&p->fl_wait);
 		}
+
+		/* Select the next locker to hand off to. */
+		if (next &&
+		    (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK))
+			continue;
+		next = p;
 	}
+
+	vnode->lock_key = NULL;
+	key_put(key);
+
+	if (next) {
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+		next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
+		wake_up(&next->fl_wait);
+	} else {
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
+	}
+
+	_leave("");
 }
 
 /*
@@ -196,8 +253,6 @@ void afs_lock_work(struct work_struct *work)
 {
 	struct afs_vnode *vnode =
 		container_of(work, struct afs_vnode, lock_work.work);
-	struct file_lock *fl, *next;
-	afs_lock_type_t type;
 	struct key *key;
 	int ret;
 
@@ -210,7 +265,7 @@ again:
 	switch (vnode->lock_state) {
 	case AFS_VNODE_LOCK_NEED_UNLOCK:
 		_debug("unlock");
-		vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING);
 		spin_unlock(&vnode->lock);
 
 		/* attempt to release the server lock; if it fails, we just
@@ -222,22 +277,9 @@ again:
 			       vnode->fid.vid, vnode->fid.vnode, ret);
 
 		spin_lock(&vnode->lock);
-		key_put(vnode->lock_key);
-		vnode->lock_key = NULL;
-		vnode->lock_state = AFS_VNODE_LOCK_NONE;
-
-		if (list_empty(&vnode->pending_locks)) {
-			spin_unlock(&vnode->lock);
-			return;
-		}
-
-		/* The new front of the queue now owns the state variables. */
-		next = list_entry(vnode->pending_locks.next,
-				  struct file_lock, fl_u.afs.link);
-		vnode->lock_key = key_get(afs_file_key(next->fl_file));
-		vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
-		vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
-		goto again;
+		afs_next_locker(vnode, 0);
+		spin_unlock(&vnode->lock);
+		return;
 
 	/* If we've already got a lock, then it must be time to extend that
 	 * lock as AFS locks time out after 5 minutes.
@@ -248,7 +290,7 @@ again:
 		ASSERT(!list_empty(&vnode->granted_locks));
 
 		key = key_get(vnode->lock_key);
-		vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING);
 		spin_unlock(&vnode->lock);
 
 		ret = afs_extend_lock(vnode, key); /* RPC */
@@ -262,72 +304,26 @@ again:
 
 		if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
 			goto again;
-		vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
 
-		if (ret == 0)
-			afs_schedule_lock_extension(vnode);
-		else
+		if (ret != 0)
 			queue_delayed_work(afs_lock_manager, &vnode->lock_work,
 					   HZ * 10);
 		spin_unlock(&vnode->lock);
 		_leave(" [ext]");
 		return;
 
-		/* If we don't have a granted lock, then we must've been called
-		 * back by the server, and so if might be possible to get a
-		 * lock we're currently waiting for.
-		 */
+	/* If we're waiting for a callback to indicate lock release, we can't
+	 * actually rely on this, so need to recheck at regular intervals.  The
+	 * problem is that the server might not notify us if the lock just
+	 * expires (say because a client died) rather than being explicitly
+	 * released.
+	 */
 	case AFS_VNODE_LOCK_WAITING_FOR_CB:
-		_debug("get");
-
-		key = key_get(vnode->lock_key);
-		type = vnode->lock_type;
-		vnode->lock_state = AFS_VNODE_LOCK_SETTING;
+		_debug("retry");
+		afs_next_locker(vnode, 0);
 		spin_unlock(&vnode->lock);
-
-		ret = afs_set_lock(vnode, key, type); /* RPC */
-		key_put(key);
-
-		spin_lock(&vnode->lock);
-		switch (ret) {
-		case -EWOULDBLOCK:
-			_debug("blocked");
-			break;
-		case 0:
-			_debug("acquired");
-			vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
-			/* Fall through */
-		default:
-			/* Pass the lock or the error onto the first locker in
-			 * the list - if they're looking for this type of lock.
-			 * If they're not, we assume that whoever asked for it
-			 * took a signal.
-			 */
-			if (list_empty(&vnode->pending_locks)) {
-				_debug("withdrawn");
-				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
-				goto again;
-			}
-
-			fl = list_entry(vnode->pending_locks.next,
-					struct file_lock, fl_u.afs.link);
-			type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
-			if (vnode->lock_type != type) {
-				_debug("changed");
-				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
-				goto again;
-			}
-
-			fl->fl_u.afs.state = ret;
-			if (ret == 0)
-				afs_grant_locks(vnode, fl);
-			else
-				list_del_init(&fl->fl_u.afs.link);
-			wake_up(&fl->fl_wait);
-			spin_unlock(&vnode->lock);
-			_leave(" [granted]");
-			return;
-		}
+		return;
 
 	default:
 		/* Looks like a lock request was withdrawn. */
@@ -345,14 +341,15 @@ again:
  */
 static void afs_defer_unlock(struct afs_vnode *vnode)
 {
-	_enter("");
+	_enter("%u", vnode->lock_state);
 
-	if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
-	    vnode->lock_state == AFS_VNODE_LOCK_EXTENDING) {
+	if (list_empty(&vnode->granted_locks) &&
+	    (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
+	     vnode->lock_state == AFS_VNODE_LOCK_EXTENDING)) {
 		cancel_delayed_work(&vnode->lock_work);
 
-		vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
-		afs_lock_may_be_available(vnode);
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_NEED_UNLOCK);
+		queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
 	}
 }
 
@@ -401,50 +398,6 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
 	return 0;
 }
 
-/*
- * Remove the front runner from the pending queue.
- * - The caller must hold vnode->lock.
- */
-static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
-{
-	struct file_lock *next;
-
-	_enter("");
-
-	/* ->lock_type, ->lock_key and ->lock_state only belong to this
-	 * file_lock if we're at the front of the pending queue or if we have
-	 * the lock granted or if the lock_state is NEED_UNLOCK or UNLOCKING.
-	 */
-	if (vnode->granted_locks.next == &fl->fl_u.afs.link &&
-	    vnode->granted_locks.prev == &fl->fl_u.afs.link) {
-		list_del_init(&fl->fl_u.afs.link);
-		afs_defer_unlock(vnode);
-		return;
-	}
-
-	if (!list_empty(&vnode->granted_locks) ||
-	    vnode->pending_locks.next != &fl->fl_u.afs.link) {
-		list_del_init(&fl->fl_u.afs.link);
-		return;
-	}
-
-	list_del_init(&fl->fl_u.afs.link);
-	key_put(vnode->lock_key);
-	vnode->lock_key = NULL;
-	vnode->lock_state = AFS_VNODE_LOCK_NONE;
-
-	if (list_empty(&vnode->pending_locks))
-		return;
-
-	/* The new front of the queue now owns the state variables. */
-	next = list_entry(vnode->pending_locks.next,
-			  struct file_lock, fl_u.afs.link);
-	vnode->lock_key = key_get(afs_file_key(next->fl_file));
-	vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
-	vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
-	afs_lock_may_be_available(vnode);
-}
-
 /*
  * request a lock on a file on the server
  */
@@ -469,44 +422,66 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 		return ret;
 
 	spin_lock(&vnode->lock);
+	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
-	/* If we've already got a readlock on the server then we instantly
-	 * grant another readlock, irrespective of whether there are any
-	 * pending writelocks.
+	/* If we've already got a lock on the server then try to move to having
+	 * the VFS grant the requested lock.  Note that this means that other
+	 * clients may get starved out.
 	 */
-	if (type == AFS_LOCK_READ &&
-	    vnode->lock_state == AFS_VNODE_LOCK_GRANTED &&
-	    vnode->lock_type == AFS_LOCK_READ) {
-		_debug("instant readlock");
-		ASSERT(!list_empty(&vnode->granted_locks));
-		goto share_existing_lock;
-	}
+	_debug("try %u", vnode->lock_state);
+	if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED) {
+		if (type == AFS_LOCK_READ) {
+			_debug("instant readlock");
+			list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+			fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+			goto vnode_is_locked_u;
+		}
 
-	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
+		if (vnode->lock_type == AFS_LOCK_WRITE) {
+			_debug("instant writelock");
+			list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+			fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+			goto vnode_is_locked_u;
+		}
+	}
 
 	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
 		goto need_to_wait;
 
+try_to_lock:
 	/* We don't have a lock on this vnode and we aren't currently waiting
 	 * for one either, so ask the server for a lock.
 	 *
 	 * Note that we need to be careful if we get interrupted by a signal
 	 * after dispatching the request as we may still get the lock, even
 	 * though we don't wait for the reply (it's not too bad a problem - the
-	 * lock will expire in 10 mins anyway).
+	 * lock will expire in 5 mins anyway).
 	 */
 	_debug("not locked");
 	vnode->lock_key = key_get(key);
 	vnode->lock_type = type;
-	vnode->lock_state = AFS_VNODE_LOCK_SETTING;
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
 	spin_unlock(&vnode->lock);
 
 	ret = afs_set_lock(vnode, key, type); /* RPC */
 
 	spin_lock(&vnode->lock);
 	switch (ret) {
+	case -EKEYREJECTED:
+	case -EKEYEXPIRED:
+	case -EKEYREVOKED:
+	case -EPERM:
+	case -EACCES:
+		fl->fl_u.afs.state = ret;
+		list_del_init(&fl->fl_u.afs.link);
+		afs_next_locker(vnode, ret);
+		goto error_unlock;
+
 	default:
-		goto abort_attempt;
+		fl->fl_u.afs.state = ret;
+		list_del_init(&fl->fl_u.afs.link);
+		afs_next_locker(vnode, 0);
+		goto error_unlock;
 
 	case -EWOULDBLOCK:
 		/* The server doesn't have a lock-waiting queue, so the client
@@ -516,29 +491,23 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 		_debug("would block");
 		ASSERT(list_empty(&vnode->granted_locks));
 		ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
-		vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
-		goto need_to_wait;
+		goto lock_is_contended;
 
 	case 0:
 		_debug("acquired");
-		break;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
+		afs_grant_locks(vnode);
+		goto vnode_is_locked_u;
 	}
 
-	/* we've acquired a server lock, but it needs to be renewed after 5
-	 * mins */
-	vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
-	afs_schedule_lock_extension(vnode);
-
-share_existing_lock:
-	/* the lock has been granted as far as we're concerned... */
-	fl->fl_u.afs.state = AFS_LOCK_GRANTED;
-	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
-
-given_lock:
-	/* ... but we do still need to get the VFS's blessing */
+vnode_is_locked_u:
 	spin_unlock(&vnode->lock);
+vnode_is_locked:
+	/* the lock has been granted by the server... */
+	ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
 
-	ret = posix_lock_file(file, fl, NULL);
+	/* ... but the VFS still needs to distribute access on this client. */
+	ret = locks_lock_file_wait(file, fl);
 	if (ret < 0)
 		goto vfs_rejected_lock;
 
@@ -550,38 +519,61 @@ given_lock:
 	_leave(" = 0");
 	return 0;
 
+lock_is_contended:
+	if (!(fl->fl_flags & FL_SLEEP)) {
+		list_del_init(&fl->fl_u.afs.link);
+		afs_next_locker(vnode, 0);
+		ret = -EAGAIN;
+		goto error_unlock;
+	}
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_WAITING_FOR_CB);
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work, HZ * 5);
+
 need_to_wait:
 	/* We're going to have to wait.  Either this client doesn't have a lock
 	 * on the server yet and we need to wait for a callback to occur, or
-	 * the client does have a lock on the server, but it belongs to some
-	 * other process(es) and is incompatible with the lock we want.
+	 * the client does have a lock on the server, but it's shared and we
+	 * need an exclusive lock.
 	 */
-	ret = -EAGAIN;
-	if (fl->fl_flags & FL_SLEEP) {
-		spin_unlock(&vnode->lock);
+	spin_unlock(&vnode->lock);
 
-		_debug("sleep");
-		ret = wait_event_interruptible(fl->fl_wait,
-					       fl->fl_u.afs.state != AFS_LOCK_PENDING);
+	_debug("sleep");
+	ret = wait_event_interruptible(fl->fl_wait,
+				       fl->fl_u.afs.state != AFS_LOCK_PENDING);
+	_debug("wait = %d", ret);
 
+	if (fl->fl_u.afs.state >= 0 && fl->fl_u.afs.state != AFS_LOCK_GRANTED) {
 		spin_lock(&vnode->lock);
-	}
 
-	if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
-		goto given_lock;
-	if (fl->fl_u.afs.state < 0)
-		ret = fl->fl_u.afs.state;
+		switch (fl->fl_u.afs.state) {
+		case AFS_LOCK_YOUR_TRY:
+			fl->fl_u.afs.state = AFS_LOCK_PENDING;
+			goto try_to_lock;
+		case AFS_LOCK_PENDING:
+			if (ret > 0) {
+				/* We need to retry the lock.  We may not be
+				 * notified by the server if it just expired
+				 * rather than being released.
+				 */
+				ASSERTCMP(vnode->lock_state, ==, AFS_VNODE_LOCK_WAITING_FOR_CB);
+				afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+				fl->fl_u.afs.state = AFS_LOCK_PENDING;
+				goto try_to_lock;
+			}
+			goto error_unlock;
+		case AFS_LOCK_GRANTED:
+		default:
+			break;
+		}
 
-abort_attempt:
-	/* we aren't going to get the lock, either because we're unwilling to
-	 * wait, or because some signal happened */
-	_debug("abort");
-	afs_dequeue_lock(vnode, fl);
+		spin_unlock(&vnode->lock);
+	}
 
-error_unlock:
-	spin_unlock(&vnode->lock);
-	_leave(" = %d", ret);
-	return ret;
+	if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
+		goto vnode_is_locked;
+	ret = fl->fl_u.afs.state;
+	goto error;
 
 vfs_rejected_lock:
 	/* The VFS rejected the lock we just obtained, so we have to discard
@@ -591,9 +583,13 @@ vfs_rejected_lock:
 	_debug("vfs refused %d", ret);
 	spin_lock(&vnode->lock);
 	list_del_init(&fl->fl_u.afs.link);
-	if (list_empty(&vnode->granted_locks))
-		afs_defer_unlock(vnode);
-	goto error_unlock;
+	afs_defer_unlock(vnode);
+
+error_unlock:
+	spin_unlock(&vnode->lock);
+error:
+	_leave(" = %d", ret);
+	return ret;
 }
 
 /*
@@ -609,7 +605,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
 	/* Flush all pending writes before doing anything with locks. */
 	vfs_fsync(file, 0);
 
-	ret = posix_lock_file(file, fl, NULL);
+	ret = locks_lock_file_wait(file, fl);
 	_leave(" = %d [%u]", ret, vnode->lock_state);
 	return ret;
 }
@@ -731,7 +727,11 @@ static void afs_fl_release_private(struct file_lock *fl)
 	_enter("");
 
 	spin_lock(&vnode->lock);
-	afs_dequeue_lock(vnode, fl);
+
+	list_del_init(&fl->fl_u.afs.link);
+	if (list_empty(&vnode->granted_locks))
+		afs_defer_unlock(vnode);
+
 	_debug("state %u for %p", vnode->lock_state, vnode);
 	spin_unlock(&vnode->lock);
 }
-- 
cgit v1.2.3


From d46966013bb4f1fbed7cf995828523a3d1f265c6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:50 +0100
Subject: afs: Add file locking tracepoints

Add two tracepoints for monitoring AFS file locking.  Firstly, add one that
follows the operational part:

    echo 1 >/sys/kernel/debug/tracing/events/afs/afs_flock_op/enable

And add a second that more follows the event-driven part:

    echo 1 >/sys/kernel/debug/tracing/events/afs/afs_flock_ev/enable

Individual file_lock structs seen by afs are tagged with debugging IDs that
are displayed in the trace log to make it easier to see what's going on,
especially as setting the first lock always seems to involve copying the
file_lock twice.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c             |  72 ++++++++++++++++++----
 include/linux/fs.h         |   1 +
 include/trace/events/afs.h | 146 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 209 insertions(+), 10 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 827d5a711088..3e06a560f66b 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -32,6 +32,8 @@ static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_sta
 	vnode->lock_state = state;
 }
 
+static atomic_t afs_file_lock_debug_id;
+
 /*
  * if the callback is broken on this vnode, then the lock may now be available
  */
@@ -45,6 +47,7 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
 	spin_lock(&vnode->lock);
 	if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
 		afs_next_locker(vnode, 0);
+	trace_afs_flock_ev(vnode, NULL, afs_flock_callback_break, 0);
 	spin_unlock(&vnode->lock);
 }
 
@@ -78,6 +81,7 @@ void afs_lock_op_done(struct afs_call *call)
 
 	if (call->error == 0) {
 		spin_lock(&vnode->lock);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
 		vnode->locked_at = call->reply_time;
 		afs_schedule_lock_extension(vnode);
 		spin_unlock(&vnode->lock);
@@ -100,6 +104,7 @@ static void afs_grant_locks(struct afs_vnode *vnode)
 
 		list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
 		p->fl_u.afs.state = AFS_LOCK_GRANTED;
+		trace_afs_flock_op(vnode, p, afs_flock_op_grant);
 		wake_up(&p->fl_wait);
 	}
 }
@@ -142,9 +147,11 @@ static void afs_next_locker(struct afs_vnode *vnode, int error)
 	if (next) {
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
 		next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
+		trace_afs_flock_op(vnode, next, afs_flock_op_wake);
 		wake_up(&next->fl_wait);
 	} else {
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0);
 	}
 
 	_leave("");
@@ -264,8 +271,8 @@ again:
 	_debug("wstate %u for %p", vnode->lock_state, vnode);
 	switch (vnode->lock_state) {
 	case AFS_VNODE_LOCK_NEED_UNLOCK:
-		_debug("unlock");
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_work_unlocking, 0);
 		spin_unlock(&vnode->lock);
 
 		/* attempt to release the server lock; if it fails, we just
@@ -291,6 +298,7 @@ again:
 
 		key = key_get(vnode->lock_key);
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_work_extending, 0);
 		spin_unlock(&vnode->lock);
 
 		ret = afs_extend_lock(vnode, key); /* RPC */
@@ -349,6 +357,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode)
 		cancel_delayed_work(&vnode->lock_work);
 
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_NEED_UNLOCK);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_defer_unlock, 0);
 		queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
 	}
 }
@@ -421,6 +430,8 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	if (ret < 0)
 		return ret;
 
+	trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock);
+
 	spin_lock(&vnode->lock);
 	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
@@ -457,7 +468,7 @@ try_to_lock:
 	 * though we don't wait for the reply (it's not too bad a problem - the
 	 * lock will expire in 5 mins anyway).
 	 */
-	_debug("not locked");
+	trace_afs_flock_ev(vnode, fl, afs_flock_try_to_lock, 0);
 	vnode->lock_key = key_get(key);
 	vnode->lock_type = type;
 	afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
@@ -473,12 +484,14 @@ try_to_lock:
 	case -EPERM:
 	case -EACCES:
 		fl->fl_u.afs.state = ret;
+		trace_afs_flock_ev(vnode, fl, afs_flock_fail_perm, ret);
 		list_del_init(&fl->fl_u.afs.link);
 		afs_next_locker(vnode, ret);
 		goto error_unlock;
 
 	default:
 		fl->fl_u.afs.state = ret;
+		trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
 		list_del_init(&fl->fl_u.afs.link);
 		afs_next_locker(vnode, 0);
 		goto error_unlock;
@@ -488,14 +501,13 @@ try_to_lock:
 		 * will have to retry.  The server will break the outstanding
 		 * callbacks on a file when a lock is released.
 		 */
-		_debug("would block");
 		ASSERT(list_empty(&vnode->granted_locks));
 		ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
 		goto lock_is_contended;
 
 	case 0:
-		_debug("acquired");
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
+		trace_afs_flock_ev(vnode, fl, afs_flock_acquired, type);
 		afs_grant_locks(vnode);
 		goto vnode_is_locked_u;
 	}
@@ -507,7 +519,9 @@ vnode_is_locked:
 	ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
 
 	/* ... but the VFS still needs to distribute access on this client. */
+	trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0);
 	ret = locks_lock_file_wait(file, fl);
+	trace_afs_flock_ev(vnode, fl, afs_flock_vfs_lock, ret);
 	if (ret < 0)
 		goto vfs_rejected_lock;
 
@@ -528,6 +542,7 @@ lock_is_contended:
 	}
 
 	afs_set_lock_state(vnode, AFS_VNODE_LOCK_WAITING_FOR_CB);
+	trace_afs_flock_ev(vnode, fl, afs_flock_would_block, ret);
 	queue_delayed_work(afs_lock_manager, &vnode->lock_work, HZ * 5);
 
 need_to_wait:
@@ -538,10 +553,10 @@ need_to_wait:
 	 */
 	spin_unlock(&vnode->lock);
 
-	_debug("sleep");
+	trace_afs_flock_ev(vnode, fl, afs_flock_waiting, 0);
 	ret = wait_event_interruptible(fl->fl_wait,
 				       fl->fl_u.afs.state != AFS_LOCK_PENDING);
-	_debug("wait = %d", ret);
+	trace_afs_flock_ev(vnode, fl, afs_flock_waited, ret);
 
 	if (fl->fl_u.afs.state >= 0 && fl->fl_u.afs.state != AFS_LOCK_GRANTED) {
 		spin_lock(&vnode->lock);
@@ -602,6 +617,8 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
 
 	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
+	trace_afs_flock_op(vnode, fl, afs_flock_op_unlock);
+
 	/* Flush all pending writes before doing anything with locks. */
 	vfs_fsync(file, 0);
 
@@ -655,6 +672,8 @@ error:
 int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+	enum afs_flock_operation op;
+	int ret;
 
 	_enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -667,9 +686,23 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 
 	if (IS_GETLK(cmd))
 		return afs_do_getlk(file, fl);
+
+	fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+	trace_afs_flock_op(vnode, fl, afs_flock_op_lock);
+
 	if (fl->fl_type == F_UNLCK)
-		return afs_do_unlk(file, fl);
-	return afs_do_setlk(file, fl);
+		ret = afs_do_unlk(file, fl);
+	else
+		ret = afs_do_setlk(file, fl);
+
+	switch (ret) {
+	case 0:		op = afs_flock_op_return_ok; break;
+	case -EAGAIN:	op = afs_flock_op_return_eagain; break;
+	case -EDEADLK:	op = afs_flock_op_return_edeadlk; break;
+	default:	op = afs_flock_op_return_error; break;
+	}
+	trace_afs_flock_op(vnode, fl, op);
+	return ret;
 }
 
 /*
@@ -678,6 +711,8 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+	enum afs_flock_operation op;
+	int ret;
 
 	_enter("{%llx:%llu},%d,{t=%x,fl=%x}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -693,10 +728,23 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
 
+	fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+	trace_afs_flock_op(vnode, fl, afs_flock_op_flock);
+
 	/* we're simulating flock() locks using posix locks on the server */
 	if (fl->fl_type == F_UNLCK)
-		return afs_do_unlk(file, fl);
-	return afs_do_setlk(file, fl);
+		ret = afs_do_unlk(file, fl);
+	else
+		ret = afs_do_setlk(file, fl);
+
+	switch (ret) {
+	case 0:		op = afs_flock_op_return_ok; break;
+	case -EAGAIN:	op = afs_flock_op_return_eagain; break;
+	case -EDEADLK:	op = afs_flock_op_return_edeadlk; break;
+	default:	op = afs_flock_op_return_error; break;
+	}
+	trace_afs_flock_op(vnode, fl, op);
+	return ret;
 }
 
 /*
@@ -711,7 +759,10 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
 
 	_enter("");
 
+	new->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+
 	spin_lock(&vnode->lock);
+	trace_afs_flock_op(vnode, new, afs_flock_op_copy_lock);
 	list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
 	spin_unlock(&vnode->lock);
 }
@@ -728,6 +779,7 @@ static void afs_fl_release_private(struct file_lock *fl)
 
 	spin_lock(&vnode->lock);
 
+	trace_afs_flock_op(vnode, fl, afs_flock_op_release_lock);
 	list_del_init(&fl->fl_u.afs.link);
 	if (list_empty(&vnode->granted_locks))
 		afs_defer_unlock(vnode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..744908e0bdec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1093,6 +1093,7 @@ struct file_lock {
 		struct {
 			struct list_head link;	/* link in AFS vnode's pending_locks list */
 			int state;		/* state of grant or error if -ve */
+			unsigned int	debug_id;
 		} afs;
 	} fl_u;
 } __randomize_layout;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index e3f005eae1f7..24c058a93e8f 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -152,6 +152,40 @@ enum afs_file_error {
 	afs_file_error_writeback_fail,
 };
 
+enum afs_flock_event {
+	afs_flock_acquired,
+	afs_flock_callback_break,
+	afs_flock_defer_unlock,
+	afs_flock_fail_other,
+	afs_flock_fail_perm,
+	afs_flock_no_lockers,
+	afs_flock_timestamp,
+	afs_flock_try_to_lock,
+	afs_flock_vfs_lock,
+	afs_flock_vfs_locking,
+	afs_flock_waited,
+	afs_flock_waiting,
+	afs_flock_work_extending,
+	afs_flock_work_retry,
+	afs_flock_work_unlocking,
+	afs_flock_would_block,
+};
+
+enum afs_flock_operation {
+	afs_flock_op_copy_lock,
+	afs_flock_op_flock,
+	afs_flock_op_grant,
+	afs_flock_op_lock,
+	afs_flock_op_release_lock,
+	afs_flock_op_return_ok,
+	afs_flock_op_return_eagain,
+	afs_flock_op_return_edeadlk,
+	afs_flock_op_return_error,
+	afs_flock_op_set_lock,
+	afs_flock_op_unlock,
+	afs_flock_op_wake,
+};
+
 #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
 /*
@@ -277,6 +311,52 @@ enum afs_file_error {
 	EM(afs_file_error_mntpt,		"MNTPT_READ_FAILED")	\
 	E_(afs_file_error_writeback_fail,	"WRITEBACK_FAILED")
 
+#define afs_flock_types							\
+	EM(F_RDLCK,				"RDLCK")		\
+	EM(F_WRLCK,				"WRLCK")		\
+	E_(F_UNLCK,				"UNLCK")
+
+#define afs_flock_states						\
+	EM(AFS_VNODE_LOCK_NONE,			"NONE")			\
+	EM(AFS_VNODE_LOCK_WAITING_FOR_CB,	"WAIT_FOR_CB")		\
+	EM(AFS_VNODE_LOCK_SETTING,		"SETTING")		\
+	EM(AFS_VNODE_LOCK_GRANTED,		"GRANTED")		\
+	EM(AFS_VNODE_LOCK_EXTENDING,		"EXTENDING")		\
+	EM(AFS_VNODE_LOCK_NEED_UNLOCK,		"NEED_UNLOCK")		\
+	E_(AFS_VNODE_LOCK_UNLOCKING,		"UNLOCKING")		\
+
+#define afs_flock_events						\
+	EM(afs_flock_acquired,			"Acquired")		\
+	EM(afs_flock_callback_break,		"Callback")		\
+	EM(afs_flock_defer_unlock,		"D-Unlock")		\
+	EM(afs_flock_fail_other,		"ErrOther")		\
+	EM(afs_flock_fail_perm,			"ErrPerm ")		\
+	EM(afs_flock_no_lockers,		"NoLocker")		\
+	EM(afs_flock_timestamp,			"Timestmp")		\
+	EM(afs_flock_try_to_lock,		"TryToLck")		\
+	EM(afs_flock_vfs_lock,			"VFSLock ")		\
+	EM(afs_flock_vfs_locking,		"VFSLking")		\
+	EM(afs_flock_waited,			"Waited  ")		\
+	EM(afs_flock_waiting,			"Waiting ")		\
+	EM(afs_flock_work_extending,		"Extendng")		\
+	EM(afs_flock_work_retry,		"Retry   ")		\
+	EM(afs_flock_work_unlocking,		"Unlcking")		\
+	E_(afs_flock_would_block,		"EWOULDBL")
+
+#define afs_flock_operations						\
+	EM(afs_flock_op_copy_lock,		"COPY    ")		\
+	EM(afs_flock_op_flock,			"->flock ")		\
+	EM(afs_flock_op_grant,			"GRANT   ")		\
+	EM(afs_flock_op_lock,			"->lock  ")		\
+	EM(afs_flock_op_release_lock,		"RELEASE ")		\
+	EM(afs_flock_op_return_ok,		"<-OK    ")		\
+	EM(afs_flock_op_return_edeadlk,		"<-EDEADL")		\
+	EM(afs_flock_op_return_eagain,		"<-EAGAIN")		\
+	EM(afs_flock_op_return_error,		"<-ERROR ")		\
+	EM(afs_flock_op_set_lock,		"SET     ")		\
+	EM(afs_flock_op_unlock,			"UNLOCK  ")		\
+	E_(afs_flock_op_wake,			"WAKE    ")
+
 /*
  * Export enum symbols via userspace.
  */
@@ -293,6 +373,8 @@ afs_edit_dir_reasons;
 afs_eproto_causes;
 afs_io_errors;
 afs_file_errors;
+afs_flock_types;
+afs_flock_operations;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -796,6 +878,70 @@ TRACE_EVENT(afs_cm_no_server_u,
 		      __entry->call, __entry->op_id, &__entry->uuid)
 	    );
 
+TRACE_EVENT(afs_flock_ev,
+	    TP_PROTO(struct afs_vnode *vnode, struct file_lock *fl,
+		     enum afs_flock_event event, int error),
+
+	    TP_ARGS(vnode, fl, event, error),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+		    __field(enum afs_flock_event,	event		)
+		    __field(enum afs_lock_state,	state		)
+		    __field(int,			error		)
+		    __field(unsigned int,		debug_id	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+		    __entry->event = event;
+		    __entry->state = vnode->lock_state;
+		    __entry->error = error;
+		    __entry->debug_id = fl ? fl->fl_u.afs.debug_id : 0;
+			   ),
+
+	    TP_printk("%llx:%llx:%x %04x %s s=%s e=%d",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->debug_id,
+		      __print_symbolic(__entry->event, afs_flock_events),
+		      __print_symbolic(__entry->state, afs_flock_states),
+		      __entry->error)
+	    );
+
+TRACE_EVENT(afs_flock_op,
+	    TP_PROTO(struct afs_vnode *vnode, struct file_lock *fl,
+		     enum afs_flock_operation op),
+
+	    TP_ARGS(vnode, fl, op),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+		    __field(loff_t,			from		)
+		    __field(loff_t,			len		)
+		    __field(enum afs_flock_operation,	op		)
+		    __field(unsigned char,		type		)
+		    __field(unsigned int,		flags		)
+		    __field(unsigned int,		debug_id	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+		    __entry->from = fl->fl_start;
+		    __entry->len = fl->fl_end - fl->fl_start + 1;
+		    __entry->op = op;
+		    __entry->type = fl->fl_type;
+		    __entry->flags = fl->fl_flags;
+		    __entry->debug_id = fl->fl_u.afs.debug_id;
+			   ),
+
+	    TP_printk("%llx:%llx:%x %04x %s t=%s R=%llx/%llx f=%x",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->debug_id,
+		      __print_symbolic(__entry->op, afs_flock_operations),
+		      __print_symbolic(__entry->type, afs_flock_types),
+		      __entry->from, __entry->len, __entry->flags)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 445b10289f766b73527ecb4fb4e388677ad93740 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Improve dir check failure reports

Improve the content of directory check failure reports from:

	kAFS: afs_dir_check_page(6d57): bad magic 1/2 is 0000

to dump more information about the individual blocks in a directory page.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 8a2562e3a316..378a96a1116e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -159,6 +159,38 @@ error:
 	return false;
 }
 
+/*
+ * Check the contents of a directory that we've just read.
+ */
+static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
+{
+	struct afs_xdr_dir_page *dbuf;
+	unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+
+	for (i = 0; i < req->nr_pages; i++)
+		if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
+			goto bad;
+	return true;
+
+bad:
+	pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
+		dvnode->fid.vid, dvnode->fid.vnode,
+		req->file_size, req->len, req->actual_len, req->remain);
+	pr_warn("DIR %llx %x %x %x\n",
+		req->pos, req->index, req->nr_pages, req->offset);
+
+	for (i = 0; i < req->nr_pages; i++) {
+		dbuf = kmap(req->pages[i]);
+		for (j = 0; j < qty; j++) {
+			union afs_xdr_dir_block *block = &dbuf->blocks[j];
+
+			pr_warn("[%02x] %32phN\n", i * qty + j, block);
+		}
+		kunmap(req->pages[i]);
+	}
+	return false;
+}
+
 /*
  * open an AFS directory file
  */
@@ -288,10 +320,8 @@ retry:
 
 		/* Validate the data we just read. */
 		ret = -EIO;
-		for (i = 0; i < req->nr_pages; i++)
-			if (!afs_dir_check_page(dvnode, req->pages[i],
-						req->actual_len))
-				goto error_unlock;
+		if (!afs_dir_check_pages(dvnode, req))
+			goto error_unlock;
 
 		// TODO: Trim excess pages
 
-- 
cgit v1.2.3


From cdfb26b40dfa51127d22d171cef4fe8993cbfb55 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Handle lock rpc ops failing on a file that got deleted

Holding a file lock on an AFS file does not prevent it from being deleted
on the server, so we need to handle an error resulting from that when we
try setting, extending or releasing a lock.

Fix this by adding a "deleted" lock state and cancelling the lock extension
process for that file and aborting all waiters for the lock.

Fixes: 0fafdc9f888b ("afs: Fix file locking")
Reported-by: Jonathan Billings <jsbillin@umich.edu>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c             | 62 +++++++++++++++++++++++++++++++++++++++++++---
 fs/afs/internal.h          |  1 +
 include/trace/events/afs.h |  7 +++++-
 3 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3e06a560f66b..742038a21ef7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -157,6 +157,28 @@ static void afs_next_locker(struct afs_vnode *vnode, int error)
 	_leave("");
 }
 
+/*
+ * Kill off all waiters in the the pending lock queue due to the vnode being
+ * deleted.
+ */
+static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
+{
+	struct file_lock *p;
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED);
+
+	while (!list_empty(&vnode->pending_locks)) {
+		p = list_entry(vnode->pending_locks.next,
+			       struct file_lock, fl_u.afs.link);
+		list_del_init(&p->fl_u.afs.link);
+		p->fl_u.afs.state = -ENOENT;
+		wake_up(&p->fl_wait);
+	}
+
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
+}
+
 /*
  * Get a lock on a file
  */
@@ -278,13 +300,19 @@ again:
 		/* attempt to release the server lock; if it fails, we just
 		 * wait 5 minutes and it'll expire anyway */
 		ret = afs_release_lock(vnode, vnode->lock_key);
-		if (ret < 0)
+		if (ret < 0) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
+					   ret);
 			printk(KERN_WARNING "AFS:"
 			       " Failed to release lock on {%llx:%llx} error %d\n",
 			       vnode->fid.vid, vnode->fid.vnode, ret);
+		}
 
 		spin_lock(&vnode->lock);
-		afs_next_locker(vnode, 0);
+		if (ret == -ENOENT)
+			afs_kill_lockers_enoent(vnode);
+		else
+			afs_next_locker(vnode, 0);
 		spin_unlock(&vnode->lock);
 		return;
 
@@ -304,12 +332,21 @@ again:
 		ret = afs_extend_lock(vnode, key); /* RPC */
 		key_put(key);
 
-		if (ret < 0)
+		if (ret < 0) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail,
+					   ret);
 			pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
 				   vnode->fid.vid, vnode->fid.vnode, ret);
+		}
 
 		spin_lock(&vnode->lock);
 
+		if (ret == -ENOENT) {
+			afs_kill_lockers_enoent(vnode);
+			spin_unlock(&vnode->lock);
+			return;
+		}
+
 		if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
 			goto again;
 		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
@@ -333,6 +370,11 @@ again:
 		spin_unlock(&vnode->lock);
 		return;
 
+	case AFS_VNODE_LOCK_DELETED:
+		afs_kill_lockers_enoent(vnode);
+		spin_unlock(&vnode->lock);
+		return;
+
 	default:
 		/* Looks like a lock request was withdrawn. */
 		spin_unlock(&vnode->lock);
@@ -435,6 +477,10 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	spin_lock(&vnode->lock);
 	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
+	ret = -ENOENT;
+	if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+		goto error_unlock;
+
 	/* If we've already got a lock on the server then try to move to having
 	 * the VFS grant the requested lock.  Note that this means that other
 	 * clients may get starved out.
@@ -489,6 +535,13 @@ try_to_lock:
 		afs_next_locker(vnode, ret);
 		goto error_unlock;
 
+	case -ENOENT:
+		fl->fl_u.afs.state = ret;
+		trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
+		list_del_init(&fl->fl_u.afs.link);
+		afs_kill_lockers_enoent(vnode);
+		goto error_unlock;
+
 	default:
 		fl->fl_u.afs.state = ret;
 		trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
@@ -638,6 +691,9 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 
 	_enter("");
 
+	if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+		return -ENOENT;
+
 	fl->fl_type = F_UNLCK;
 
 	/* check local lock records first */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6e680783f59f..5eb6be3f73b2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -599,6 +599,7 @@ enum afs_lock_state {
 	AFS_VNODE_LOCK_EXTENDING,	/* We're extending a lock on the server */
 	AFS_VNODE_LOCK_NEED_UNLOCK,	/* We need to unlock on the server */
 	AFS_VNODE_LOCK_UNLOCKING,	/* We're telling the server to unlock */
+	AFS_VNODE_LOCK_DELETED,		/* The vnode has been deleted whilst we have a lock */
 };
 
 /*
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 24c058a93e8f..21b896fabb2f 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -156,9 +156,11 @@ enum afs_flock_event {
 	afs_flock_acquired,
 	afs_flock_callback_break,
 	afs_flock_defer_unlock,
+	afs_flock_extend_fail,
 	afs_flock_fail_other,
 	afs_flock_fail_perm,
 	afs_flock_no_lockers,
+	afs_flock_release_fail,
 	afs_flock_timestamp,
 	afs_flock_try_to_lock,
 	afs_flock_vfs_lock,
@@ -323,15 +325,18 @@ enum afs_flock_operation {
 	EM(AFS_VNODE_LOCK_GRANTED,		"GRANTED")		\
 	EM(AFS_VNODE_LOCK_EXTENDING,		"EXTENDING")		\
 	EM(AFS_VNODE_LOCK_NEED_UNLOCK,		"NEED_UNLOCK")		\
-	E_(AFS_VNODE_LOCK_UNLOCKING,		"UNLOCKING")		\
+	EM(AFS_VNODE_LOCK_UNLOCKING,		"UNLOCKING")		\
+	E_(AFS_VNODE_LOCK_DELETED,		"DELETED")
 
 #define afs_flock_events						\
 	EM(afs_flock_acquired,			"Acquired")		\
 	EM(afs_flock_callback_break,		"Callback")		\
 	EM(afs_flock_defer_unlock,		"D-Unlock")		\
+	EM(afs_flock_extend_fail,		"Ext_Fail")		\
 	EM(afs_flock_fail_other,		"ErrOther")		\
 	EM(afs_flock_fail_perm,			"ErrPerm ")		\
 	EM(afs_flock_no_lockers,		"NoLocker")		\
+	EM(afs_flock_release_fail,		"Rel_Fail")		\
 	EM(afs_flock_timestamp,			"Timestmp")		\
 	EM(afs_flock_try_to_lock,		"TryToLck")		\
 	EM(afs_flock_vfs_lock,			"VFSLock ")		\
-- 
cgit v1.2.3


From 99987c560046ea178eb5aea793043deea255f185 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Add directory reload tracepoint

Add a tracepoint (afs_reload_dir) to indicate when a directory is being
reloaded.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c               |  1 +
 include/trace/events/afs.h | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 378a96a1116e..be5d2f932b77 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -309,6 +309,7 @@ retry:
 		goto error;
 
 	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+		trace_afs_reload_dir(dvnode);
 		ret = afs_fetch_data(dvnode, key, req);
 		if (ret < 0)
 			goto error_unlock;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 21b896fabb2f..8da9dd5bc2b6 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -947,6 +947,23 @@ TRACE_EVENT(afs_flock_op,
 		      __entry->from, __entry->len, __entry->flags)
 	    );
 
+TRACE_EVENT(afs_reload_dir,
+	    TP_PROTO(struct afs_vnode *vnode),
+
+	    TP_ARGS(vnode),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+			   ),
+
+	    TP_printk("%llx:%llx:%x",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 79ddbfa500b37a94fa7501e65ebdd5c0e4c7592d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Implement sillyrename for unlink and rename

Implement sillyrename for AFS unlink and rename, using the NFS variant
implementation as a basis.

Note that the asynchronous file locking extender/releaser has to be
notified with a state change to stop it complaining if there's a race
between that and the actual file deletion.

A tracepoint, afs_silly_rename, is also added to note the silly rename and
the cleanup.  The afs_edit_dir tracepoint is given some extra reason
indicators and the afs_flock_ev tracepoint is given a silly-delete file
lock cancellation indicator.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile            |   1 +
 fs/afs/dir.c               | 116 ++++++++++++++++++++--
 fs/afs/dir_silly.c         | 239 +++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/flock.c             |   2 +-
 fs/afs/inode.c             |   2 +
 fs/afs/internal.h          |  10 ++
 fs/afs/super.c             |   4 +-
 include/trace/events/afs.h |  34 ++++++-
 8 files changed, 395 insertions(+), 13 deletions(-)
 create mode 100644 fs/afs/dir_silly.c

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 0738e2bf5193..cbf31f6cd177 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -13,6 +13,7 @@ kafs-y := \
 	cmservice.o \
 	dir.o \
 	dir_edit.o \
+	dir_silly.o \
 	dynroot.o \
 	file.o \
 	flock.o \
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index be5d2f932b77..6c8523501639 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -26,6 +26,7 @@ static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
+static void afs_d_iput(struct dentry *dentry, struct inode *inode);
 static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
@@ -85,6 +86,7 @@ const struct dentry_operations afs_fs_dentry_operations = {
 	.d_delete	= afs_d_delete,
 	.d_release	= afs_d_release,
 	.d_automount	= afs_d_automount,
+	.d_iput		= afs_d_iput,
 };
 
 struct afs_lookup_one_cookie {
@@ -1083,6 +1085,16 @@ zap:
 	return 1;
 }
 
+/*
+ * Clean up sillyrename files on dentry removal.
+ */
+static void afs_d_iput(struct dentry *dentry, struct inode *inode)
+{
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		afs_silly_iput(dentry, inode);
+	iput(inode);
+}
+
 /*
  * handle dentry release
  */
@@ -1225,6 +1237,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 			goto error_key;
 	}
 
+	if (vnode) {
+		ret = down_write_killable(&vnode->rmdir_lock);
+		if (ret < 0)
+			goto error_key;
+	}
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
@@ -1243,6 +1261,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 		}
 	}
 
+	if (vnode)
+		up_write(&vnode->rmdir_lock);
 error_key:
 	key_put(key);
 error:
@@ -1259,9 +1279,9 @@ error:
  * However, if we didn't have a callback promise outstanding, or it was
  * outstanding on a different server, then it won't break it either...
  */
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
-			       unsigned long d_version_before,
-			       unsigned long d_version_after)
+int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+			unsigned long d_version_before,
+			unsigned long d_version_after)
 {
 	bool dir_valid;
 	int ret = 0;
@@ -1308,6 +1328,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
 	struct key *key;
 	unsigned long d_version = (unsigned long)dentry->d_fsdata;
+	bool need_rehash = false;
 	u64 data_version = dvnode->status.data_version;
 	int ret;
 
@@ -1331,6 +1352,21 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 			goto error_key;
 	}
 
+	spin_lock(&dentry->d_lock);
+	if (vnode && d_count(dentry) > 1) {
+		spin_unlock(&dentry->d_lock);
+		/* Start asynchronous writeout of the inode */
+		write_inode_now(d_inode(dentry), 0);
+		ret = afs_sillyrename(dvnode, vnode, dentry, key);
+		goto error_key;
+	}
+	if (!d_unhashed(dentry)) {
+		/* Prevent a race with RCU lookup. */
+		__d_drop(dentry);
+		need_rehash = true;
+	}
+	spin_unlock(&dentry->d_lock);
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
@@ -1362,6 +1398,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 					    afs_edit_dir_for_unlink);
 	}
 
+	if (need_rehash && ret < 0 && ret != -ENOENT)
+		d_rehash(dentry);
+
 error_key:
 	key_put(key);
 error:
@@ -1582,6 +1621,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct afs_fs_cursor fc;
 	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+	struct dentry *tmp = NULL, *rehash = NULL;
+	struct inode *new_inode;
 	struct key *key;
 	u64 orig_data_version, new_data_version;
 	bool new_negative = d_is_negative(new_dentry);
@@ -1590,6 +1631,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (flags)
 		return -EINVAL;
 
+	/* Don't allow silly-rename files be moved around. */
+	if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EINVAL;
+
 	vnode = AFS_FS_I(d_inode(old_dentry));
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);
@@ -1608,12 +1653,48 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto error;
 	}
 
+	/* For non-directories, check whether the target is busy and if so,
+	 * make a copy of the dentry and then do a silly-rename.  If the
+	 * silly-rename succeeds, the copied dentry is hashed and becomes the
+	 * new target.
+	 */
+	if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
+		/* To prevent any new references to the target during the
+		 * rename, we unhash the dentry in advance.
+		 */
+		if (!d_unhashed(new_dentry)) {
+			d_drop(new_dentry);
+			rehash = new_dentry;
+		}
+
+		if (d_count(new_dentry) > 2) {
+			/* copy the target dentry's name */
+			ret = -ENOMEM;
+			tmp = d_alloc(new_dentry->d_parent,
+				      &new_dentry->d_name);
+			if (!tmp)
+				goto error_rehash;
+
+			ret = afs_sillyrename(new_dvnode,
+					      AFS_FS_I(d_inode(new_dentry)),
+					      new_dentry, key);
+			if (ret)
+				goto error_rehash;
+
+			new_dentry = tmp;
+			rehash = NULL;
+			new_negative = true;
+			orig_data_version = orig_dvnode->status.data_version;
+			new_data_version = new_dvnode->status.data_version;
+		}
+	}
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
 		if (orig_dvnode != new_dvnode) {
 			if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
 				afs_end_vnode_operation(&fc);
-				goto error_key;
+				goto error_rehash;
 			}
 		}
 		while (afs_select_fileserver(&fc)) {
@@ -1630,25 +1711,42 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			mutex_unlock(&new_dvnode->io_lock);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
-			goto error_key;
+			goto error_rehash;
 	}
 
 	if (ret == 0) {
+		if (rehash)
+			d_rehash(rehash);
 		if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
 		    afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
-					afs_edit_dir_for_rename);
+					afs_edit_dir_for_rename_0);
 
 		if (!new_negative &&
 		    test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
 			afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
-					    afs_edit_dir_for_rename);
+					    afs_edit_dir_for_rename_1);
 
 		if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
 			afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
-					 &vnode->fid,  afs_edit_dir_for_rename);
+					 &vnode->fid, afs_edit_dir_for_rename_2);
+
+		new_inode = d_inode(new_dentry);
+		if (new_inode) {
+			spin_lock(&new_inode->i_lock);
+			if (new_inode->i_nlink > 0)
+				drop_nlink(new_inode);
+			spin_unlock(&new_inode->i_lock);
+		}
+		d_move(old_dentry, new_dentry);
+		goto error_tmp;
 	}
 
-error_key:
+error_rehash:
+	if (rehash)
+		d_rehash(rehash);
+error_tmp:
+	if (tmp)
+		dput(tmp);
 	key_put(key);
 error:
 	_leave(" = %d", ret);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
new file mode 100644
index 000000000000..f6f89fdab6b2
--- /dev/null
+++ b/fs/afs/dir_silly.c
@@ -0,0 +1,239 @@
+/* AFS silly rename handling
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from NFS's sillyrename.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
+#include "internal.h"
+
+/*
+ * Actually perform the silly rename step.
+ */
+static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *old, struct dentry *new,
+			       struct key *key)
+{
+	struct afs_fs_cursor fc;
+	u64 dir_data_version = dvnode->status.data_version;
+	int ret = -ERESTARTSYS;
+
+	_enter("%pd,%pd", old, new);
+
+	trace_afs_silly_rename(vnode, false);
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+			afs_fs_rename(&fc, old->d_name.name,
+				      dvnode, new->d_name.name,
+				      dir_data_version, dir_data_version);
+		}
+
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	if (ret == 0) {
+		spin_lock(&old->d_lock);
+		old->d_flags |= DCACHE_NFSFS_RENAMED;
+		spin_unlock(&old->d_lock);
+		if (dvnode->silly_key != key) {
+			key_put(dvnode->silly_key);
+			dvnode->silly_key = key_get(key);
+		}
+
+		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_remove(dvnode, &old->d_name,
+					    afs_edit_dir_for_silly_0);
+		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_add(dvnode, &new->d_name,
+					 &vnode->fid, afs_edit_dir_for_silly_1);
+
+		/* vfs_unlink and the like do not issue this when a file is
+		 * sillyrenamed, so do it here.
+		 */
+		fsnotify_nameremove(old, 0);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/**
+ * afs_sillyrename - Perform a silly-rename of a dentry
+ *
+ * AFS is stateless and the server doesn't know when the client is holding a
+ * file open.  To prevent application problems when a file is unlinked while
+ * it's still open, the client performs a "silly-rename".  That is, it renames
+ * the file to a hidden file in the same directory, and only performs the
+ * unlink once the last reference to it is put.
+ *
+ * The final cleanup is done during dentry_iput.
+ */
+int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+		    struct dentry *dentry, struct key *key)
+{
+	static unsigned int sillycounter;
+	struct dentry *sdentry = NULL;
+	unsigned char silly[16];
+	int ret = -EBUSY;
+
+	_enter("");
+
+	/* We don't allow a dentry to be silly-renamed twice. */
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EBUSY;
+
+	sdentry = NULL;
+	do {
+		int slen;
+
+		dput(sdentry);
+		sillycounter++;
+
+		/* Create a silly name.  Note that the ".__afs" prefix is
+		 * understood by the salvager and must not be changed.
+		 */
+		slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+
+		/* N.B. Better to return EBUSY here ... it could be dangerous
+		 * to delete the file while it's in use.
+		 */
+		if (IS_ERR(sdentry))
+			goto out;
+	} while (!d_is_negative(sdentry));
+
+	ihold(&vnode->vfs_inode);
+
+	ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key);
+	switch (ret) {
+	case 0:
+		/* The rename succeeded. */
+		d_move(dentry, sdentry);
+		break;
+	case -ERESTARTSYS:
+		/* The result of the rename is unknown. Play it safe by forcing
+		 * a new lookup.
+		 */
+		d_drop(dentry);
+		d_drop(sdentry);
+	}
+
+	iput(&vnode->vfs_inode);
+	dput(sdentry);
+out:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Tell the server to remove a sillyrename file.
+ */
+static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *dentry, struct key *key)
+{
+	struct afs_fs_cursor fc;
+	u64 dir_data_version = dvnode->status.data_version;
+	int ret = -ERESTARTSYS;
+
+	_enter("");
+
+	trace_afs_silly_rename(vnode, true);
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+
+			if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+			    !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+				yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+						    dir_data_version);
+				if (fc.ac.error != -ECONNABORTED ||
+				    fc.ac.abort_code != RXGEN_OPCODE)
+					continue;
+				set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
+			}
+
+			afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
+				      dir_data_version);
+		}
+
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret == 0) {
+			drop_nlink(&vnode->vfs_inode);
+			if (vnode->vfs_inode.i_nlink == 0) {
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
+				clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+			}
+		}
+		if (ret == 0 &&
+		    test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_remove(dvnode, &dentry->d_name,
+					    afs_edit_dir_for_unlink);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Remove sillyrename file on iput.
+ */
+int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent));
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct dentry *alias;
+	int ret;
+
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+	_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+
+	down_read(&dvnode->rmdir_lock);
+
+	alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+	if (IS_ERR(alias)) {
+		up_read(&dvnode->rmdir_lock);
+		return 0;
+	}
+
+	if (!d_in_lookup(alias)) {
+		/* We raced with lookup...  See if we need to transfer the
+		 * sillyrename information to the aliased dentry.
+		 */
+		ret = 0;
+		spin_lock(&alias->d_lock);
+		if (d_really_is_positive(alias) &&
+		    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+			alias->d_flags |= DCACHE_NFSFS_RENAMED;
+			ret = 1;
+		}
+		spin_unlock(&alias->d_lock);
+		up_read(&dvnode->rmdir_lock);
+		dput(alias);
+		return ret;
+	}
+
+	/* Stop lock-release from complaining. */
+	spin_lock(&vnode->lock);
+	vnode->lock_state = AFS_VNODE_LOCK_DELETED;
+	trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0);
+	spin_unlock(&vnode->lock);
+
+	afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key);
+	up_read(&dvnode->rmdir_lock);
+	d_lookup_done(alias);
+	dput(alias);
+	return 1;
+}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 742038a21ef7..325bf731d8dd 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -300,7 +300,7 @@ again:
 		/* attempt to release the server lock; if it fails, we just
 		 * wait 5 minutes and it'll expire anyway */
 		ret = afs_release_lock(vnode, vnode->lock_key);
-		if (ret < 0) {
+		if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) {
 			trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
 					   ret);
 			printk(KERN_WARNING "AFS:"
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 9cedc3fc1b77..3eef20ff285b 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -543,6 +543,8 @@ void afs_evict_inode(struct inode *inode)
 #endif
 
 	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+	key_put(vnode->silly_key);
+	vnode->silly_key = NULL;
 	key_put(vnode->lock_key);
 	vnode->lock_key = NULL;
 	_leave("");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5eb6be3f73b2..20fd44de26ac 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -621,6 +621,8 @@ struct afs_vnode {
 	struct afs_permits __rcu *permit_cache;	/* cache of permits so far obtained */
 	struct mutex		io_lock;	/* Lock for serialising I/O on this mutex */
 	struct rw_semaphore	validate_lock;	/* lock for validating this vnode */
+	struct rw_semaphore	rmdir_lock;	/* Lock for rmdir vs sillyrename */
+	struct key		*silly_key;	/* Silly rename key */
 	spinlock_t		wb_lock;	/* lock for wb_keys */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
@@ -866,6 +868,7 @@ extern const struct address_space_operations afs_dir_aops;
 extern const struct dentry_operations afs_fs_dentry_operations;
 
 extern void afs_d_release(struct dentry *);
+extern int afs_dir_remove_link(struct dentry *, struct key *, unsigned long, unsigned long);
 
 /*
  * dir_edit.c
@@ -874,6 +877,13 @@ extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *
 			     enum afs_edit_dir_reason);
 extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
 
+/*
+ * dir_silly.c
+ */
+extern int afs_sillyrename(struct afs_vnode *, struct afs_vnode *,
+			   struct dentry *, struct key *);
+extern int afs_silly_iput(struct dentry *, struct inode *);
+
 /*
  * dynroot.c
  */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 5adf012b8e27..6438849a75c4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -45,7 +45,7 @@ struct file_system_type afs_fs_type = {
 	.init_fs_context	= afs_init_fs_context,
 	.parameters		= &afs_fs_parameters,
 	.kill_sb		= afs_kill_super,
-	.fs_flags		= 0,
+	.fs_flags		= FS_RENAME_DOES_D_MOVE,
 };
 MODULE_ALIAS_FS("afs");
 
@@ -656,6 +656,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	vnode->cb_type		= 0;
 	vnode->lock_state	= AFS_VNODE_LOCK_NONE;
 
+	init_rwsem(&vnode->rmdir_lock);
+
 	_leave(" = %p", &vnode->vfs_inode);
 	return &vnode->vfs_inode;
 }
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 8da9dd5bc2b6..f67815ebb1b9 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -108,8 +108,12 @@ enum afs_edit_dir_reason {
 	afs_edit_dir_for_create,
 	afs_edit_dir_for_link,
 	afs_edit_dir_for_mkdir,
-	afs_edit_dir_for_rename,
+	afs_edit_dir_for_rename_0,
+	afs_edit_dir_for_rename_1,
+	afs_edit_dir_for_rename_2,
 	afs_edit_dir_for_rmdir,
+	afs_edit_dir_for_silly_0,
+	afs_edit_dir_for_silly_1,
 	afs_edit_dir_for_symlink,
 	afs_edit_dir_for_unlink,
 };
@@ -161,6 +165,7 @@ enum afs_flock_event {
 	afs_flock_fail_perm,
 	afs_flock_no_lockers,
 	afs_flock_release_fail,
+	afs_flock_silly_delete,
 	afs_flock_timestamp,
 	afs_flock_try_to_lock,
 	afs_flock_vfs_lock,
@@ -273,8 +278,12 @@ enum afs_flock_operation {
 	EM(afs_edit_dir_for_create,		"Create") \
 	EM(afs_edit_dir_for_link,		"Link  ") \
 	EM(afs_edit_dir_for_mkdir,		"MkDir ") \
-	EM(afs_edit_dir_for_rename,		"Rename") \
+	EM(afs_edit_dir_for_rename_0,		"Renam0") \
+	EM(afs_edit_dir_for_rename_1,		"Renam1") \
+	EM(afs_edit_dir_for_rename_2,		"Renam2") \
 	EM(afs_edit_dir_for_rmdir,		"RmDir ") \
+	EM(afs_edit_dir_for_silly_0,		"S_Ren0") \
+	EM(afs_edit_dir_for_silly_1,		"S_Ren1") \
 	EM(afs_edit_dir_for_symlink,		"Symlnk") \
 	E_(afs_edit_dir_for_unlink,		"Unlink")
 
@@ -337,6 +346,7 @@ enum afs_flock_operation {
 	EM(afs_flock_fail_perm,			"ErrPerm ")		\
 	EM(afs_flock_no_lockers,		"NoLocker")		\
 	EM(afs_flock_release_fail,		"Rel_Fail")		\
+	EM(afs_flock_silly_delete,		"SillyDel")		\
 	EM(afs_flock_timestamp,			"Timestmp")		\
 	EM(afs_flock_try_to_lock,		"TryToLck")		\
 	EM(afs_flock_vfs_lock,			"VFSLock ")		\
@@ -964,6 +974,26 @@ TRACE_EVENT(afs_reload_dir,
 		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique)
 	    );
 
+TRACE_EVENT(afs_silly_rename,
+	    TP_PROTO(struct afs_vnode *vnode, bool done),
+
+	    TP_ARGS(vnode, done),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+		    __field(bool,			done		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+		    __entry->done = done;
+			   ),
+
+	    TP_printk("%llx:%llx:%x done=%u",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->done)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 80548b03991f58758a336424a90bf9f988e3b077 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:51 +0100
Subject: afs: Add more tracepoints

Add four more tracepoints:

 (1) afs_make_fs_call1 - Split from afs_make_fs_call but takes a filename
     to log also.

 (2) afs_make_fs_call2 - Like the above but takes two filenames to log.

 (3) afs_lookup - Log the result of doing a successful lookup, including a
     negative result (fid 0:0).

 (4) afs_get_tree - Log the set up of a volume for mounting.

It also extends the name buffer on the afs_edit_dir tracepoint to 24 chars
and puts quotes around the filename in the text representation.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c               |   8 ++-
 fs/afs/fsclient.c          |  10 ++--
 fs/afs/super.c             |   1 +
 fs/afs/yfsclient.c         |  14 ++---
 include/trace/events/afs.h | 146 ++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 163 insertions(+), 16 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 6c8523501639..48706eefc63b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -908,8 +908,14 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 			(void *)(unsigned long)dvnode->status.data_version;
 	}
 	d = d_splice_alias(inode, dentry);
-	if (!IS_ERR_OR_NULL(d))
+	if (!IS_ERR_OR_NULL(d)) {
 		d->d_fsdata = dentry->d_fsdata;
+		trace_afs_lookup(dvnode, &d->d_name,
+				 inode ? AFS_FS_I(inode) : NULL);
+	} else {
+		trace_afs_lookup(dvnode, &dentry->d_name,
+				 inode ? AFS_FS_I(inode) : NULL);
+	}
 	return d;
 }
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 9d405f96cc91..be4520eb4965 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -830,7 +830,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	*bp++ = 0; /* segment size */
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -926,7 +926,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	}
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1019,7 +1019,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	*bp++ = htonl(vnode->fid.unique);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1134,7 +1134,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 	*bp++ = 0; /* segment size */
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1253,7 +1253,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 	}
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &orig_dvnode->fid);
+	trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 6438849a75c4..ce85ae61f12d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -550,6 +550,7 @@ static int afs_get_tree(struct fs_context *fc)
 	}
 
 	fc->root = dget(sb->s_root);
+	trace_afs_get_tree(as->cell, as->volume);
 	_leave(" = 0 [%p]", sb);
 	return 0;
 
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 73546c2d89ee..5ea0350dc9dd 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -809,7 +809,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -873,7 +873,7 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -964,7 +964,7 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1052,7 +1052,7 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1138,7 +1138,7 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call1(call, &vnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1235,7 +1235,7 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &dvnode->fid);
+	trace_afs_make_fs_call1(call, &dvnode->fid, name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
@@ -1334,7 +1334,7 @@ int yfs_fs_rename(struct afs_fs_cursor *fc,
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &orig_dvnode->fid);
+	trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index f67815ebb1b9..e81d6a50781f 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -539,6 +539,88 @@ TRACE_EVENT(afs_make_fs_call,
 		      __print_symbolic(__entry->op, afs_fs_operations))
 	    );
 
+TRACE_EVENT(afs_make_fs_call1,
+	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
+		     const char *name),
+
+	    TP_ARGS(call, fid, name),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(enum afs_fs_operation,	op		)
+		    __field_struct(struct afs_fid,	fid		)
+		    __array(char,			name, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len = strlen(name);
+		    __len = min(__len, 23);
+		    __entry->call = call->debug_id;
+		    __entry->op = call->operation_ID;
+		    if (fid) {
+			    __entry->fid = *fid;
+		    } else {
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+		    memcpy(__entry->name, name, __len);
+		    __entry->name[__len] = 0;
+			   ),
+
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\"",
+		      __entry->call,
+		      __entry->fid.vid,
+		      __entry->fid.vnode,
+		      __entry->fid.unique,
+		      __print_symbolic(__entry->op, afs_fs_operations),
+		      __entry->name)
+	    );
+
+TRACE_EVENT(afs_make_fs_call2,
+	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
+		     const char *name, const char *name2),
+
+	    TP_ARGS(call, fid, name, name2),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(enum afs_fs_operation,	op		)
+		    __field_struct(struct afs_fid,	fid		)
+		    __array(char,			name, 24	)
+		    __array(char,			name2, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len = strlen(name);
+		    int __len2 = strlen(name2);
+		    __len = min(__len, 23);
+		    __len2 = min(__len2, 23);
+		    __entry->call = call->debug_id;
+		    __entry->op = call->operation_ID;
+		    if (fid) {
+			    __entry->fid = *fid;
+		    } else {
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+		    memcpy(__entry->name, name, __len);
+		    __entry->name[__len] = 0;
+		    memcpy(__entry->name2, name2, __len2);
+		    __entry->name2[__len2] = 0;
+			   ),
+
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\" \"%s\"",
+		      __entry->call,
+		      __entry->fid.vid,
+		      __entry->fid.vnode,
+		      __entry->fid.unique,
+		      __print_symbolic(__entry->op, afs_fs_operations),
+		      __entry->name,
+		      __entry->name2)
+	    );
+
 TRACE_EVENT(afs_make_vl_call,
 	    TP_PROTO(struct afs_call *call),
 
@@ -736,6 +818,38 @@ TRACE_EVENT(afs_call_state,
 		      __entry->ret, __entry->abort)
 	    );
 
+TRACE_EVENT(afs_lookup,
+	    TP_PROTO(struct afs_vnode *dvnode, const struct qstr *name,
+		     struct afs_vnode *vnode),
+
+	    TP_ARGS(dvnode, name, vnode),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	dfid		)
+		    __field_struct(struct afs_fid,	fid		)
+		    __array(char,			name, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len = min_t(int, name->len, 23);
+		    __entry->dfid = dvnode->fid;
+		    if (vnode) {
+			    __entry->fid = vnode->fid;
+		    } else {
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+		    memcpy(__entry->name, name->name, __len);
+		    __entry->name[__len] = 0;
+			   ),
+
+	    TP_printk("d=%llx:%llx:%x \"%s\" f=%llx:%x",
+		      __entry->dfid.vid, __entry->dfid.vnode, __entry->dfid.unique,
+		      __entry->name,
+		      __entry->fid.vnode, __entry->fid.unique)
+	    );
+
 TRACE_EVENT(afs_edit_dir,
 	    TP_PROTO(struct afs_vnode *dvnode,
 		     enum afs_edit_dir_reason why,
@@ -757,12 +871,12 @@ TRACE_EVENT(afs_edit_dir,
 		    __field(unsigned short,		slot		)
 		    __field(unsigned int,		f_vnode		)
 		    __field(unsigned int,		f_unique	)
-		    __array(char,			name, 18	)
+		    __array(char,			name, 24	)
 			     ),
 
 	    TP_fast_assign(
 		    int __len = strlen(name);
-		    __len = min(__len, 17);
+		    __len = min(__len, 23);
 		    __entry->vnode	= dvnode->fid.vnode;
 		    __entry->unique	= dvnode->fid.unique;
 		    __entry->why	= why;
@@ -775,7 +889,7 @@ TRACE_EVENT(afs_edit_dir,
 		    __entry->name[__len] = 0;
 			   ),
 
-	    TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x %s",
+	    TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x \"%s\"",
 		      __entry->vnode, __entry->unique,
 		      __print_symbolic(__entry->why, afs_edit_dir_reasons),
 		      __print_symbolic(__entry->op, afs_edit_dir_ops),
@@ -994,6 +1108,32 @@ TRACE_EVENT(afs_silly_rename,
 		      __entry->done)
 	    );
 
+TRACE_EVENT(afs_get_tree,
+	    TP_PROTO(struct afs_cell *cell, struct afs_volume *volume),
+
+	    TP_ARGS(cell, volume),
+
+	    TP_STRUCT__entry(
+		    __field(u64,			vid		)
+		    __array(char,			cell, 24	)
+		    __array(char,			volume, 24	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len;
+		    __entry->vid = volume->vid;
+		    __len = min_t(int, cell->name_len, 23);
+		    memcpy(__entry->cell, cell->name, __len);
+		    __entry->cell[__len] = 0;
+		    __len = min_t(int, volume->name_len, 23);
+		    memcpy(__entry->volume, volume->name, __len);
+		    __entry->volume[__len] = 0;
+			   ),
+
+	    TP_printk("--- MOUNT %s:%s %llx",
+		      __entry->cell, __entry->volume, __entry->vid)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 6c6c1d63c243025956f061e67fff3a615aa0f6be Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: afs: Provide mount-time configurable byte-range file locking
 emulation

Provide byte-range file locking emulation that can be configured at mount
time to one of four modes:

 (1) flock=local.  Locking is done locally only and no reference is made to
     the server.

 (2) flock=openafs.  Byte-range locking is done locally only; whole-file
     locking is done with reference to the server.  Whole-file locks cannot
     be upgraded unless the client holds an exclusive lock.

 (3) flock=strict.  Byte-range and whole-file locking both require a
     sufficient whole-file lock on the server.

 (4) flock=write.  As strict, but the client always gets an exclusive
     whole-file lock on the server.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/flock.c             | 49 +++++++++++++++++++++++++++++++++++++++-------
 fs/afs/fsclient.c          |  2 +-
 fs/afs/internal.h          | 14 +++++++++++++
 fs/afs/super.c             | 27 +++++++++++++++++++++++++
 fs/afs/yfsclient.c         |  2 +-
 include/trace/events/afs.h | 35 +++++++++++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 9 deletions(-)

diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 325bf731d8dd..ef313f4c1d11 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -409,7 +409,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode)
  * whether we think that we have a locking permit.
  */
 static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
-			      afs_lock_type_t type, bool can_sleep)
+			      enum afs_flock_mode mode, afs_lock_type_t type)
 {
 	afs_access_t access;
 	int ret;
@@ -437,13 +437,9 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
 	if (type == AFS_LOCK_READ) {
 		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
 			return -EACCES;
-		if (vnode->status.lock_count == -1 && !can_sleep)
-			return -EAGAIN; /* Write locked */
 	} else {
 		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
 			return -EACCES;
-		if (vnode->status.lock_count != 0 && !can_sleep)
-			return -EAGAIN; /* Locked */
 	}
 
 	return 0;
@@ -456,24 +452,48 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 {
 	struct inode *inode = locks_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
 	afs_lock_type_t type;
 	struct key *key = afs_file_key(file);
+	bool partial, no_server_lock = false;
 	int ret;
 
-	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+	if (mode == afs_flock_mode_unset)
+		mode = afs_flock_mode_openafs;
+
+	_enter("{%llx:%llu},%llu-%llu,%u,%u",
+	       vnode->fid.vid, vnode->fid.vnode,
+	       fl->fl_start, fl->fl_end, fl->fl_type, mode);
 
 	fl->fl_ops = &afs_lock_ops;
 	INIT_LIST_HEAD(&fl->fl_u.afs.link);
 	fl->fl_u.afs.state = AFS_LOCK_PENDING;
 
+	partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
 	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+	if (mode == afs_flock_mode_write && partial)
+		type = AFS_LOCK_WRITE;
 
-	ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
+	ret = afs_do_setlk_check(vnode, key, mode, type);
 	if (ret < 0)
 		return ret;
 
 	trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock);
 
+	/* AFS3 protocol only supports full-file locks and doesn't provide any
+	 * method of upgrade/downgrade, so we need to emulate for partial-file
+	 * locks.
+	 *
+	 * The OpenAFS client only gets a server lock for a full-file lock and
+	 * keeps partial-file locks local.  Allow this behaviour to be emulated
+	 * (as the default).
+	 */
+	if (mode == afs_flock_mode_local ||
+	    (partial && mode == afs_flock_mode_openafs)) {
+		no_server_lock = true;
+		goto skip_server_lock;
+	}
+
 	spin_lock(&vnode->lock);
 	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
@@ -502,6 +522,18 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 		}
 	}
 
+	if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
+	    !(fl->fl_flags & FL_SLEEP)) {
+		ret = -EAGAIN;
+		if (type == AFS_LOCK_READ) {
+			if (vnode->status.lock_count == -1)
+				goto lock_is_contended; /* Write locked */
+		} else {
+			if (vnode->status.lock_count != 0)
+				goto lock_is_contended; /* Locked */
+		}
+	}
+
 	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
 		goto need_to_wait;
 
@@ -571,6 +603,7 @@ vnode_is_locked:
 	/* the lock has been granted by the server... */
 	ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
 
+skip_server_lock:
 	/* ... but the VFS still needs to distribute access on this client. */
 	trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0);
 	ret = locks_lock_file_wait(file, fl);
@@ -649,6 +682,8 @@ vfs_rejected_lock:
 	 * deal with.
 	 */
 	_debug("vfs refused %d", ret);
+	if (no_server_lock)
+		goto error;
 	spin_lock(&vnode->lock);
 	list_del_init(&fl->fl_u.afs.link);
 	afs_defer_unlock(vnode);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index be4520eb4965..9b73a57aa5cb 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1902,7 +1902,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	*bp++ = htonl(type);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_calli(call, &vnode->fid, type);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 20fd44de26ac..91204e1428f2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -36,11 +36,24 @@
 struct pagevec;
 struct afs_call;
 
+/*
+ * Partial file-locking emulation mode.  (The problem being that AFS3 only
+ * allows whole-file locks and no upgrading/downgrading).
+ */
+enum afs_flock_mode {
+	afs_flock_mode_unset,
+	afs_flock_mode_local,	/* Local locking only */
+	afs_flock_mode_openafs,	/* Don't get server lock for a partial lock */
+	afs_flock_mode_strict,	/* Always get a server lock for a partial lock */
+	afs_flock_mode_write,	/* Get an exclusive server lock for a partial lock */
+};
+
 struct afs_fs_context {
 	bool			force;		/* T to force cell type */
 	bool			autocell;	/* T if set auto mount operation */
 	bool			dyn_root;	/* T if dynamic root */
 	bool			no_cell;	/* T if the source is "none" (for dynroot) */
+	enum afs_flock_mode	flock_mode;	/* Partial file-locking emulation mode */
 	afs_voltype_t		type;		/* type of volume requested */
 	unsigned int		volnamesz;	/* size of volume name */
 	const char		*volname;	/* name of volume to mount */
@@ -221,6 +234,7 @@ struct afs_super_info {
 	struct net		*net_ns;	/* Network namespace */
 	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
+	enum afs_flock_mode	flock_mode:8;	/* File locking emulation mode */
 	bool			dyn_root;	/* True if dynamic root */
 };
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ce85ae61f12d..18334fa1a0d2 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -67,19 +67,30 @@ static atomic_t afs_count_active_inodes;
 enum afs_param {
 	Opt_autocell,
 	Opt_dyn,
+	Opt_flock,
 	Opt_source,
 };
 
 static const struct fs_parameter_spec afs_param_specs[] = {
 	fsparam_flag  ("autocell",	Opt_autocell),
 	fsparam_flag  ("dyn",		Opt_dyn),
+	fsparam_enum  ("flock",		Opt_flock),
 	fsparam_string("source",	Opt_source),
 	{}
 };
 
+static const struct fs_parameter_enum afs_param_enums[] = {
+	{ Opt_flock,	"local",	afs_flock_mode_local },
+	{ Opt_flock,	"openafs",	afs_flock_mode_openafs },
+	{ Opt_flock,	"strict",	afs_flock_mode_strict },
+	{ Opt_flock,	"write",	afs_flock_mode_write },
+	{}
+};
+
 static const struct fs_parameter_description afs_fs_parameters = {
 	.name		= "kAFS",
 	.specs		= afs_param_specs,
+	.enums		= afs_param_enums,
 };
 
 /*
@@ -182,11 +193,22 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
 static int afs_show_options(struct seq_file *m, struct dentry *root)
 {
 	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+	const char *p = NULL;
 
 	if (as->dyn_root)
 		seq_puts(m, ",dyn");
 	if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
 		seq_puts(m, ",autocell");
+	switch (as->flock_mode) {
+	case afs_flock_mode_unset:	break;
+	case afs_flock_mode_local:	p = "local";	break;
+	case afs_flock_mode_openafs:	p = "openafs";	break;
+	case afs_flock_mode_strict:	p = "strict";	break;
+	case afs_flock_mode_write:	p = "write";	break;
+	}
+	if (p)
+		seq_printf(m, ",flock=%s", p);
+
 	return 0;
 }
 
@@ -315,6 +337,10 @@ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		ctx->dyn_root = true;
 		break;
 
+	case Opt_flock:
+		ctx->flock_mode = result.uint_32;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -466,6 +492,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
 	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
 	if (as) {
 		as->net_ns = get_net(fc->net_ns);
+		as->flock_mode = ctx->flock_mode;
 		if (ctx->dyn_root) {
 			as->dyn_root = true;
 		} else {
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 5ea0350dc9dd..055840aa07f6 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1860,7 +1860,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_calli(call, &vnode->fid, type);
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index e81d6a50781f..f1373c29bf7d 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -539,6 +539,41 @@ TRACE_EVENT(afs_make_fs_call,
 		      __print_symbolic(__entry->op, afs_fs_operations))
 	    );
 
+TRACE_EVENT(afs_make_fs_calli,
+	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
+		     unsigned int i),
+
+	    TP_ARGS(call, fid, i),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(unsigned int,		i		)
+		    __field(enum afs_fs_operation,	op		)
+		    __field_struct(struct afs_fid,	fid		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call->debug_id;
+		    __entry->i = i;
+		    __entry->op = call->operation_ID;
+		    if (fid) {
+			    __entry->fid = *fid;
+		    } else {
+			    __entry->fid.vid = 0;
+			    __entry->fid.vnode = 0;
+			    __entry->fid.unique = 0;
+		    }
+			   ),
+
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u",
+		      __entry->call,
+		      __entry->fid.vid,
+		      __entry->fid.vnode,
+		      __entry->fid.unique,
+		      __print_symbolic(__entry->op, afs_fs_operations),
+		      __entry->i)
+	    );
+
 TRACE_EVENT(afs_make_fs_call1,
 	    TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
 		     const char *name),
-- 
cgit v1.2.3


From b134d687dd9369bb2407a23c6ecc9e0a15d8bb20 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: afs: Log more information for "kAFS: AFS vnode with undefined type\n"

Log more information when "kAFS: AFS vnode with undefined type\n" is
displayed due to a vnode record being retrieved from the server that
appears to have a duff file type (usually 0).  This prints more information
to try and help pin down the problem.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c      |  4 ++--
 fs/afs/inode.c    | 35 +++++++++++++++++++++++++++++++----
 fs/afs/internal.h |  3 ++-
 fs/afs/super.c    |  2 +-
 4 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 48706eefc63b..9a466be583d2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -776,7 +776,7 @@ success:
 		ti = afs_iget(dir->i_sb, key, &cookie->fids[i],
 			      &cookie->statuses[i],
 			      &cookie->callbacks[i],
-			      cbi);
+			      cbi, dvnode);
 		if (i == 0) {
 			inode = ti;
 		} else {
@@ -1125,7 +1125,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 		return;
 
 	inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
-			 newfid, newstatus, newcb, fc->cbi);
+			 newfid, newstatus, newcb, fc->cbi, fc->vnode);
 	if (IS_ERR(inode)) {
 		/* ENOMEM or EINTR at a really inconvenient time - just abandon
 		 * the new directory on the server.
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 3eef20ff285b..0362cc7e1c7c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -29,10 +29,36 @@ static const struct inode_operations afs_symlink_inode_operations = {
 	.listxattr	= afs_listxattr,
 };
 
+static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
+{
+	static unsigned long once_only;
+
+	pr_warn("kAFS: AFS vnode with undefined type %u\n",
+		vnode->status.type);
+	pr_warn("kAFS: A=%d m=%o s=%llx v=%llx\n",
+		vnode->status.abort_code,
+		vnode->status.mode,
+		vnode->status.size,
+		vnode->status.data_version);
+	pr_warn("kAFS: vnode %llx:%llx:%x\n",
+		vnode->fid.vid,
+		vnode->fid.vnode,
+		vnode->fid.unique);
+	if (parent_vnode)
+		pr_warn("kAFS: dir %llx:%llx:%x\n",
+			parent_vnode->fid.vid,
+			parent_vnode->fid.vnode,
+			parent_vnode->fid.unique);
+
+	if (!test_and_set_bit(0, &once_only))
+		dump_stack();
+}
+
 /*
  * Initialise an inode from the vnode status.
  */
-static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
+static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
+				      struct afs_vnode *parent_vnode)
 {
 	struct inode *inode = AFS_VNODE_TO_I(vnode);
 
@@ -80,7 +106,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
 		inode_nohighmem(inode);
 		break;
 	default:
-		printk("kAFS: AFS vnode with undefined type\n");
+		dump_vnode(vnode, parent_vnode);
 		read_sequnlock_excl(&vnode->cb_lock);
 		return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
 	}
@@ -270,7 +296,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
  */
 struct inode *afs_iget(struct super_block *sb, struct key *key,
 		       struct afs_fid *fid, struct afs_file_status *status,
-		       struct afs_callback *cb, struct afs_cb_interest *cbi)
+		       struct afs_callback *cb, struct afs_cb_interest *cbi,
+		       struct afs_vnode *parent_vnode)
 {
 	struct afs_iget_data data = { .fid = *fid };
 	struct afs_super_info *as;
@@ -327,7 +354,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 		vnode->cb_expires_at += ktime_get_real_seconds();
 	}
 
-	ret = afs_inode_init_from_status(vnode, key);
+	ret = afs_inode_init_from_status(vnode, key, parent_vnode);
 	if (ret < 0)
 		goto bad_inode;
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 91204e1428f2..585a5952f608 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -993,7 +993,8 @@ extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
 extern struct inode *afs_iget(struct super_block *, struct key *,
 			      struct afs_fid *, struct afs_file_status *,
 			      struct afs_callback *,
-			      struct afs_cb_interest *);
+			      struct afs_cb_interest *,
+			      struct afs_vnode *);
 extern void afs_zap_data(struct afs_vnode *);
 extern int afs_validate(struct afs_vnode *, struct key *);
 extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 18334fa1a0d2..df5b0d8ef049 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -453,7 +453,7 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
 		fid.vnode	= 1;
 		fid.vnode_hi	= 0;
 		fid.unique	= 1;
-		inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL);
+		inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL, NULL);
 	}
 
 	if (IS_ERR(inode))
-- 
cgit v1.2.3


From c0abbb5791f1984350f1dac72c31c67010010a30 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Thu, 25 Apr 2019 12:04:37 -0300
Subject: afs: Calculate i_blocks based on file size

While it's not possible to give an accurate number for the blocks
used on the server, populate i_blocks based on the file size so
that 'du' can give a reasonable estimate.

The value is rounded up to 1K granularity, for consistency with
what other AFS clients report, and the servers' 1K usage quota
unit.  Note that the value calculated by 'du' at the root of a
volume can still be slightly lower than the quota usage on the
server, as 0-length files are charged 1 quota block, but are
reported as occupying 0 blocks.  Again, this is consistent with
other AFS clients.

Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0362cc7e1c7c..c4652b42d545 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -111,7 +111,11 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
 		return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
 	}
 
-	inode->i_blocks		= 0;
+	/*
+	 * Estimate 512 bytes  blocks used, rounded up to nearest 1K
+	 * for consistency with other AFS clients.
+	 */
+	inode->i_blocks		= ((i_size_read(inode) + 1023) >> 10) << 1;
 	vnode->invalid_before	= vnode->status.data_version;
 
 	read_sequnlock_excl(&vnode->cb_lock);
-- 
cgit v1.2.3


From c73aa4102f5b9f261a907c3b3df94cd2c478504d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 1 May 2019 13:27:09 +0100
Subject: afs: Fix the afs.cell and afs.volume xattr handlers

Fix the ->get handlers for the afs.cell and afs.volume xattrs to pass the
source data size to memcpy() rather than target buffer size.

Overcopying the source data occasionally causes the kernel to oops.

Fixes: d3e3b7eac886 ("afs: Add metadata xattrs")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/xattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index a2cdf25573e2..4ea7e53752dc 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -50,7 +50,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler,
 		return namelen;
 	if (namelen > size)
 		return -ERANGE;
-	memcpy(buffer, cell->name, size);
+	memcpy(buffer, cell->name, namelen);
 	return namelen;
 }
 
@@ -104,7 +104,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler,
 		return namelen;
 	if (namelen > size)
 		return -ERANGE;
-	memcpy(buffer, volname, size);
+	memcpy(buffer, volname, namelen);
 	return namelen;
 }
 
-- 
cgit v1.2.3


From a2f611a3dc317d8ea1c98ad6c54b911cf7f93193 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 1 May 2019 15:58:24 +0100
Subject: afs: Fix getting the afs.fid xattr

The AFS3 FID is three 32-bit unsigned numbers and is represented as three
up-to-8-hex-digit numbers separated by colons to the afs.fid xattr.
However, with the advent of support for YFS, the FID is now a 64-bit volume
number, a 96-bit vnode/inode number and a 32-bit uniquifier (as before).
Whilst the sprintf in afs_xattr_get_fid() has been partially updated (it
currently ignores the upper 32 bits of the 96-bit vnode number), the size
of the stack-based buffer has not been increased to match, thereby allowing
stack corruption to occur.

Fix this by increasing the buffer size appropriately and conditionally
including the upper part of the vnode number if it is non-zero.  The latter
requires the lower part to be zero-padded if the upper part is non-zero.

Fixes: 3b6492df4153 ("afs: Increase to 64-bit volume ID and 96-bit vnode ID for YFS")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/xattr.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 4ea7e53752dc..e729ee3d4b02 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -69,11 +69,20 @@ static int afs_xattr_get_fid(const struct xattr_handler *handler,
 			     void *buffer, size_t size)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
-	char text[8 + 1 + 8 + 1 + 8 + 1];
+	char text[16 + 1 + 24 + 1 + 8 + 1];
 	size_t len;
 
-	len = sprintf(text, "%llx:%llx:%x",
-		      vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+	/* The volume ID is 64-bit, the vnode ID is 96-bit and the
+	 * uniquifier is 32-bit.
+	 */
+	len = sprintf(text, "%llx:", vnode->fid.vid);
+	if (vnode->fid.vnode_hi)
+		len += sprintf(text + len, "%x%016llx",
+			       vnode->fid.vnode_hi, vnode->fid.vnode);
+	else
+		len += sprintf(text + len, "%llx", vnode->fid.vnode);
+	len += sprintf(text + len, ":%x", vnode->fid.unique);
+
 	if (size == 0)
 		return len;
 	if (len > size)
-- 
cgit v1.2.3


From 260f082bae6dcf70aeae2cc3e24aecb55bdb1c99 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: afs: Get an AFS3 ACL as an xattr

Implement an xattr on AFS files called "afs.acl" that retrieves a file's
ACL.  It returns the raw AFS3 ACL from the result of calling FS.FetchACL,
leaving any interpretation to userspace.

Note that whilst YFS servers will respond to FS.FetchACL, this will render
a more-advanced YFS ACL down.  Use "afs.yfs.acl" instead for that.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs_fs.h            |   1 +
 fs/afs/fsclient.c          | 122 +++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/internal.h          |   7 +++
 fs/afs/xattr.c             |  53 ++++++++++++++++++++
 include/trace/events/afs.h |   1 +
 5 files changed, 184 insertions(+)

diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index ddfa88a7a9c0..4df1f1eec0ab 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -17,6 +17,7 @@
 
 enum AFS_FS_Operations {
 	FSFETCHDATA		= 130,	/* AFS Fetch file data */
+	FSFETCHACL		= 131,	/* AFS Fetch file ACL */
 	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
 	FSSTOREDATA		= 133,	/* AFS Store file data */
 	FSSTORESTATUS		= 135,	/* AFS Store file status */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 9b73a57aa5cb..283f486c59f4 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -2391,3 +2391,125 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
+
+/*
+ * deliver reply data to an FS.FetchACL
+ */
+static int afs_deliver_fs_fetch_acl(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[1];
+	struct afs_acl *acl;
+	const __be32 *bp;
+	unsigned int size;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* extract the returned data length */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		size = call->count2 = ntohl(call->tmp);
+		size = round_up(size, 4);
+
+		acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+		if (!acl)
+			return -ENOMEM;
+		call->reply[0] = acl;
+		acl->size = call->count2;
+		afs_extract_begin(call, acl->data, size);
+		call->unmarshall++;
+
+		/* extract the returned data */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_to_buf(call, (21 + 6) * 4);
+		call->unmarshall++;
+
+		/* extract the metadata */
+	case 3:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+					&vnode->status.data_version, NULL);
+		if (ret < 0)
+			return ret;
+		xdr_decode_AFSVolSync(&bp, call->reply[2]);
+
+		call->unmarshall++;
+
+	case 4:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+static void afs_destroy_fs_fetch_acl(struct afs_call *call)
+{
+	kfree(call->reply[0]);
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * FS.FetchACL operation type
+ */
+static const struct afs_call_type afs_RXFSFetchACL = {
+	.name		= "FS.FetchACL",
+	.op		= afs_FS_FetchACL,
+	.deliver	= afs_deliver_fs_fetch_acl,
+	.destructor	= afs_destroy_fs_fetch_acl,
+};
+
+/*
+ * Fetch the ACL for a file.
+ */
+struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	call = afs_alloc_flat_call(net, &afs_RXFSFetchACL, 16, (21 + 6) * 4);
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL;
+	call->reply[1] = vnode;
+	call->reply[2] = NULL; /* volsync */
+	call->ret_reply0 = true;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSFETCHACL);
+	bp[1] = htonl(vnode->fid.vid);
+	bp[2] = htonl(vnode->fid.vnode);
+	bp[3] = htonl(vnode->fid.unique);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	afs_make_call(&fc->ac, call, GFP_KERNEL);
+	return (struct afs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 585a5952f608..683b802c20ea 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -977,6 +977,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
 			       struct afs_fid *, struct afs_file_status *,
 			       struct afs_callback *, struct afs_volsync *);
 
+struct afs_acl {
+	u32	size;
+	u8	data[];
+};
+
+extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *);
+
 /*
  * fs_probe.c
  */
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index e729ee3d4b02..b7d3d714d8ff 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -16,6 +16,7 @@
 #include "internal.h"
 
 static const char afs_xattr_list[] =
+	"afs.acl\0"
 	"afs.cell\0"
 	"afs.fid\0"
 	"afs.volume";
@@ -33,6 +34,57 @@ ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	return sizeof(afs_xattr_list);
 }
 
+/*
+ * Get a file's ACL.
+ */
+static int afs_xattr_get_acl(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_fs_cursor fc;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_acl *acl = NULL;
+	struct key *key;
+	int ret;
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(vnode);
+			acl = afs_fs_fetch_acl(&fc);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	if (ret == 0) {
+		ret = acl->size;
+		if (size > 0) {
+			ret = -ERANGE;
+			if (acl->size > size)
+				return -ERANGE;
+			memcpy(buffer, acl->data, acl->size);
+			ret = acl->size;
+		}
+		kfree(acl);
+	}
+
+	key_put(key);
+	return ret;
+}
+
+static const struct xattr_handler afs_xattr_afs_acl_handler = {
+	.name	= "afs.acl",
+	.get	= afs_xattr_get_acl,
+};
+
 /*
  * Get the name of the cell on which a file resides.
  */
@@ -123,6 +175,7 @@ static const struct xattr_handler afs_xattr_afs_volume_handler = {
 };
 
 const struct xattr_handler *afs_xattr_handlers[] = {
+	&afs_xattr_afs_acl_handler,
 	&afs_xattr_afs_cell_handler,
 	&afs_xattr_afs_fid_handler,
 	&afs_xattr_afs_volume_handler,
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index f1373c29bf7d..25c2e089c6ea 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -33,6 +33,7 @@ enum afs_call_trace {
 
 enum afs_fs_operation {
 	afs_FS_FetchData		= 130,	/* AFS Fetch file data */
+	afs_FS_FetchACL			= 131,	/* AFS Fetch file ACL */
 	afs_FS_FetchStatus		= 132,	/* AFS Fetch file status */
 	afs_FS_StoreData		= 133,	/* AFS Store file data */
 	afs_FS_StoreStatus		= 135,	/* AFS Store file status */
-- 
cgit v1.2.3


From b10494af4989d2d20679d0e3b7d1a45c2f8f8f1a Mon Sep 17 00:00:00 2001
From: Joe Gorse <jhgorse@gmail.com>
Date: Thu, 25 Apr 2019 14:26:52 +0100
Subject: afs: implement acl setting

Implements the setting of ACLs in AFS by means of setting the
afs.acl extended attribute on the file.

Signed-off-by: Joe Gorse <jhgorse@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs_fs.h            |  1 +
 fs/afs/fsclient.c          | 61 +++++++++++++++++++++++++++++++++++++++++++---
 fs/afs/internal.h          |  1 +
 fs/afs/xattr.c             | 52 +++++++++++++++++++++++++++++++++++++--
 include/trace/events/afs.h |  1 +
 5 files changed, 110 insertions(+), 6 deletions(-)

diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index 4df1f1eec0ab..18a54ca422f8 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -20,6 +20,7 @@ enum AFS_FS_Operations {
 	FSFETCHACL		= 131,	/* AFS Fetch file ACL */
 	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
 	FSSTOREDATA		= 133,	/* AFS Store file data */
+	FSSTOREACL		= 134,	/* AFS Store file ACL */
 	FSSTORESTATUS		= 135,	/* AFS Store file status */
 	FSREMOVEFILE		= 136,	/* AFS Remove a file */
 	FSCREATEFILE		= 137,	/* AFS Create a file */
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 283f486c59f4..7f1722b9e432 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -836,9 +836,10 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 }
 
 /*
- * deliver reply data to an FS.RemoveFile or FS.RemoveDir
+ * Deliver reply data to any operation that returns file status and volume
+ * sync.
  */
-static int afs_deliver_fs_remove(struct afs_call *call)
+static int afs_deliver_fs_status_and_vol(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
@@ -868,14 +869,14 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 static const struct afs_call_type afs_RXFSRemoveFile = {
 	.name		= "FS.RemoveFile",
 	.op		= afs_FS_RemoveFile,
-	.deliver	= afs_deliver_fs_remove,
+	.deliver	= afs_deliver_fs_status_and_vol,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSRemoveDir = {
 	.name		= "FS.RemoveDir",
 	.op		= afs_FS_RemoveDir,
-	.deliver	= afs_deliver_fs_remove,
+	.deliver	= afs_deliver_fs_status_and_vol,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -2513,3 +2514,55 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
 	afs_make_call(&fc->ac, call, GFP_KERNEL);
 	return (struct afs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
 }
+
+/*
+ * FS.StoreACL operation type
+ */
+static const struct afs_call_type afs_RXFSStoreACL = {
+	.name		= "FS.StoreACL",
+	.op		= afs_FS_StoreACL,
+	.deliver	= afs_deliver_fs_status_and_vol,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the ACL for a file.
+ */
+int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	size_t size;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	size = round_up(acl->size, 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSStoreACL,
+				   5 * 4 + size, (21 + 6) * 4);
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[2] = NULL; /* volsync */
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSSTOREACL);
+	bp[1] = htonl(vnode->fid.vid);
+	bp[2] = htonl(vnode->fid.vnode);
+	bp[3] = htonl(vnode->fid.unique);
+	bp[4] = htonl(acl->size);
+	memcpy(&bp[5], acl->data, acl->size);
+	if (acl->size != size)
+		memset((void *)&bp[5] + acl->size, 0, size - acl->size);
+
+	trace_afs_make_fs_call(call, &vnode->fid);
+	afs_make_call(&fc->ac, call, GFP_KERNEL);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 683b802c20ea..5269824244c6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -983,6 +983,7 @@ struct afs_acl {
 };
 
 extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *);
+extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *);
 
 /*
  * fs_probe.c
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index b7d3d714d8ff..31db360947a6 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -80,9 +80,57 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler,
 	return ret;
 }
 
+/*
+ * Set a file's AFS3 ACL.
+ */
+static int afs_xattr_set_acl(const struct xattr_handler *handler,
+                             struct dentry *dentry,
+                             struct inode *inode, const char *name,
+                             const void *buffer, size_t size, int flags)
+{
+	struct afs_fs_cursor fc;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_acl *acl = NULL;
+	struct key *key;
+	int ret;
+
+	if (flags == XATTR_CREATE)
+		return -EINVAL;
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
+	if (!acl) {
+		key_put(key);
+		return -ENOMEM;
+	}
+
+	acl->size = size;
+	memcpy(acl->data, buffer, size);
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(vnode);
+			afs_fs_store_acl(&fc, acl);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	kfree(acl);
+	key_put(key);
+	return ret;
+}
+
 static const struct xattr_handler afs_xattr_afs_acl_handler = {
-	.name	= "afs.acl",
-	.get	= afs_xattr_get_acl,
+	.name   = "afs.acl",
+	.get    = afs_xattr_get_acl,
+	.set    = afs_xattr_set_acl,
 };
 
 /*
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 25c2e089c6ea..562f854ac4bf 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -36,6 +36,7 @@ enum afs_fs_operation {
 	afs_FS_FetchACL			= 131,	/* AFS Fetch file ACL */
 	afs_FS_FetchStatus		= 132,	/* AFS Fetch file status */
 	afs_FS_StoreData		= 133,	/* AFS Store file data */
+	afs_FS_StoreACL			= 134,	/* AFS Store file ACL */
 	afs_FS_StoreStatus		= 135,	/* AFS Store file status */
 	afs_FS_RemoveFile		= 136,	/* AFS Remove a file */
 	afs_FS_CreateFile		= 137,	/* AFS Create a file */
-- 
cgit v1.2.3


From ae46578b963f6daa9853791ab4c6ac1d6375937c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 30 Apr 2019 18:30:21 +0100
Subject: afs: Get YFS ACLs and information through xattrs

The YFS/AuriStor variant of AFS provides more capable ACLs and provides
per-volume ACLs and per-file ACLs as well as per-directory ACLs.  It also
provides some extra information that can be retrieved through four ACLs:

 (1) afs.yfs.acl

     The YFS file ACL (not the same format as afs.acl).

 (2) afs.yfs.vol_acl

     The YFS volume ACL.

 (3) afs.yfs.acl_inherited

     "1" if a file's ACL is inherited from its parent directory, "0"
     otherwise.

 (4) afs.yfs.acl_num_cleaned

     The number of of ACEs removed from the ACL by the server because the
     PT entries were removed from the PTS database (ie. the subject is no
     longer known).

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h     |  13 ++++
 fs/afs/protocol_yfs.h |   6 +-
 fs/afs/xattr.c        | 101 ++++++++++++++++++++++++++-
 fs/afs/yfsclient.c    | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 304 insertions(+), 4 deletions(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5269824244c6..b800b4e286d3 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1371,6 +1371,19 @@ extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
 				     struct afs_callback *, unsigned int,
 				     struct afs_volsync *);
 
+struct yfs_acl {
+	struct afs_acl	*acl;		/* Dir/file/symlink ACL */
+	struct afs_acl	*vol_acl;	/* Whole volume ACL */
+	u32		inherit_flag;	/* True if ACL is inherited from parent dir */
+	u32		num_cleaned;	/* Number of ACEs removed due to subject removal */
+	unsigned int	flags;
+#define YFS_ACL_WANT_ACL	0x01	/* Set if caller wants ->acl */
+#define YFS_ACL_WANT_VOL_ACL	0x02	/* Set if caller wants ->vol_acl */
+};
+
+extern void yfs_free_opaque_acl(struct yfs_acl *);
+extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, unsigned int);
+
 /*
  * Miscellaneous inline functions.
  */
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index d443e2bfa094..915b9d10cdf3 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -31,9 +31,9 @@ enum YFS_CM_Operations {
 };
 
 enum YFS_FS_Operations {
-	YFSFETCHACL		= 64131, /* YFS Fetch file ACL */
+	YFSFETCHACL		= 64131, /* YFS Fetch file AFS3 ACL */
 	YFSFETCHSTATUS		= 64132, /* YFS Fetch file status */
-	YFSSTOREACL		= 64134, /* YFS Store file ACL */
+	YFSSTOREACL		= 64134, /* YFS Store file AFS3 ACL */
 	YFSSTORESTATUS		= 64135, /* YFS Store file status */
 	YFSREMOVEFILE		= 64136, /* YFS Remove a file */
 	YFSCREATEFILE		= 64137, /* YFS Create a file */
@@ -49,7 +49,7 @@ enum YFS_FS_Operations {
 	YFSRELEASELOCK		= 64158, /* YFS Release a file lock */
 	YFSLOOKUP		= 64161, /* YFS lookup file in directory */
 	YFSFLUSHCPS		= 64165,
-	YFSFETCHOPAQUEACL	= 64168,
+	YFSFETCHOPAQUEACL	= 64168, /* YFS Fetch file YFS ACL */
 	YFSWHOAMI		= 64170,
 	YFSREMOVEACL		= 64171,
 	YFSREMOVEFILE2		= 64173,
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 31db360947a6..a5c82b0ad539 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -19,7 +19,11 @@ static const char afs_xattr_list[] =
 	"afs.acl\0"
 	"afs.cell\0"
 	"afs.fid\0"
-	"afs.volume";
+	"afs.volume\0"
+	"afs.yfs.acl\0"
+	"afs.yfs.acl_inherited\0"
+	"afs.yfs.acl_num_cleaned\0"
+	"afs.yfs.vol_acl";
 
 /*
  * Retrieve a list of the supported xattrs.
@@ -133,6 +137,100 @@ static const struct xattr_handler afs_xattr_afs_acl_handler = {
 	.set    = afs_xattr_set_acl,
 };
 
+/*
+ * Get a file's YFS ACL.
+ */
+static int afs_xattr_get_yfs(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_fs_cursor fc;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct yfs_acl *yacl = NULL;
+	struct key *key;
+	unsigned int flags = 0;
+	char buf[16], *data;
+	int which = 0, dsize, ret;
+
+	if (strcmp(name, "acl") == 0)
+		which = 0;
+	else if (strcmp(name, "acl_inherited") == 0)
+		which = 1;
+	else if (strcmp(name, "acl_num_cleaned") == 0)
+		which = 2;
+	else if (strcmp(name, "vol_acl") == 0)
+		which = 3;
+	else
+		return -EOPNOTSUPP;
+
+	if (which == 0)
+		flags |= YFS_ACL_WANT_ACL;
+	else if (which == 3)
+		flags |= YFS_ACL_WANT_VOL_ACL;
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(vnode);
+			yacl = yfs_fs_fetch_opaque_acl(&fc, flags);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	if (ret == 0) {
+		switch (which) {
+		case 0:
+			data = yacl->acl->data;
+			dsize = yacl->acl->size;
+			break;
+		case 1:
+			data = buf;
+			dsize = snprintf(buf, sizeof(buf), "%u",
+					 yacl->inherit_flag);
+			break;
+		case 2:
+			data = buf;
+			dsize = snprintf(buf, sizeof(buf), "%u",
+					 yacl->num_cleaned);
+			break;
+		case 3:
+			data = yacl->vol_acl->data;
+			dsize = yacl->vol_acl->size;
+			break;
+		default:
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+
+		ret = dsize;
+		if (size > 0) {
+			if (dsize > size) {
+				ret = -ERANGE;
+				goto out;
+			}
+			memcpy(buffer, data, dsize);
+		}
+	}
+
+out:
+	yfs_free_opaque_acl(yacl);
+	key_put(key);
+	return ret;
+}
+
+static const struct xattr_handler afs_xattr_yfs_handler = {
+	.prefix	= "afs.yfs.",
+	.get	= afs_xattr_get_yfs,
+};
+
 /*
  * Get the name of the cell on which a file resides.
  */
@@ -227,5 +325,6 @@ const struct xattr_handler *afs_xattr_handlers[] = {
 	&afs_xattr_afs_cell_handler,
 	&afs_xattr_afs_fid_handler,
 	&afs_xattr_afs_volume_handler,
+	&afs_xattr_yfs_handler,		/* afs.yfs. prefix */
 	NULL
 };
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 055840aa07f6..13eafa764d71 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -2204,3 +2204,191 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	afs_make_call(&fc->ac, call, GFP_NOFS);
 	return afs_wait_for_call_to_complete(call, &fc->ac);
 }
+
+/*
+ * Deliver reply data to an YFS.FetchOpaqueACL.
+ */
+static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
+{
+	struct afs_volsync *volsync = call->reply[2];
+	struct afs_vnode *vnode = call->reply[1];
+	struct yfs_acl *yacl =  call->reply[0];
+	struct afs_acl *acl;
+	const __be32 *bp;
+	unsigned int size;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* Extract the file ACL length */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		size = call->count2 = ntohl(call->tmp);
+		size = round_up(size, 4);
+
+		if (yacl->flags & YFS_ACL_WANT_ACL) {
+			acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+			if (!acl)
+				return -ENOMEM;
+			yacl->acl = acl;
+			acl->size = call->count2;
+			afs_extract_begin(call, acl->data, size);
+		} else {
+			iov_iter_discard(&call->iter, READ, size);
+		}
+		call->unmarshall++;
+
+		/* Extract the file ACL */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* Extract the volume ACL length */
+	case 3:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		size = call->count2 = ntohl(call->tmp);
+		size = round_up(size, 4);
+
+		if (yacl->flags & YFS_ACL_WANT_VOL_ACL) {
+			acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+			if (!acl)
+				return -ENOMEM;
+			yacl->vol_acl = acl;
+			acl->size = call->count2;
+			afs_extract_begin(call, acl->data, size);
+		} else {
+			iov_iter_discard(&call->iter, READ, size);
+		}
+		call->unmarshall++;
+
+		/* Extract the volume ACL */
+	case 4:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_to_buf(call,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+		call->unmarshall++;
+
+		/* extract the metadata */
+	case 5:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		yacl->inherit_flag = ntohl(*bp++);
+		yacl->num_cleaned = ntohl(*bp++);
+		ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+					&call->expected_version, NULL);
+		if (ret < 0)
+			return ret;
+		xdr_decode_YFSVolSync(&bp, volsync);
+
+		call->unmarshall++;
+
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+void yfs_free_opaque_acl(struct yfs_acl *yacl)
+{
+	if (yacl) {
+		kfree(yacl->acl);
+		kfree(yacl->vol_acl);
+		kfree(yacl);
+	}
+}
+
+static void yfs_destroy_fs_fetch_opaque_acl(struct afs_call *call)
+{
+	yfs_free_opaque_acl(call->reply[0]);
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * YFS.FetchOpaqueACL operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = {
+	.name		= "YFS.FetchOpaqueACL",
+	.op		= yfs_FS_FetchOpaqueACL,
+	.deliver	= yfs_deliver_fs_fetch_opaque_acl,
+	.destructor	= yfs_destroy_fs_fetch_opaque_acl,
+};
+
+/*
+ * Fetch the YFS advanced ACLs for a file.
+ */
+struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc,
+					unsigned int flags)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct yfs_acl *yacl;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSFetchOpaqueACL,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		goto nomem;
+
+	yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL);
+	if (!yacl)
+		goto nomem_call;
+
+	yacl->flags = flags;
+	call->key = fc->key;
+	call->reply[0] = yacl;
+	call->reply[1] = vnode;
+	call->reply[2] = NULL; /* volsync */
+	call->ret_reply0 = true;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHOPAQUEACL);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	afs_make_call(&fc->ac, call, GFP_KERNEL);
+	return (struct yfs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
+
+nomem_call:
+	afs_put_call(call);
+nomem:
+	fc->ac.error = -ENOMEM;
+	return ERR_PTR(-ENOMEM);
+}
-- 
cgit v1.2.3


From f5e4546347bc847be30b3cf904db5fc874b3c5dc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 1 May 2019 14:05:27 +0100
Subject: afs: Implement YFS ACL setting

Implement the setting of YFS ACLs in AFS through the interface of setting
the afs.yfs.acl extended attribute on the file.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h  |  1 +
 fs/afs/xattr.c     | 49 +++++++++++++++++++++++++++++++++++++++
 fs/afs/yfsclient.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 112 insertions(+), 5 deletions(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b800b4e286d3..b3cd6e8ad59d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1383,6 +1383,7 @@ struct yfs_acl {
 
 extern void yfs_free_opaque_acl(struct yfs_acl *);
 extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, unsigned int);
+extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *);
 
 /*
  * Miscellaneous inline functions.
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index a5c82b0ad539..c81f85003fc7 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -226,9 +226,58 @@ out:
 	return ret;
 }
 
+/*
+ * Set a file's YFS ACL.
+ */
+static int afs_xattr_set_yfs(const struct xattr_handler *handler,
+                             struct dentry *dentry,
+                             struct inode *inode, const char *name,
+                             const void *buffer, size_t size, int flags)
+{
+	struct afs_fs_cursor fc;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_acl *acl = NULL;
+	struct key *key;
+	int ret;
+
+	if (flags == XATTR_CREATE ||
+	    strcmp(name, "acl") != 0)
+		return -EINVAL;
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
+	if (!acl) {
+		key_put(key);
+		return -ENOMEM;
+	}
+
+	acl->size = size;
+	memcpy(acl->data, buffer, size);
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vnode_operation(&fc, vnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(vnode);
+			yfs_fs_store_opaque_acl2(&fc, acl);
+		}
+
+		afs_check_for_remote_deletion(&fc, fc.vnode);
+		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	kfree(acl);
+	key_put(key);
+	return ret;
+}
+
 static const struct xattr_handler afs_xattr_yfs_handler = {
 	.prefix	= "afs.yfs.",
 	.get	= afs_xattr_get_yfs,
+	.set	= afs_xattr_set_yfs,
 };
 
 /*
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 13eafa764d71..6d5af09e3f19 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1768,9 +1768,10 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc,
 }
 
 /*
- * Deliver reply data to an YFS.SetLock, YFS.ExtendLock or YFS.ReleaseLock
+ * Deliver reply data to operations that just return a file status and a volume
+ * sync record.
  */
-static int yfs_deliver_fs_xxxx_lock(struct afs_call *call)
+static int yfs_deliver_status_and_volsync(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
@@ -1800,7 +1801,7 @@ static int yfs_deliver_fs_xxxx_lock(struct afs_call *call)
 static const struct afs_call_type yfs_RXYFSSetLock = {
 	.name		= "YFS.SetLock",
 	.op		= yfs_FS_SetLock,
-	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.deliver	= yfs_deliver_status_and_volsync,
 	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1811,7 +1812,7 @@ static const struct afs_call_type yfs_RXYFSSetLock = {
 static const struct afs_call_type yfs_RXYFSExtendLock = {
 	.name		= "YFS.ExtendLock",
 	.op		= yfs_FS_ExtendLock,
-	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.deliver	= yfs_deliver_status_and_volsync,
 	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
@@ -1822,7 +1823,7 @@ static const struct afs_call_type yfs_RXYFSExtendLock = {
 static const struct afs_call_type yfs_RXYFSReleaseLock = {
 	.name		= "YFS.ReleaseLock",
 	.op		= yfs_FS_ReleaseLock,
-	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.deliver	= yfs_deliver_status_and_volsync,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -2392,3 +2393,59 @@ nomem:
 	fc->ac.error = -ENOMEM;
 	return ERR_PTR(-ENOMEM);
 }
+
+/*
+ * YFS.StoreOpaqueACL2 operation type
+ */
+static const struct afs_call_type yfs_RXYFSStoreOpaqueACL2 = {
+	.name		= "YFS.StoreOpaqueACL2",
+	.op		= yfs_FS_StoreOpaqueACL2,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the YFS ACL for a file.
+ */
+int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	size_t size;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	size = round_up(acl->size, 4);
+	call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(__be32) + size,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[2] = NULL; /* volsync */
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREOPAQUEACL2);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_u32(bp, acl->size);
+	memcpy(bp, acl->data, acl->size);
+	if (acl->size != size)
+		memset((void *)bp + acl->size, 0, size - acl->size);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call(call, &vnode->fid);
+	afs_make_call(&fc->ac, call, GFP_KERNEL);
+	return afs_wait_for_call_to_complete(call, &fc->ac);
+}
-- 
cgit v1.2.3