summaryrefslogtreecommitdiff
path: root/net/rds/rdma_transport.c
diff options
context:
space:
mode:
authorGerd Rausch <gerd.rausch@oracle.com>2019-06-27 19:21:44 +0300
committerSantosh Shilimkar <santosh.shilimkar@oracle.com>2019-07-10 07:45:42 +0300
commit8c6166cfc9cd48e93d9176561e50b63cef4330d5 (patch)
treed3289c279dc2e5dfdb6b3ebcefe345919b5a8e30 /net/rds/rdma_transport.c
parenta55207884708bf1d5b8c87a3c504502de77a5416 (diff)
downloadlinux-8c6166cfc9cd48e93d9176561e50b63cef4330d5.tar.xz
rds: Accept peer connection reject messages due to incompatible version
Prior to commit d021fabf525ff ("rds: rdma: add consumer reject") function "rds_rdma_cm_event_handler_cmn" would always honor a rejected connection attempt by issuing a "rds_conn_drop". The commit mentioned above added a "break", eliminating the "fallthrough" case and made the "rds_conn_drop" rather conditional: Now it only happens if a "consumer defined" reject (i.e. "rdma_reject") carries an integer-value of "1" inside "private_data": if (!conn) break; err = (int *)rdma_consumer_reject_data(cm_id, event, &len); if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) { pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", &conn->c_laddr, &conn->c_faddr); conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; rds_conn_drop(conn); } rdsdebug("Connection rejected: %s\n", rdma_reject_msg(cm_id, event->status)); break; /* FALLTHROUGH */ A number of issues are worth mentioning here: #1) Previous versions of the RDS code simply rejected a connection by calling "rdma_reject(cm_id, NULL, 0);" So the value of the payload in "private_data" will not be "1", but "0". #2) Now the code has become dependent on host byte order and sizing. If one peer is big-endian, the other is little-endian, or there's a difference in sizeof(int) (e.g. ILP64 vs LP64), the *err check does not work as intended. #3) There is no check for "len" to see if the data behind *err is even valid. Luckily, it appears that the "rdma_reject(cm_id, NULL, 0)" will always carry 148 bytes of zeroized payload. But that should probably not be relied upon here. #4) With the added "break;", we might as well drop the misleading "/* FALLTHROUGH */" comment. This commit does _not_ address issue #2, as the sender would have to agree on a byte order as well. Here is the sequence of messages in this observed error-scenario: Host-A is pre-QoS changes (excluding the commit mentioned above) Host-B is post-QoS changes (including the commit mentioned above) #1 Host-B issues a connection request via function "rds_conn_path_transition" connection state transitions to "RDS_CONN_CONNECTING" #2 Host-A rejects the incompatible connection request (from #1) It does so by calling "rdma_reject(cm_id, NULL, 0);" #3 Host-B receives an "RDMA_CM_EVENT_REJECTED" event (from #2) But since the code is changed in the way described above, it won't drop the connection here, simply because "*err == 0". #4 Host-A issues a connection request #5 Host-B receives an "RDMA_CM_EVENT_CONNECT_REQUEST" event and ends up calling "rds_ib_cm_handle_connect". But since the state is already in "RDS_CONN_CONNECTING" (as of #1) it will end up issuing a "rdma_reject" without dropping the connection: if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { /* Wait and see - our connect may still be succeeding */ rds_ib_stats_inc(s_ib_connect_raced); } goto out; #6 Host-A receives an "RDMA_CM_EVENT_REJECTED" event (from #5), drops the connection and tries again (goto #4) until it gives up. Tested-by: Zhu Yanjun <yanjun.zhu@oracle.com> Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Diffstat (limited to 'net/rds/rdma_transport.c')
-rw-r--r--net/rds/rdma_transport.c5
1 files changed, 3 insertions, 2 deletions
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 46bce8389066..9db455d02255 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -112,7 +112,9 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
if (!conn)
break;
err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
- if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) {
+ if (!err ||
+ (err && len >= sizeof(*err) &&
+ ((*err) <= RDS_RDMA_REJ_INCOMPAT))) {
pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
&conn->c_laddr, &conn->c_faddr);
conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
@@ -122,7 +124,6 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
rdsdebug("Connection rejected: %s\n",
rdma_reject_msg(cm_id, event->status));
break;
- /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR: