summaryrefslogtreecommitdiff
path: root/net/rds
diff options
context:
space:
mode:
Diffstat (limited to 'net/rds')
-rw-r--r--net/rds/connection.c1
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/ib.h5
-rw-r--r--net/rds/ib_cm.c26
-rw-r--r--net/rds/ib_frmr.c95
-rw-r--r--net/rds/ib_mr.h4
-rw-r--r--net/rds/ib_rdma.c60
-rw-r--r--net/rds/ib_send.c29
-rw-r--r--net/rds/rdma.c10
-rw-r--r--net/rds/rdma_transport.c11
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/send.c4
12 files changed, 145 insertions, 103 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ea134f9a825..ed7f2133acc2 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -736,6 +736,7 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
cinfo->next_rx_seq = cp->cp_next_rx_seq;
cinfo->laddr = conn->c_laddr.s6_addr32[3];
cinfo->faddr = conn->c_faddr.s6_addr32[3];
+ cinfo->tos = conn->c_tos;
strncpy(cinfo->transport, conn->c_trans->t_name,
sizeof(cinfo->transport));
cinfo->flags = 0;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index b8d581b779b2..ec05d91aa9a2 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -318,6 +318,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
iinfo->max_send_sge = rds_ibdev->max_sge;
rds_ib_get_mr_info(rds_ibdev, iinfo);
+ iinfo->cache_allocs = atomic_read(&ic->i_cache_allocs);
}
return 1;
}
@@ -351,6 +352,7 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
iinfo6->max_send_sge = rds_ibdev->max_sge;
rds6_ib_get_mr_info(rds_ibdev, iinfo6);
+ iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs);
}
return 1;
}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 67a715b076ca..303c6ee8bdb7 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -15,8 +15,7 @@
#define RDS_IB_DEFAULT_RECV_WR 1024
#define RDS_IB_DEFAULT_SEND_WR 256
-#define RDS_IB_DEFAULT_FR_WR 256
-#define RDS_IB_DEFAULT_FR_INV_WR 256
+#define RDS_IB_DEFAULT_FR_WR 512
#define RDS_IB_DEFAULT_RETRY_COUNT 1
@@ -157,7 +156,7 @@ struct rds_ib_connection {
/* To control the number of wrs from fastreg */
atomic_t i_fastreg_wrs;
- atomic_t i_fastunreg_wrs;
+ atomic_t i_fastreg_inuse_count;
/* interrupt handling */
struct tasklet_struct i_send_tasklet;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 66c6eb56072b..fddaa09f7b0d 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -40,6 +40,7 @@
#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
+#include "ib_mr.h"
/*
* Set the selected protocol version
@@ -460,10 +461,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
* completion queue and send queue. This extra space is used for FRMR
* registration and invalidation work requests
*/
- fr_queue_space = rds_ibdev->use_fastreg ?
- (RDS_IB_DEFAULT_FR_WR + 1) +
- (RDS_IB_DEFAULT_FR_INV_WR + 1)
- : 0;
+ fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0);
/* add the conn now so that connection establishment has the dev */
rds_ib_add_conn(rds_ibdev, conn);
@@ -529,8 +527,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
attr.qp_type = IB_QPT_RC;
attr.send_cq = ic->i_send_cq;
attr.recv_cq = ic->i_recv_cq;
- atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
- atomic_set(&ic->i_fastunreg_wrs, RDS_IB_DEFAULT_FR_INV_WR);
/*
* XXX this can fail if max_*_wr is too large? Are we supposed
@@ -611,11 +607,11 @@ send_hdrs_dma_out:
qp_out:
rdma_destroy_qp(ic->i_cm_id);
recv_cq_out:
- if (!ib_destroy_cq(ic->i_recv_cq))
- ic->i_recv_cq = NULL;
+ ib_destroy_cq(ic->i_recv_cq);
+ ic->i_recv_cq = NULL;
send_cq_out:
- if (!ib_destroy_cq(ic->i_send_cq))
- ic->i_send_cq = NULL;
+ ib_destroy_cq(ic->i_send_cq);
+ ic->i_send_cq = NULL;
rds_ibdev_out:
rds_ib_remove_conn(rds_ibdev, conn);
out:
@@ -997,6 +993,11 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_cm_id, err);
}
+ /* kick off "flush_worker" for all pools in order to reap
+ * all FRMR registrations that are still marked "FRMR_IS_INUSE"
+ */
+ rds_ib_flush_mrs();
+
/*
* We want to wait for tx and rx completion to finish
* before we tear down the connection, but we have to be
@@ -1009,8 +1010,8 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
wait_event(rds_ib_ring_empty_wait,
rds_ib_ring_empty(&ic->i_recv_ring) &&
(atomic_read(&ic->i_signaled_sends) == 0) &&
- (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR) &&
- (atomic_read(&ic->i_fastunreg_wrs) == RDS_IB_DEFAULT_FR_INV_WR));
+ (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
+ (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
tasklet_kill(&ic->i_send_tasklet);
tasklet_kill(&ic->i_recv_tasklet);
@@ -1137,6 +1138,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
spin_lock_init(&ic->i_ack_lock);
#endif
atomic_set(&ic->i_signaled_sends, 0);
+ atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
/*
* rds_ib_conn_shutdown() waits for these to be emptied so they
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 688dcd68d4ea..06ecf9d2d4bf 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -32,6 +32,24 @@
#include "ib_mr.h"
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+ enum rds_ib_fr_state old_state,
+ enum rds_ib_fr_state new_state)
+{
+ if (cmpxchg(&ibmr->u.frmr.fr_state,
+ old_state, new_state) == old_state &&
+ old_state == FRMR_IS_INUSE) {
+ /* enforce order of ibmr->u.frmr.fr_state update
+ * before decrementing i_fastreg_inuse_count
+ */
+ smp_mb__before_atomic();
+ atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+ if (waitqueue_active(&rds_ib_ring_empty_wait))
+ wake_up(&rds_ib_ring_empty_wait);
+ }
+}
+
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
int npages)
{
@@ -75,6 +93,8 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
pool->max_items_soft = pool->max_items;
frmr->fr_state = FRMR_IS_FREE;
+ init_waitqueue_head(&frmr->fr_inv_done);
+ init_waitqueue_head(&frmr->fr_reg_done);
return ibmr;
out_no_cigar:
@@ -116,13 +136,19 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
if (unlikely(ret != ibmr->sg_len))
return ret < 0 ? ret : -EINVAL;
+ if (cmpxchg(&frmr->fr_state,
+ FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+ return -EBUSY;
+
+ atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
/* Perform a WR for the fast_reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
* inside the fast_reg_mr WR. The key used is a rolling 8bit
* counter, which should guarantee uniqueness.
*/
ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
- frmr->fr_state = FRMR_IS_INUSE;
+ frmr->fr_reg = true;
memset(&reg_wr, 0, sizeof(reg_wr));
reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
@@ -138,12 +164,23 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
if (unlikely(ret)) {
/* Failure here can be because of -ENOMEM as well */
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
atomic_inc(&ibmr->ic->i_fastreg_wrs);
if (printk_ratelimit())
pr_warn("RDS/IB: %s returned error(%d)\n",
__func__, ret);
+ goto out;
}
+
+ /* Wait for the registration to complete in order to prevent an invalid
+ * access error resulting from a race between the memory region already
+ * being accessed while registration is still pending.
+ */
+ wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+
+out:
+
return ret;
}
@@ -239,8 +276,8 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
if (frmr->fr_state != FRMR_IS_INUSE)
goto out;
- while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) {
- atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+ while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
+ atomic_inc(&ibmr->ic->i_fastreg_wrs);
cpu_relax();
}
@@ -255,12 +292,29 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
if (unlikely(ret)) {
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
frmr->fr_inv = false;
- atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+ /* enforce order of frmr->fr_inv update
+ * before incrementing i_fastreg_wrs
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&ibmr->ic->i_fastreg_wrs);
pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
goto out;
}
+
+ /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+ * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+ * triggered by function "rds_ib_reg_frmr" as it is releases frmr
+ * regions whose state is not "FRMR_IS_FREE" right away.
+ * 2) prevents an invalid access error in a race
+ * from a pending "IB_WR_LOCAL_INV" operation
+ * with a teardown ("dma_unmap_sg", "put_page")
+ * and de-registration ("ib_dereg_mr") of the corresponding
+ * memory region.
+ */
+ wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+
out:
return ret;
}
@@ -271,7 +325,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
struct rds_ib_frmr *frmr = &ibmr->u.frmr;
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
if (rds_conn_up(ic->conn))
rds_ib_conn_error(ic->conn,
"frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -283,12 +337,21 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
}
if (frmr->fr_inv) {
- frmr->fr_state = FRMR_IS_FREE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
frmr->fr_inv = false;
- atomic_inc(&ic->i_fastreg_wrs);
- } else {
- atomic_inc(&ic->i_fastunreg_wrs);
+ wake_up(&frmr->fr_inv_done);
}
+
+ if (frmr->fr_reg) {
+ frmr->fr_reg = false;
+ wake_up(&frmr->fr_reg_done);
+ }
+
+ /* enforce order of frmr->{fr_reg,fr_inv} update
+ * before incrementing i_fastreg_wrs
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&ic->i_fastreg_wrs);
}
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
@@ -296,14 +359,18 @@ void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
{
struct rds_ib_mr *ibmr, *next;
struct rds_ib_frmr *frmr;
- int ret = 0;
+ int ret = 0, ret2;
unsigned int freed = *nfreed;
/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
list_for_each_entry(ibmr, list, unmap_list) {
- if (ibmr->sg_dma_len)
- ret |= rds_ib_post_inv(ibmr);
+ if (ibmr->sg_dma_len) {
+ ret2 = rds_ib_post_inv(ibmr);
+ if (ret2 && !ret)
+ ret = ret2;
+ }
}
+
if (ret)
pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5da12c248431..9045a8c0edff 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -57,6 +57,9 @@ struct rds_ib_frmr {
struct ib_mr *mr;
enum rds_ib_fr_state fr_state;
bool fr_inv;
+ wait_queue_head_t fr_inv_done;
+ bool fr_reg;
+ wait_queue_head_t fr_reg_done;
struct ib_send_wr fr_wr;
unsigned int dma_npages;
unsigned int sg_byte_len;
@@ -97,6 +100,7 @@ struct rds_ib_mr_pool {
struct llist_head free_list; /* unused MRs */
struct llist_head clean_list; /* unused & unmapped MRs */
wait_queue_head_t flush_wait;
+ spinlock_t clean_lock; /* "clean_list" concurrency */
atomic_t free_pinned; /* memory pinned by free MRs */
unsigned long max_items;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 0b347f46b2f4..c8c1e3ae8d84 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -40,9 +40,6 @@
struct workqueue_struct *rds_ib_mr_wq;
-static DEFINE_PER_CPU(unsigned long, clean_list_grace);
-#define CLEAN_LIST_BUSY_BIT 0
-
static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
{
struct rds_ib_device *rds_ibdev;
@@ -195,12 +192,11 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
{
struct rds_ib_mr *ibmr = NULL;
struct llist_node *ret;
- unsigned long *flag;
+ unsigned long flags;
- preempt_disable();
- flag = this_cpu_ptr(&clean_list_grace);
- set_bit(CLEAN_LIST_BUSY_BIT, flag);
+ spin_lock_irqsave(&pool->clean_lock, flags);
ret = llist_del_first(&pool->clean_list);
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
if (ret) {
ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
if (pool->pool_type == RDS_IB_MR_8K_POOL)
@@ -209,23 +205,9 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
}
- clear_bit(CLEAN_LIST_BUSY_BIT, flag);
- preempt_enable();
return ibmr;
}
-static inline void wait_clean_list_grace(void)
-{
- int cpu;
- unsigned long *flag;
-
- for_each_online_cpu(cpu) {
- flag = &per_cpu(clean_list_grace, cpu);
- while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
- cpu_relax();
- }
-}
-
void rds_ib_sync_mr(void *trans_private, int direction)
{
struct rds_ib_mr *ibmr = trans_private;
@@ -324,8 +306,7 @@ static unsigned int llist_append_to_list(struct llist_head *llist,
* of clusters. Each cluster has linked llist nodes of
* MR_CLUSTER_SIZE mrs that are ready for reuse.
*/
-static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
- struct list_head *list,
+static void list_to_llist_nodes(struct list_head *list,
struct llist_node **nodes_head,
struct llist_node **nodes_tail)
{
@@ -402,8 +383,13 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
*/
dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
- if (free_all)
+ if (free_all) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->clean_lock, flags);
llist_append_to_list(&pool->clean_list, &unmap_list);
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
+ }
free_goal = rds_ib_flush_goal(pool, free_all);
@@ -416,27 +402,20 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
if (!list_empty(&unmap_list)) {
- /* we have to make sure that none of the things we're about
- * to put on the clean list would race with other cpus trying
- * to pull items off. The llist would explode if we managed to
- * remove something from the clean list and then add it back again
- * while another CPU was spinning on that same item in llist_del_first.
- *
- * This is pretty unlikely, but just in case wait for an llist grace period
- * here before adding anything back into the clean list.
- */
- wait_clean_list_grace();
-
- list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
+ unsigned long flags;
+
+ list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail);
if (ibmr_ret) {
*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
clean_nodes = clean_nodes->next;
}
/* more than one entry in llist nodes */
- if (clean_nodes)
+ if (clean_nodes) {
+ spin_lock_irqsave(&pool->clean_lock, flags);
llist_add_batch(clean_nodes, clean_tail,
&pool->clean_list);
-
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
+ }
}
atomic_sub(unpinned, &pool->free_pinned);
@@ -471,7 +450,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
else
rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
- return ERR_PTR(-EAGAIN);
+ break;
}
/* We do have some empty MRs. Flush them out. */
@@ -485,7 +464,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
return ibmr;
}
- return ibmr;
+ return NULL;
}
static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
@@ -610,6 +589,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
init_llist_head(&pool->free_list);
init_llist_head(&pool->drop_list);
init_llist_head(&pool->clean_list);
+ spin_lock_init(&pool->clean_lock);
mutex_init(&pool->flush_lock);
init_waitqueue_head(&pool->flush_wait);
INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 18f2341202f8..dfe6237dafe2 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -69,6 +69,16 @@ static void rds_ib_send_complete(struct rds_message *rm,
complete(rm, notify_status);
}
+static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
+ struct rm_data_op *op,
+ int wc_status)
+{
+ if (op->op_nents)
+ ib_dma_unmap_sg(ic->i_cm_id->device,
+ op->op_sg, op->op_nents,
+ DMA_TO_DEVICE);
+}
+
static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
struct rm_rdma_op *op,
int wc_status)
@@ -129,21 +139,6 @@ static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
rds_ib_stats_inc(s_ib_atomic_fadd);
}
-static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
- struct rm_data_op *op,
- int wc_status)
-{
- struct rds_message *rm = container_of(op, struct rds_message, data);
-
- if (op->op_nents)
- ib_dma_unmap_sg(ic->i_cm_id->device,
- op->op_sg, op->op_nents,
- DMA_TO_DEVICE);
-
- if (rm->rdma.op_active && rm->data.op_notify)
- rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status);
-}
-
/*
* Unmap the resources associated with a struct send_work.
*
@@ -902,7 +897,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
send->s_queued = jiffies;
send->s_op = NULL;
- nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
+ if (!op->op_notify)
+ nr_sig += rds_ib_set_wr_signal_state(ic, send,
+ op->op_notify);
send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
send->s_rdma_wr.remote_addr = remote_addr;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index b340ed4fc43a..916f5ec373d8 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -641,16 +641,6 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
}
op->op_notifier->n_user_token = args->user_token;
op->op_notifier->n_status = RDS_RDMA_SUCCESS;
-
- /* Enable rmda notification on data operation for composite
- * rds messages and make sure notification is enabled only
- * for the data operation which follows it so that application
- * gets notified only after full message gets delivered.
- */
- if (rm->data.op_sg) {
- rm->rdma.op_notify = 0;
- rm->data.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
- }
}
/* The cookie contains the R_Key of the remote memory region, and
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 46bce8389066..ff74c4bbb9fc 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -112,17 +112,20 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
if (!conn)
break;
err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
- if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) {
+ if (!err ||
+ (err && len >= sizeof(*err) &&
+ ((*err) <= RDS_RDMA_REJ_INCOMPAT))) {
pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
&conn->c_laddr, &conn->c_faddr);
- conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
- conn->c_tos = 0;
+
+ if (!conn->c_tos)
+ conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
+
rds_conn_drop(conn);
}
rdsdebug("Connection rejected: %s\n",
rdma_reject_msg(cm_id, event->status));
break;
- /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0d8f67cadd74..f0066d168499 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -476,7 +476,6 @@ struct rds_message {
} rdma;
struct rm_data_op {
unsigned int op_active:1;
- unsigned int op_notify:1;
unsigned int op_nents;
unsigned int op_count;
unsigned int op_dmasg;
diff --git a/net/rds/send.c b/net/rds/send.c
index 166dd578c1cc..031b1e97a466 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -491,14 +491,12 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
struct rm_rdma_op *ro;
struct rds_notifier *notifier;
unsigned long flags;
- unsigned int notify = 0;
spin_lock_irqsave(&rm->m_rs_lock, flags);
- notify = rm->rdma.op_notify | rm->data.op_notify;
ro = &rm->rdma;
if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
- ro->op_active && notify && ro->op_notifier) {
+ ro->op_active && ro->op_notify && ro->op_notifier) {
notifier = ro->op_notifier;
rs = rm->m_rs;
sock_hold(rds_rs_to_sk(rs));