From 5cd2fe6d54c91aa76893b3034f5f3473063c0202 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 9 Apr 2009 14:09:36 +0000 Subject: RDS/IW+IB: Set recv ring low water mark to 1/2 full. Currently the recv ring low water mark is 1/4 the depth. Performance measurements show that this limits iWARP throughput by flow controlling the rds-stress senders. Setting it to 1/2 seems to max the T3 performance. I tried even higher levels but that didn't help and it started to increase the rds thread cpu utilization. Signed-off-by: Steve Wise Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_ring.c | 2 +- net/rds/iw_ring.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/rds') diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c index 99a6ccae964c..ff97e8eda858 100644 --- a/net/rds/ib_ring.c +++ b/net/rds/ib_ring.c @@ -137,7 +137,7 @@ int rds_ib_ring_empty(struct rds_ib_work_ring *ring) int rds_ib_ring_low(struct rds_ib_work_ring *ring) { - return __rds_ib_ring_used(ring) <= (ring->w_nr >> 2); + return __rds_ib_ring_used(ring) <= (ring->w_nr >> 1); } /* diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c index d422d4b5deef..da8e3b63f663 100644 --- a/net/rds/iw_ring.c +++ b/net/rds/iw_ring.c @@ -137,7 +137,7 @@ int rds_iw_ring_empty(struct rds_iw_work_ring *ring) int rds_iw_ring_low(struct rds_iw_work_ring *ring) { - return __rds_iw_ring_used(ring) <= (ring->w_nr >> 2); + return __rds_iw_ring_used(ring) <= (ring->w_nr >> 1); } -- cgit v1.2.3 From 11bc9421da3040c71fc96da1a31e95217e8cf2af Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 9 Apr 2009 14:09:37 +0000 Subject: RDS: Correct some iw references in rdma_transport.c Had some lingering instances of _iw_ variable names from when the listen code was centralized into rdma_transport.c Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/rdma_transport.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/rds') diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 7b19024f9706..7d0f901c93d5 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -34,7 +34,7 @@ #include "rdma_transport.h" -static struct rdma_cm_id *rds_iw_listen_id; +static struct rdma_cm_id *rds_rdma_listen_id; int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) @@ -161,7 +161,7 @@ static int __init rds_rdma_listen_init(void) rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT); - rds_iw_listen_id = cm_id; + rds_rdma_listen_id = cm_id; cm_id = NULL; out: if (cm_id) @@ -171,10 +171,10 @@ out: static void rds_rdma_listen_stop(void) { - if (rds_iw_listen_id) { - rdsdebug("cm %p\n", rds_iw_listen_id); - rdma_destroy_id(rds_iw_listen_id); - rds_iw_listen_id = NULL; + if (rds_rdma_listen_id) { + rdsdebug("cm %p\n", rds_rdma_listen_id); + rdma_destroy_id(rds_rdma_listen_id); + rds_rdma_listen_id = NULL; } } -- cgit v1.2.3 From d39e0602bb987133321d358d9b837d67c27b223d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 9 Apr 2009 14:09:38 +0000 Subject: RDS/IW+IB: Set the RDS_LL_SEND_FULL bit when we're throttled. The RDS_LL_SEND_FULL bit should be set when we stop transmitted due to flow control. Otherwise the send worker will keep trying as opposed to sleeping until we unthrottle. Saves CPU. Signed-off-by: Steve Wise Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_send.c | 2 +- net/rds/iw_send.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/rds') diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index cb6c52cb1c4c..fa684b7fc748 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -506,7 +506,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, flow_controlled++; } if (work_alloc == 0) { - rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); rds_ib_stats_inc(s_ib_tx_throttle); ret = -ENOMEM; goto out; diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 22dd38ffd608..626290bc7e97 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -549,7 +549,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, flow_controlled++; } if (work_alloc == 0) { - rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); rds_iw_stats_inc(s_iw_tx_throttle); ret = -ENOMEM; goto out; -- cgit v1.2.3 From 7b70d0336da777c00395cc7a503497c2cdabd1a8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 9 Apr 2009 14:09:39 +0000 Subject: RDS/IW+IB: Allow max credit advertise window. Fix hack that restricts the credit advertisement to 127. Signed-off-by: Steve Wise Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.h | 2 +- net/rds/ib_recv.c | 2 +- net/rds/ib_send.c | 8 ++++---- net/rds/iw.h | 2 +- net/rds/iw_recv.c | 2 +- net/rds/iw_send.c | 8 ++++---- net/rds/rds.h | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net/rds') diff --git a/net/rds/ib.h b/net/rds/ib.h index 069206cae733..455ae73047fe 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -333,7 +333,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, - u32 *adv_credits, int need_posted); + u32 *adv_credits, int need_posted, int max_posted); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 36d931573ff4..5709bad28329 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -524,7 +524,7 @@ void rds_ib_attempt_ack(struct rds_ib_connection *ic) } /* Can we get a send credit? */ - if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0)) { + if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) { rds_ib_stats_inc(s_ib_tx_throttle); clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); return; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index fa684b7fc748..23bf830db2d5 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -311,7 +311,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) * and using atomic_cmpxchg when updating the two counters. */ int rds_ib_send_grab_credits(struct rds_ib_connection *ic, - u32 wanted, u32 *adv_credits, int need_posted) + u32 wanted, u32 *adv_credits, int need_posted, int max_posted) { unsigned int avail, posted, got = 0, advertise; long oldval, newval; @@ -351,7 +351,7 @@ try_again: * available. */ if (posted && (got || need_posted)) { - advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); + advertise = min_t(unsigned int, posted, max_posted); newval -= IB_SET_POST_CREDITS(advertise); } @@ -498,7 +498,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, credit_alloc = work_alloc; if (ic->i_flowctl) { - credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0); + credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); adv_credits += posted; if (credit_alloc < work_alloc) { rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); @@ -571,7 +571,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, /* * Update adv_credits since we reset the ACK_REQUIRED bit. */ - rds_ib_send_grab_credits(ic, 0, &posted, 1); + rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); } else if (ic->i_rm != rm) diff --git a/net/rds/iw.h b/net/rds/iw.h index b4fb27252895..0715dde323e7 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -361,7 +361,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, - u32 *adv_credits, int need_posted); + u32 *adv_credits, int need_posted, int max_posted); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats); diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index fde470fa50d5..8683f5f66c4b 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -524,7 +524,7 @@ void rds_iw_attempt_ack(struct rds_iw_connection *ic) } /* Can we get a send credit? */ - if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0)) { + if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) { rds_iw_stats_inc(s_iw_tx_throttle); clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); return; diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 626290bc7e97..44a6a0551f28 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -347,7 +347,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * and using atomic_cmpxchg when updating the two counters. */ int rds_iw_send_grab_credits(struct rds_iw_connection *ic, - u32 wanted, u32 *adv_credits, int need_posted) + u32 wanted, u32 *adv_credits, int need_posted, int max_posted) { unsigned int avail, posted, got = 0, advertise; long oldval, newval; @@ -387,7 +387,7 @@ try_again: * available. */ if (posted && (got || need_posted)) { - advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); + advertise = min_t(unsigned int, posted, max_posted); newval -= IB_SET_POST_CREDITS(advertise); } @@ -541,7 +541,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, credit_alloc = work_alloc; if (ic->i_flowctl) { - credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0); + credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); adv_credits += posted; if (credit_alloc < work_alloc) { rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); @@ -614,7 +614,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, /* * Update adv_credits since we reset the ACK_REQUIRED bit. */ - rds_iw_send_grab_credits(ic, 0, &posted, 1); + rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); } else if (ic->i_rm != rm) diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e5..1f82ec0d2066 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -132,7 +132,7 @@ struct rds_connection { #define RDS_FLAG_CONG_BITMAP 0x01 #define RDS_FLAG_ACK_REQUIRED 0x02 #define RDS_FLAG_RETRANSMITTED 0x04 -#define RDS_MAX_ADV_CREDIT 127 +#define RDS_MAX_ADV_CREDIT 255 /* * Maximum space available for extension headers. -- cgit v1.2.3 From 7acd4a794c1530af063e51f3f7171e75556458f3 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 9 Apr 2009 14:09:40 +0000 Subject: RDS: Fix ordering in a conditional Putting the constant first is a supposed "best practice" that actually makes the code harder to read. Thanks to Roland Dreier for finding a bug in this "simple, obviously correct" patch. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/rds') diff --git a/net/rds/rdma.c b/net/rds/rdma.c index eaeeb91e1119..e390dc70b9d9 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -155,7 +155,7 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, nr_pages, write, 0, pages, NULL); up_read(¤t->mm->mmap_sem); - if (0 <= ret && (unsigned) ret < nr_pages) { + if (ret >= 0 && ret < nr_pages) { while (ret--) put_page(pages[ret]); ret = -EFAULT; -- cgit v1.2.3 From 49f696914100780f6bf8e652d3468de0015d6172 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 9 Apr 2009 14:09:41 +0000 Subject: RDS: Establish connection before parsing CMSGs The first message to a remote node should prompt a new connection. Even an RDMA op via CMSG. Therefore move CMSG parsing to after connection establishment. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/send.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/rds') diff --git a/net/rds/send.c b/net/rds/send.c index 104fe033203d..a4a7f428cd76 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -854,11 +854,6 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rm->m_daddr = daddr; - /* Parse any control messages the user may have included. */ - ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); - if (ret) - goto out; - /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ if (rs->rs_conn && rs->rs_conn->c_faddr == daddr) @@ -874,6 +869,11 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rs->rs_conn = conn; } + /* Parse any control messages the user may have included. */ + ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); + if (ret) + goto out; + if ((rm->m_rdma_cookie || rm->m_rdma_op) && conn->c_trans->xmit_rdma == NULL) { if (printk_ratelimit()) -- cgit v1.2.3 From 830eb7d56c18ff4c29acd8b0bb48db404660321f Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 9 Apr 2009 14:09:42 +0000 Subject: RDS: use get_user_pages_fast() Use the new function that is simpler and faster. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/info.c | 5 +---- net/rds/rdma.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'net/rds') diff --git a/net/rds/info.c b/net/rds/info.c index 1d885535214d..62aeef37aefe 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -188,10 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, ret = -ENOMEM; goto out; } - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0, - pages, NULL); - up_read(¤t->mm->mmap_sem); + ret = get_user_pages_fast(start, nr_pages, 1, pages); if (ret != nr_pages) { if (ret > 0) nr_pages = ret; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index e390dc70b9d9..8dc83d2caa58 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -150,10 +150,7 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, { int ret; - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, user_addr, - nr_pages, write, 0, pages, NULL); - up_read(¤t->mm->mmap_sem); + ret = get_user_pages_fast(user_addr, nr_pages, write, pages); if (ret >= 0 && ret < nr_pages) { while (ret--) -- cgit v1.2.3 From 9c56a84478b708e5d8d34d28cc3a8e71842d5b05 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Thu, 9 Apr 2009 14:09:43 +0000 Subject: RDS: remove unused #include Remove unused #include in net/rds/af_rds.c. Signed-off-by: Huang Weiyi Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/af_rds.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/rds') diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 20cf16fc572f..b11e7e527864 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include "rds.h" -- cgit v1.2.3 From 05a178ecdc7396b78dfbb5d8bda65108b37b8672 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 9 Apr 2009 14:09:44 +0000 Subject: rds: use kmem_cache_zalloc instead of kmem_cache_alloc/memset Use kmem_cache_zalloc instead of kmem_cache_alloc/memset. Signed-off-by: Wei Yongjun Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/connection.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/rds') diff --git a/net/rds/connection.c b/net/rds/connection.c index 273f064930a8..d14445c48304 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -148,14 +148,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, if (conn) goto out; - conn = kmem_cache_alloc(rds_conn_slab, gfp); + conn = kmem_cache_zalloc(rds_conn_slab, gfp); if (conn == NULL) { conn = ERR_PTR(-ENOMEM); goto out; } - memset(conn, 0, sizeof(*conn)); - INIT_HLIST_NODE(&conn->c_hash_node); conn->c_version = RDS_PROTOCOL_3_0; conn->c_laddr = laddr; -- cgit v1.2.3 From 5d57eeb52ae71a03c8e083a9b0a818a9b63ca440 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Apr 2009 14:09:45 +0000 Subject: ERR_PTR() dereference in net/rds/iw.c rdma_create_id() returns ERR_PTR() not null. Found by smatch (http://repo.or.cz/w/smatch.git). Compile tested. regards, dan carpenter Signed-off-by: Dan Carpenter Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/iw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/rds') diff --git a/net/rds/iw.c b/net/rds/iw.c index b732efb5b634..d16e1cbc8e83 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -233,8 +233,8 @@ static int rds_iw_laddr_check(__be32 addr) * IB and iWARP capable NICs. */ cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); - if (!cm_id) - return -EADDRNOTAVAIL; + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; -- cgit v1.2.3 From 94713bab649736177a1c33a39b7bb33cbd5af3a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Apr 2009 14:09:46 +0000 Subject: ERR_PTR() dereference in net/rds/ib.c rdma_create_id() doesn't return NULL, only ERR_PTR(). Found by smatch (http://repo.or.cz/w/smatch.git). Compile tested. regards, dan carpenter Signed-off-by: Dan Carpenter Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/rds') diff --git a/net/rds/ib.c b/net/rds/ib.c index 4933b380985e..b9bcd32431e1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -224,8 +224,8 @@ static int rds_ib_laddr_check(__be32 addr) * IB and iWARP capable NICs. */ cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); - if (!cm_id) - return -EADDRNOTAVAIL; + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; -- cgit v1.2.3