From fcb12fd2236f49aa8fdc1568ed4ebdfe4fddc6b5 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 16:41:59 +0000 Subject: net: rds: remove duplication type definitions __be* are defined in linux/types.h now, and in fact, rds.h isn't exported to user space even. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/rds.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/rds.h') diff --git a/include/linux/rds.h b/include/linux/rds.h index 24bce3ded9ea..7f3971d9fc5c 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -36,15 +36,6 @@ #include -/* These sparse annotated types shouldn't be in any user - * visible header file. We should clean this up rather - * than kludging around them. */ -#ifndef __KERNEL__ -#define __be16 u_int16_t -#define __be32 u_int32_t -#define __be64 u_int64_t -#endif - #define RDS_IB_ABI_VERSION 0x301 /* -- cgit v1.2.3 From 15133f6e67d8d646d0744336b4daa3135452cb0d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 12 Jan 2010 14:33:38 -0800 Subject: RDS: Implement atomic operations Implement a CMSG-based interface to do FADD and CSWP ops. Alter send routines to handle atomic ops. Add atomic counters to stats. Add xmit_atomic() to struct rds_transport Inline rds_ib_send_unmap_rdma into unmap_rm Signed-off-by: Andy Grover --- include/linux/rds.h | 19 +++++++ net/rds/ib.c | 1 + net/rds/ib.h | 1 + net/rds/ib_rdma.c | 4 +- net/rds/ib_send.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++------ net/rds/rdma.c | 73 +++++++++++++++++++++++++++ net/rds/rds.h | 33 +++++++++++-- net/rds/send.c | 71 ++++++++++++++++++++++++-- net/rds/stats.c | 2 + 9 files changed, 321 insertions(+), 23 deletions(-) (limited to 'include/linux/rds.h') diff --git a/include/linux/rds.h b/include/linux/rds.h index 7f3971d9fc5c..9239152abf7a 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -73,6 +73,8 @@ #define RDS_CMSG_RDMA_MAP 3 #define RDS_CMSG_RDMA_STATUS 4 #define RDS_CMSG_CONG_UPDATE 5 +#define RDS_CMSG_ATOMIC_FADD 6 +#define RDS_CMSG_ATOMIC_CSWP 7 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -237,6 +239,23 @@ struct rds_rdma_args { u_int64_t user_token; }; +struct rds_atomic_args { + rds_rdma_cookie_t cookie; + uint64_t local_addr; + uint64_t remote_addr; + union { + struct { + uint64_t compare; + uint64_t swap; + } cswp; + struct { + uint64_t add; + } fadd; + }; + uint64_t flags; + uint64_t user_token; +}; + struct rds_rdma_notify { u_int64_t user_token; int32_t status; diff --git a/net/rds/ib.c b/net/rds/ib.c index 8f2d6dd7700a..f0d29656baff 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -264,6 +264,7 @@ struct rds_transport rds_ib_transport = { .xmit = rds_ib_xmit, .xmit_cong_map = NULL, .xmit_rdma = rds_ib_xmit_rdma, + .xmit_atomic = rds_ib_xmit_atomic, .recv = rds_ib_recv, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, diff --git a/net/rds/ib.h b/net/rds/ib.h index 64df4e79b29f..d2fd0aa4fde7 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -336,6 +336,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, u32 *adv_credits, int need_posted, int max_posted); +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0f3b5a2f3fe0..242231f09464 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -298,7 +298,9 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE), + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_ATOMIC), + &pool->fmr_attr); if (IS_ERR(ibmr->fmr)) { err = PTR_ERR(ibmr->fmr); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index f0edfdb2866c..b2bd164434ad 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -62,15 +62,17 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm, rds_rdma_send_complete(rm, notify_status); } -static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, - struct rds_rdma_op *op) +static void rds_ib_send_atomic_complete(struct rds_message *rm, + int wc_status) { - if (op->r_mapped) { - ib_dma_unmap_sg(ic->i_cm_id->device, - op->r_sg, op->r_nents, - op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - op->r_mapped = 0; - } + int notify_status; + + if (wc_status != IB_WC_SUCCESS) + notify_status = RDS_RDMA_OTHER_ERROR; + else + notify_status = RDS_RDMA_SUCCESS; + + rds_atomic_send_complete(rm, notify_status); } static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, @@ -86,7 +88,14 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, DMA_TO_DEVICE); if (rm->rdma.m_rdma_op.r_active) { - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + struct rds_rdma_op *op = &rm->rdma.m_rdma_op; + + if (op->r_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, + op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + op->r_mapped = 0; + } /* If the user asked for a completion notification on this * message, we can implement three different semantics: @@ -116,6 +125,24 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes); } + if (rm->atomic.op_active) { + struct rm_atomic_op *op = &rm->atomic; + + /* unmap atomic recvbuf */ + if (op->op_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1, + DMA_FROM_DEVICE); + op->op_mapped = 0; + } + + rds_ib_send_atomic_complete(rm, wc_status); + + if (rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP) + rds_stats_inc(s_atomic_cswp); + else + rds_stats_inc(s_atomic_fadd); + } + /* If anyone waited for this message to get flushed out, wake * them up now */ rds_message_unmapped(rm); @@ -158,12 +185,9 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic) u32 i; for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { - if (send->s_wr.opcode == 0xdead) + if (!send->s_rm || send->s_wr.opcode == 0xdead) continue; - if (send->s_rm) - rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); - if (send->s_op) - rds_ib_send_unmap_rdma(ic, send->s_op); + rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); } } @@ -218,6 +242,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_ATOMIC_CMP_AND_SWP: /* Nothing to be done - the SG list will be unmapped * when the SEND completes. */ break; @@ -243,8 +269,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_send_get_message(conn, send->s_op); if (rm) { - if (rm->rdma.m_rdma_op.r_active) - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + rds_ib_send_unmap_rm(ic, send, wc.status); rds_ib_send_rdma_complete(rm, wc.status); rds_message_put(rm); } @@ -736,6 +761,89 @@ out: return ret; } +/* + * Issue atomic operation. + * A simplified version of the rdma case, we always map 1 SG, and + * only 8 bytes, for the return value from the atomic operation. + */ +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_send_work *send = NULL; + struct ib_send_wr *failed_wr; + struct rds_ib_device *rds_ibdev; + u32 pos; + u32 work_alloc; + int ret; + + rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); + + work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos); + if (work_alloc != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + /* address of send request in ring */ + send = &ic->i_sends[pos]; + send->s_queued = jiffies; + + if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { + send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_compare; + send->s_wr.wr.atomic.swap = op->op_swap_add; + } else { /* FADD */ + send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.wr.atomic.swap = 0; + } + send->s_wr.send_flags = IB_SEND_SIGNALED; + send->s_wr.num_sge = 1; + send->s_wr.next = NULL; + send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; + send->s_wr.wr.atomic.rkey = op->op_rkey; + + /* map 8 byte retval buffer to the device */ + ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE); + rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret); + if (ret != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + + /* Convert our struct scatterlist to struct ib_sge */ + send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].lkey = ic->i_mr->lkey; + + rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, + send->s_sge[0].addr, send->s_sge[0].length); + + failed_wr = &send->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); + rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, + send, &send->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &send->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + goto out; + } + + if (unlikely(failed_wr != &send->s_wr)) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); + BUG_ON(failed_wr != &send->s_wr); + } + +out: + return ret; +} + int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) { struct rds_ib_connection *ic = conn->c_transport_data; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4fda33045598..a7019df38c70 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -719,3 +719,76 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr); } + +/* + * Fill in rds_message for an atomic request. + */ +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + struct page *page = NULL; + struct rds_atomic_args *args; + int ret = 0; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args)) + || rm->atomic.op_active) + return -EINVAL; + + args = CMSG_DATA(cmsg); + + if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_swap_add = args->cswp.swap; + rm->atomic.op_compare = args->cswp.compare; + } else { + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_swap_add = args->fadd.add; + } + + rm->m_rdma_cookie = args->cookie; + rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_recverr = rs->rs_recverr; + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); + + /* verify 8 byte-aligned */ + if (args->local_addr & 0x7) { + ret = -EFAULT; + goto err; + } + + ret = rds_pin_pages(args->local_addr, 1, &page, 1); + if (ret != 1) + goto err; + ret = 0; + + sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr)); + + if (rm->atomic.op_notify || rm->atomic.op_recverr) { + /* We allocate an uninitialized notifier here, because + * we don't want to do that in the completion handler. We + * would have to use GFP_ATOMIC there, and don't want to deal + * with failed allocations. + */ + rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL); + if (!rm->atomic.op_notifier) { + ret = -ENOMEM; + goto err; + } + + rm->atomic.op_notifier->n_user_token = args->user_token; + rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS; + } + + rm->atomic.op_rkey = rds_rdma_cookie_key(rm->m_rdma_cookie); + rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie); + + rm->atomic.op_active = 1; + + return ret; +err: + if (page) + put_page(page); + kfree(rm->atomic.op_notifier); + + return ret; +} diff --git a/net/rds/rds.h b/net/rds/rds.h index 0bb4957e0cfc..830e2bbb3332 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -97,6 +97,7 @@ struct rds_connection { unsigned int c_xmit_hdr_off; unsigned int c_xmit_data_off; unsigned int c_xmit_rdma_sent; + unsigned int c_xmit_atomic_sent; spinlock_t c_lock; /* protect msg queues */ u64 c_next_tx_seq; @@ -260,6 +261,10 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) return cookie >> 32; } +/* atomic operation types */ +#define RDS_ATOMIC_TYPE_CSWP 0 +#define RDS_ATOMIC_TYPE_FADD 1 + /* * m_sock_item and m_conn_item are on lists that are serialized under * conn->c_lock. m_sock_item has additional meaning in that once it is empty @@ -315,11 +320,27 @@ struct rds_message { struct rds_sock *m_rs; rds_rdma_cookie_t m_rdma_cookie; struct { - struct { + struct rm_atomic_op { + int op_type; + uint64_t op_swap_add; + uint64_t op_compare; + + u32 op_rkey; + u64 op_remote_addr; + unsigned int op_notify:1; + unsigned int op_recverr:1; + unsigned int op_mapped:1; + unsigned int op_active:1; + struct rds_notifier *op_notifier; + struct scatterlist *op_sg; + + struct rds_mr *op_rdma_mr; + } atomic; + struct rm_rdma_op { struct rds_rdma_op m_rdma_op; struct rds_mr *m_rdma_mr; } rdma; - struct { + struct rm_data_op { unsigned int m_nents; unsigned int m_count; struct scatterlist *m_sg; @@ -397,6 +418,7 @@ struct rds_transport { int (*xmit_cong_map)(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset); int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op); + int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); int (*recv)(struct rds_connection *conn); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, size_t size); @@ -546,6 +568,8 @@ struct rds_statistics { uint64_t s_cong_update_received; uint64_t s_cong_send_error; uint64_t s_cong_send_blocked; + uint64_t s_atomic_cswp; + uint64_t s_atomic_fadd; }; /* af_rds.c */ @@ -722,7 +746,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); void rds_rdma_free_op(struct rds_rdma_op *ro); -void rds_rdma_send_complete(struct rds_message *rm, int); +void rds_rdma_send_complete(struct rds_message *rm, int wc_status); +void rds_atomic_send_complete(struct rds_message *rm, int wc_status); +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); extern void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) diff --git a/net/rds/send.c b/net/rds/send.c index b751a8e77c41..f3f4e79274bf 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -73,6 +73,7 @@ void rds_send_reset(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; conn->c_map_queued = 0; @@ -171,6 +172,7 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; /* Release the reference to the previous message. */ rds_message_put(rm); @@ -262,6 +264,17 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } + + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, &rm->atomic); + if (ret) + break; + conn->c_xmit_atomic_sent = 1; + /* The transport owns the mapped memory for now. + * You can't unmap it while it's on the send queue */ + set_bit(RDS_MSG_MAPPED, &rm->m_flags); + } + /* * Try and send an rdma message. Let's see if we can * keep this simple and require that the transport either @@ -442,6 +455,41 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) } EXPORT_SYMBOL_GPL(rds_rdma_send_complete); +/* + * Just like above, except looks at atomic op + */ +void rds_atomic_send_complete(struct rds_message *rm, int status) +{ + struct rds_sock *rs = NULL; + struct rm_atomic_op *ao; + struct rds_notifier *notifier; + + spin_lock(&rm->m_rs_lock); + + ao = &rm->atomic; + if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) + && ao->op_active && ao->op_notify && ao->op_notifier) { + notifier = ao->op_notifier; + rs = rm->m_rs; + sock_hold(rds_rs_to_sk(rs)); + + notifier->n_status = status; + spin_lock(&rs->rs_lock); + list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); + spin_unlock(&rs->rs_lock); + + ao->op_notifier = NULL; + } + + spin_unlock(&rm->m_rs_lock); + + if (rs) { + rds_wake_sk_sleep(rs); + sock_put(rds_rs_to_sk(rs)); + } +} +EXPORT_SYMBOL_GPL(rds_atomic_send_complete); + /* * This is the same as rds_rdma_send_complete except we * don't do any locking - we have all the ingredients (message, @@ -788,6 +836,11 @@ static int rds_rm_size(struct msghdr *msg, int data_len) /* these are valid but do no add any size */ break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + size += sizeof(struct scatterlist); + break; + default: return -EINVAL; } @@ -813,7 +866,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, continue; /* As a side effect, RDMA_DEST and RDMA_MAP will set - * rm->m_rdma_cookie and rm->m_rdma_mr. + * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr. */ switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: @@ -829,6 +882,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, if (!ret) *allocated_mr = 1; break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + ret = rds_cmsg_atomic(rs, rm, cmsg); + break; default: return -EINVAL; @@ -926,10 +983,18 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) && - !conn->c_trans->xmit_rdma) { + !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", - &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + ret = -EOPNOTSUPP; + goto out; + } + + if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { + if (printk_ratelimit()) + printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", + &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; } diff --git a/net/rds/stats.c b/net/rds/stats.c index 7598eb07cfb1..c66d95d9c262 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -75,6 +75,8 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "s_atomic_cswp", + "s_atomic_fadd", }; void rds_stats_info_copy(struct rds_info_iterator *iter, -- cgit v1.2.3 From 2c3a5f9abb1dc5efdab8ba9a568b1661c65fd1e3 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 1 Mar 2010 16:10:40 -0800 Subject: RDS: Add flag for silent ops. Do atomic op before RDMA Add a flag to the API so users can indicate they want silent operations. This is needed because silent ops cannot be used with USE_ONCE MRs, so we can't just assume silent. Also, change send_xmit to do atomic op before rdma op if both are present, and centralize the hairy logic to determine if we want to attempt silent, or not. Signed-off-by: Andy Grover --- include/linux/rds.h | 1 + net/rds/rdma.c | 2 ++ net/rds/rds.h | 2 ++ net/rds/send.c | 55 ++++++++++++++++++++++++++++++----------------------- 4 files changed, 36 insertions(+), 24 deletions(-) (limited to 'include/linux/rds.h') diff --git a/include/linux/rds.h b/include/linux/rds.h index 9239152abf7a..109f1d343318 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -276,5 +276,6 @@ struct rds_rdma_notify { #define RDS_RDMA_USE_ONCE 0x0008 /* free MR after use */ #define RDS_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ #define RDS_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ +#define RDS_RDMA_SILENT 0x0040 /* Do not interrupt remote */ #endif /* IB_RDS_H */ diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 5ba514684431..48781fe4431c 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -559,6 +559,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_write = !!(args->flags & RDS_RDMA_READWRITE); op->op_fence = !!(args->flags & RDS_RDMA_FENCE); op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + op->op_silent = !!(args->flags & RDS_RDMA_SILENT); op->op_active = 1; op->op_recverr = rs->rs_recverr; WARN_ON(!nr_pages); @@ -747,6 +748,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); diff --git a/net/rds/rds.h b/net/rds/rds.h index 46d190d08549..23b921000e74 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -319,6 +319,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; struct scatterlist *op_sg; struct rds_notifier *op_notifier; @@ -333,6 +334,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; unsigned int op_bytes; unsigned int op_nents; diff --git a/net/rds/send.c b/net/rds/send.c index cdca9747fcbc..38567f3ee7e8 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -250,42 +250,50 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } - if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { - ret = conn->c_trans->xmit_atomic(conn, rm); + /* The transport either sends the whole rdma or none of it */ + if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { + ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); if (ret) break; - conn->c_xmit_atomic_sent = 1; + conn->c_xmit_rdma_sent = 1; + /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); - - /* - * This is evil, muahaha. - * We permit 0-byte sends. (rds-ping depends on this.) - * BUT if there is an atomic op and no sent data, - * we turn off sending the header, to achieve - * "silent" atomics. - * But see below; RDMA op might toggle this back on! - */ - if (rm->data.op_nents == 0) - rm->data.op_active = 0; } - /* The transport either sends the whole rdma or none of it */ - if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { - ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, rm); if (ret) break; - conn->c_xmit_rdma_sent = 1; - - /* rdmas need data sent, even if just the header */ - rm->data.op_active = 1; - + conn->c_xmit_atomic_sent = 1; /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); } + /* + * A number of cases require an RDS header to be sent + * even if there is no data. + * We permit 0-byte sends; rds-ping depends on this. + * However, if there are exclusively attached silent ops, + * we skip the hdr/data send, to enable silent operation. + */ + if (rm->data.op_nents == 0) { + int ops_present; + int all_ops_are_silent = 1; + + ops_present = (rm->atomic.op_active || rm->rdma.op_active); + if (rm->atomic.op_active && !rm->atomic.op_silent) + all_ops_are_silent = 0; + if (rm->rdma.op_active && !rm->rdma.op_silent) + all_ops_are_silent = 0; + + if (ops_present && all_ops_are_silent + && !rm->m_rdma_cookie) + rm->data.op_active = 0; + } + if (rm->data.op_active && !conn->c_xmit_data_sent) { ret = conn->c_trans->xmit(conn, rm, conn->c_xmit_hdr_off, @@ -1009,8 +1017,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (ret) goto out; - if ((rm->m_rdma_cookie || rm->rdma.op_active) && - !conn->c_trans->xmit_rdma) { + if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); -- cgit v1.2.3 From 20c72bd5f5f902e5a8745d51573699605bf8d21c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 05:51:28 -0700 Subject: RDS: Implement masked atomic operations Add two CMSGs for masked versions of cswp and fadd. args struct modified to use a union for different atomic op type's arguments. Change IB to do masked atomic ops. Atomic op type in rds_message similarly unionized. Signed-off-by: Andy Grover --- include/linux/rds.h | 12 ++++++++++++ net/rds/ib_send.c | 14 +++++++++----- net/rds/rdma.c | 33 +++++++++++++++++++++++++++------ net/rds/rds.h | 14 ++++++++++++-- net/rds/send.c | 4 ++++ 5 files changed, 64 insertions(+), 13 deletions(-) (limited to 'include/linux/rds.h') diff --git a/include/linux/rds.h b/include/linux/rds.h index 109f1d343318..a2a5edb4a276 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -75,6 +75,8 @@ #define RDS_CMSG_CONG_UPDATE 5 #define RDS_CMSG_ATOMIC_FADD 6 #define RDS_CMSG_ATOMIC_CSWP 7 +#define RDS_CMSG_MASKED_ATOMIC_FADD 8 +#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -251,6 +253,16 @@ struct rds_atomic_args { struct { uint64_t add; } fadd; + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } m_fadd; }; uint64_t flags; uint64_t user_token; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 808544aebb70..71f373c421bc 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -807,13 +807,17 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_queued = jiffies; if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { - send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; - send->s_wr.wr.atomic.compare_add = op->op_compare; - send->s_wr.wr.atomic.swap = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; + send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; + send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; } else { /* FADD */ - send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; - send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; send->s_wr.wr.atomic.swap = 0; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; + send->s_wr.wr.atomic.swap_mask = 0; } nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_wr.num_sge = 1; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 48781fe4431c..48064673fc76 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -738,13 +738,34 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, args = CMSG_DATA(cmsg); - if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { - rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; - rm->atomic.op_swap_add = args->cswp.swap; - rm->atomic.op_compare = args->cswp.compare; - } else { + /* Nonmasked & masked cmsg ops converted to masked hw ops */ + switch (cmsg->cmsg_type) { + case RDS_CMSG_ATOMIC_FADD: + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_m_fadd.add = args->fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = 0; + break; + case RDS_CMSG_MASKED_ATOMIC_FADD: rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; - rm->atomic.op_swap_add = args->fadd.add; + rm->atomic.op_m_fadd.add = args->m_fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask; + break; + case RDS_CMSG_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->cswp.compare; + rm->atomic.op_m_cswp.swap = args->cswp.swap; + rm->atomic.op_m_cswp.compare_mask = ~0; + rm->atomic.op_m_cswp.swap_mask = ~0; + break; + case RDS_CMSG_MASKED_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->m_cswp.compare; + rm->atomic.op_m_cswp.swap = args->m_cswp.swap; + rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask; + rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask; + break; + default: + BUG(); /* should never happen */ } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); diff --git a/net/rds/rds.h b/net/rds/rds.h index aadaddba88a7..8103dcf8b976 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -316,8 +316,18 @@ struct rds_message { struct { struct rm_atomic_op { int op_type; - uint64_t op_swap_add; - uint64_t op_compare; + union { + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } op_m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } op_m_fadd; + }; u32 op_rkey; u64 op_remote_addr; diff --git a/net/rds/send.c b/net/rds/send.c index 81471b25373b..9b951a0ab6b7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -843,6 +843,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len) case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: cmsg_groups |= 1; size += sizeof(struct scatterlist); break; @@ -894,6 +896,8 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, break; case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: ret = rds_cmsg_atomic(rs, rm, cmsg); break; -- cgit v1.2.3 From fd128dfa50cfc4f2959dc4aa5d7468d33b988332 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:32:17 -0700 Subject: RDS: Add rds.h to exported headers list Also, a number of changes were made based on the assumption that rds.h wasn't exported, so roll these back. Signed-off-by: Andy Grover --- include/linux/Kbuild | 1 + include/linux/rds.h | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux/rds.h') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429ff..c7fbf298ad68 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -302,6 +302,7 @@ header-y += quota.h header-y += radeonfb.h header-y += random.h header-y += raw.h +header-y += rds.h header-y += reboot.h header-y += reiserfs_fs.h header-y += reiserfs_xattr.h diff --git a/include/linux/rds.h b/include/linux/rds.h index a2a5edb4a276..f371f885a352 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -95,7 +95,7 @@ struct rds_info_counter { u_int8_t name[32]; u_int64_t value; -} __packed; +} __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 #define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02 @@ -110,7 +110,7 @@ struct rds_info_connection { __be32 faddr; u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_flow { __be32 laddr; @@ -118,7 +118,7 @@ struct rds_info_flow { u_int32_t bytes; __be16 lport; __be16 fport; -} __packed; +} __attribute__((packed)); #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 @@ -131,7 +131,7 @@ struct rds_info_message { __be16 lport; __be16 fport; u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_socket { u_int32_t sndbuf; @@ -141,7 +141,7 @@ struct rds_info_socket { __be16 connected_port; u_int32_t rcvbuf; u_int64_t inum; -} __packed; +} __attribute__((packed)); struct rds_info_tcp_socket { __be32 local_addr; @@ -153,7 +153,7 @@ struct rds_info_tcp_socket { u_int32_t last_sent_nxt; u_int32_t last_expected_una; u_int32_t last_seen_una; -} __packed; +} __attribute__((packed)); #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { -- cgit v1.2.3 From a46f561b774d90d8616473d56696e7d44fa1c9f1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:34:10 -0700 Subject: RDS: rds.h: Replace u_int[size]_t with uint[size]_t Replace e.g. u_int32_t types with the more common uint32_t. Reported-by: Matthew Wilcox Signed-off-by: Andy Grover --- include/linux/rds.h | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux/rds.h') diff --git a/include/linux/rds.h b/include/linux/rds.h index f371f885a352..3576b31b6b7b 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -93,8 +93,8 @@ #define RDS_INFO_LAST 10010 struct rds_info_counter { - u_int8_t name[32]; - u_int64_t value; + uint8_t name[32]; + uint64_t value; } __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 @@ -104,18 +104,18 @@ struct rds_info_counter { #define TRANSNAMSIZ 16 struct rds_info_connection { - u_int64_t next_tx_seq; - u_int64_t next_rx_seq; + uint64_t next_tx_seq; + uint64_t next_rx_seq; __be32 laddr; __be32 faddr; - u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ - u_int8_t flags; + uint8_t transport[TRANSNAMSIZ]; /* null term ascii */ + uint8_t flags; } __attribute__((packed)); struct rds_info_flow { __be32 laddr; __be32 faddr; - u_int32_t bytes; + uint32_t bytes; __be16 lport; __be16 fport; } __attribute__((packed)); @@ -124,23 +124,23 @@ struct rds_info_flow { #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 struct rds_info_message { - u_int64_t seq; - u_int32_t len; + uint64_t seq; + uint32_t len; __be32 laddr; __be32 faddr; __be16 lport; __be16 fport; - u_int8_t flags; + uint8_t flags; } __attribute__((packed)); struct rds_info_socket { - u_int32_t sndbuf; + uint32_t sndbuf; __be32 bound_addr; __be32 connected_addr; __be16 bound_port; __be16 connected_port; - u_int32_t rcvbuf; - u_int64_t inum; + uint32_t rcvbuf; + uint64_t inum; } __attribute__((packed)); struct rds_info_tcp_socket { @@ -148,11 +148,11 @@ struct rds_info_tcp_socket { __be16 local_port; __be32 peer_addr; __be16 peer_port; - u_int64_t hdr_rem; - u_int64_t data_rem; - u_int32_t last_sent_nxt; - u_int32_t last_expected_una; - u_int32_t last_seen_una; + uint64_t hdr_rem; + uint64_t data_rem; + uint32_t last_sent_nxt; + uint32_t last_expected_una; + uint32_t last_seen_una; } __attribute__((packed)); #define RDS_IB_GID_LEN 16 @@ -207,38 +207,38 @@ struct rds_info_rdma_connection { * (so that the application does not have to worry about * alignment). */ -typedef u_int64_t rds_rdma_cookie_t; +typedef uint64_t rds_rdma_cookie_t; struct rds_iovec { - u_int64_t addr; - u_int64_t bytes; + uint64_t addr; + uint64_t bytes; }; struct rds_get_mr_args { struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_get_mr_for_dest_args { struct sockaddr_storage dest_addr; struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_free_mr_args { rds_rdma_cookie_t cookie; - u_int64_t flags; + uint64_t flags; }; struct rds_rdma_args { rds_rdma_cookie_t cookie; struct rds_iovec remote_vec; - u_int64_t local_vec_addr; - u_int64_t nr_local; - u_int64_t flags; - u_int64_t user_token; + uint64_t local_vec_addr; + uint64_t nr_local; + uint64_t flags; + uint64_t user_token; }; struct rds_atomic_args { @@ -269,7 +269,7 @@ struct rds_atomic_args { }; struct rds_rdma_notify { - u_int64_t user_token; + uint64_t user_token; int32_t status; }; -- cgit v1.2.3 From 905d64c89e2a9d71d0606904b7c3908633db6072 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 8 Sep 2010 18:03:54 -0700 Subject: RDS: Remove dead struct from rds.h flows are an obsolete date type. Signed-off-by: Andy Grover --- include/linux/rds.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux/rds.h') diff --git a/include/linux/rds.h b/include/linux/rds.h index 3576b31b6b7b..91950950aa59 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -112,14 +112,6 @@ struct rds_info_connection { uint8_t flags; } __attribute__((packed)); -struct rds_info_flow { - __be32 laddr; - __be32 faddr; - uint32_t bytes; - __be16 lport; - __be16 fport; -} __attribute__((packed)); - #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 -- cgit v1.2.3