summary | refs | log | tree | commit | diff
path: root/drivers/infiniband/hw/qib/qib_verbs.h
diff options
context:
space:
mode:
author: Mike Marciniszyn <mike.marciniszyn@intel.com> 2012-05-07 22:02:31 +0400
committer: Roland Dreier <roland@purestorage.com> 2012-05-14 23:43:34 +0400
commit: 1c94283ddbe8a9945c4aaac8b0be90d47f97f2df (patch)
tree: 37d46ea239d1e872007bd0dc93b2a3d40311afa8 /drivers/infiniband/hw/qib/qib_verbs.h
parent: 3236b2d469dba42fde837b8cb06308f7f360dfed (diff)
download: linux-1c94283ddbe8a9945c4aaac8b0be90d47f97f2df.tar.xz
IB/qib: Add cache line awareness to qib_qp and qib_devdata structures
This patch reorganizes the QP and devdata files to be more cache-line aware. qib_qp fields in particular are split into read-mostly, send, and receive fields. qib_devdata fields are split into read-mostly and read/write fields. Testing has shown that bidirectional tests improve by as much as 100% with this patch. Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/hw/qib/qib_verbs.h')
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.h 145
1 files changed, 82 insertions, 63 deletions
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 0c19ef0c4123..487606024659 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -367,9 +367,10 @@ struct qib_rwq {
struct qib_rq {
struct qib_rwq *wq;
- spinlock_t lock; /* protect changes in this struct */
u32 size; /* size of RWQE array */
u8 max_sge;
+ spinlock_t lock /* protect changes in this struct */
+ ____cacheline_aligned_in_smp;
};
struct qib_srq {
@@ -412,31 +413,75 @@ struct qib_ack_entry {
*/
struct qib_qp {
struct ib_qp ibqp;
- struct qib_qp *next; /* link list for QPN hash table */
- struct qib_qp *timer_next; /* link list for qib_ib_timer() */
- struct list_head iowait; /* link for wait PIO buf */
- struct list_head rspwait; /* link for waititing to respond */
+ /* read mostly fields above and below */
struct ib_ah_attr remote_ah_attr;
struct ib_ah_attr alt_ah_attr;
- struct qib_ib_header s_hdr; /* next packet header to send */
- atomic_t refcount;
- wait_queue_head_t wait;
- wait_queue_head_t wait_dma;
- struct timer_list s_timer;
- struct work_struct s_work;
+ struct qib_qp *next; /* link list for QPN hash table */
+ struct qib_swqe *s_wq; /* send work queue */
struct qib_mmap_info *ip;
+ struct qib_ib_header *s_hdr; /* next packet header to send */
+ unsigned long timeout_jiffies; /* computed from timeout */
+
+ enum ib_mtu path_mtu;
+ u32 remote_qpn;
+ u32 pmtu; /* decoded from path_mtu */
+ u32 qkey; /* QKEY for this QP (for UD or RD) */
+ u32 s_size; /* send work queue size */
+ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
+
+ u8 state; /* QP state */
+ u8 qp_access_flags;
+ u8 alt_timeout; /* Alternate path timeout for this QP */
+ u8 timeout; /* Timeout for this QP */
+ u8 s_srate;
+ u8 s_mig_state;
+ u8 port_num;
+ u8 s_pkey_index; /* PKEY index to use */
+ u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_retry_cnt; /* number of times to retry */
+ u8 s_rnr_retry_cnt;
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
+ u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 s_draining;
+
+ /* start of read/write fields */
+
+ atomic_t refcount ____cacheline_aligned_in_smp;
+ wait_queue_head_t wait;
+
+
+ struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
+ ____cacheline_aligned_in_smp;
+ struct qib_sge_state s_rdma_read_sge;
+
+ spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
+ unsigned long r_aflags;
+ u64 r_wr_id; /* ID for current receive WQE */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
+ u32 r_len; /* total length of r_sge */
+ u32 r_rcv_len; /* receive data len processed */
+ u32 r_psn; /* expected rcv packet sequence number */
+ u32 r_msn; /* message sequence number */
+
+ u8 r_state; /* opcode of last packet received */
+ u8 r_flags;
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
+
+ struct list_head rspwait; /* link for waititing to respond */
+
+ struct qib_sge_state r_sge; /* current receive data */
+ struct qib_rq r_rq; /* receive work queue */
+
+ spinlock_t s_lock ____cacheline_aligned_in_smp;
struct qib_sge_state *s_cur_sge;
+ u32 s_flags;
struct qib_verbs_txreq *s_tx;
- struct qib_mregion *s_rdma_mr;
+ struct qib_swqe *s_wqe;
struct qib_sge_state s_sge; /* current send request data */
- struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
- struct qib_sge_state s_ack_rdma_sge;
- struct qib_sge_state s_rdma_read_sge;
- struct qib_sge_state r_sge; /* current receive data */
- spinlock_t r_lock; /* used for APM */
- spinlock_t s_lock;
+ struct qib_mregion *s_rdma_mr;
atomic_t s_dma_busy;
- u32 s_flags;
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
@@ -447,60 +492,34 @@ struct qib_qp {
u32 s_psn; /* current packet sequence number */
u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
- u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
- u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
- u64 r_wr_id; /* ID for current receive WQE */
- unsigned long r_aflags;
- u32 r_len; /* total length of r_sge */
- u32 r_rcv_len; /* receive data len processed */
- u32 r_psn; /* expected rcv packet sequence number */
- u32 r_msn; /* message sequence number */
+ u32 s_head; /* new entries added here */
+ u32 s_tail; /* next entry to process */
+ u32 s_cur; /* current work queue entry */
+ u32 s_acked; /* last un-ACK'ed entry */
+ u32 s_last; /* last completed entry */
+ u32 s_ssn; /* SSN of tail entry */
+ u32 s_lsn; /* limit sequence number (credit) */
u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u16 s_rdma_ack_cnt;
- u8 state; /* QP state */
u8 s_state; /* opcode of last packet sent */
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
- u8 r_state; /* opcode of last packet received */
u8 r_nak_state; /* non-zero if NAK is pending */
- u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
- u8 r_flags;
- u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
- u8 r_head_ack_queue; /* index into s_ack_queue[] */
- u8 qp_access_flags;
- u8 s_max_sge; /* size of s_wq->sg_list */
- u8 s_retry_cnt; /* number of times to retry */
- u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
- u8 s_pkey_index; /* PKEY index to use */
- u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
- u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
- u8 s_srate;
- u8 s_draining;
- u8 s_mig_state;
- u8 timeout; /* Timeout for this QP */
- u8 alt_timeout; /* Alternate path timeout for this QP */
- u8 port_num;
- enum ib_mtu path_mtu;
- u32 pmtu; /* decoded from path_mtu */
- u32 remote_qpn;
- u32 qkey; /* QKEY for this QP (for UD or RD) */
- u32 s_size; /* send work queue size */
- u32 s_head; /* new entries added here */
- u32 s_tail; /* next entry to process */
- u32 s_cur; /* current work queue entry */
- u32 s_acked; /* last un-ACK'ed entry */
- u32 s_last; /* last completed entry */
- u32 s_ssn; /* SSN of tail entry */
- u32 s_lsn; /* limit sequence number (credit) */
- unsigned long timeout_jiffies; /* computed from timeout */
- struct qib_swqe *s_wq; /* send work queue */
- struct qib_swqe *s_wqe;
- struct qib_rq r_rq; /* receive work queue */
- struct qib_sge r_sg_list[0]; /* verified SGEs */
+
+ struct qib_sge_state s_ack_rdma_sge;
+ struct timer_list s_timer;
+ struct list_head iowait; /* link for wait PIO buf */
+
+ struct work_struct s_work;
+
+ wait_queue_head_t wait_dma;
+
+ struct qib_sge r_sg_list[0] /* verified SGEs */
+ ____cacheline_aligned_in_smp;
};
/*