summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/pio.h
diff options
context:
space:
mode:
authorSebastian Sanchez <sebastian.sanchez@intel.com>2016-10-25 23:12:34 +0300
committerDoug Ledford <dledford@redhat.com>2016-11-16 00:37:27 +0300
commit8af8d2970ed98493a2db88dfcad88b0065e55e79 (patch)
tree57841ee0bdf580dc5c23fe6de9a52dfe20cf005d /drivers/infiniband/hw/hfi1/pio.h
parent2474d775d9e2f935ff6840c8b21b4262afacc821 (diff)
downloadlinux-8af8d2970ed98493a2db88dfcad88b0065e55e79.tar.xz
IB/hfi1: Optimize pio_buf and send_context structs
Both pio_buf and send_context structs have oversized fields and have cachelines that can be optimized. Reduce oversized fields for both structs. Make sure pio_buf struct fits within a cacheline. Move read-only fields to their own cacheline in send_context struct. All of this will avoid cacheline trading as the ring progresses and pio buffers/send contexts are used. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1/pio.h')
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h29
1 files changed, 15 insertions, 14 deletions
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 498b548055e0..867e5ffc3595 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -83,43 +83,43 @@ struct pio_buf {
void *arg; /* argument for cb */
void __iomem *start; /* buffer start address */
void __iomem *end; /* context end address */
- unsigned long size; /* context size, in bytes */
unsigned long sent_at; /* buffer is sent when <= free */
- u32 block_count; /* size of buffer, in blocks */
- u32 qw_written; /* QW written so far */
- u32 carry_bytes; /* number of valid bytes in carry */
union mix carry; /* pending unwritten bytes */
+ u16 qw_written; /* QW written so far */
+ u8 carry_bytes; /* number of valid bytes in carry */
};
/* cache line aligned pio buffer array */
union pio_shadow_ring {
struct pio_buf pbuf;
- u64 unused[16]; /* cache line spacer */
} ____cacheline_aligned;
/* per-NUMA send context */
struct send_context {
/* read-only after init */
struct hfi1_devdata *dd; /* device */
- void __iomem *base_addr; /* start of PIO memory */
union pio_shadow_ring *sr; /* shadow ring */
+ void __iomem *base_addr; /* start of PIO memory */
+ u32 __percpu *buffers_allocated;/* count of buffers allocated */
+ u32 size; /* context size, in bytes */
- struct work_struct halt_work; /* halted context work queue entry */
- unsigned long flags; /* flags */
int node; /* context home node */
- int type; /* context type */
- u32 sw_index; /* software index number */
- u32 hw_context; /* hardware context number */
- u32 credits; /* number of blocks in context */
u32 sr_size; /* size of the shadow ring */
- u32 group; /* credit return group */
+ u16 flags; /* flags */
+ u8 type; /* context type */
+ u8 sw_index; /* software index number */
+ u8 hw_context; /* hardware context number */
+ u8 group; /* credit return group */
+
/* allocator fields */
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
u32 sr_head; /* shadow ring head */
unsigned long fill; /* official alloc count */
unsigned long alloc_free; /* copy of free (less cache thrash) */
- u32 __percpu *buffers_allocated;/* count of buffers allocated */
u32 fill_wrap; /* tracks fill within ring */
+ u32 credits; /* number of blocks in context */
+ /* adding a new field here would make it part of this cacheline */
+
/* releaser fields */
spinlock_t release_lock ____cacheline_aligned_in_smp;
u32 sr_tail; /* shadow ring tail */
@@ -131,6 +131,7 @@ struct send_context {
u32 credit_intr_count; /* count of credit intr users */
u64 credit_ctrl; /* cache for credit control */
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
+ struct work_struct halt_work; /* halted context work queue entry */
};
/* send context flags */