summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h1
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c1
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c46
-rw-r--r--drivers/misc/habanalabs/goya/goya.c4
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h4
5 files changed, 40 insertions, 16 deletions
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 931fa7092646..44752e5954ca 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1380,6 +1380,7 @@ struct hl_asic_funcs {
enum dma_data_direction dir);
void (*add_end_of_cb_packets)(struct hl_device *hdev,
void *kernel_address, u32 len,
+ u32 original_len,
u64 cq_addr, u32 cq_val, u32 msix_num,
bool eb);
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 6103e479e855..32408887dd7c 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -308,6 +308,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
+ job->user_cb_size,
cq_addr,
le32_to_cpu(cq_pkt.data),
q->msi_vec,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 172c21f262e7..453de3d27d0c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1423,6 +1423,19 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
return 0;
}
+static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
+{
+ u32 cacheline_end, additional_commands;
+
+ cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
+ additional_commands = sizeof(struct packet_msg_prot) * 2;
+
+ if (user_cb_size + additional_commands > cacheline_end)
+ return cacheline_end - user_cb_size + additional_commands;
+ else
+ return additional_commands;
+}
+
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs,
enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
@@ -1443,7 +1456,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
* 1 fence packet
* 4 msg short packets for monitor 2 configuration
* 1 fence packet
- * 2 msg prot packets for completion and MSI-X
+ * 2 msg prot packets for completion and MSI
*/
cb_size = sizeof(struct packet_msg_short) * 8 +
sizeof(struct packet_fence) * 2 +
@@ -5337,11 +5350,13 @@ static int gaudi_validate_cb(struct hl_device *hdev,
/*
* The new CB should have space at the end for two MSG_PROT packets:
- * 1. A packet that will act as a completion packet
- * 2. A packet that will generate MSI-X interrupt
+ * 1. Optional NOP padding for cacheline alignment
+ * 2. A packet that will act as a completion packet
+ * 3. A packet that will generate MSI interrupt
*/
if (parser->completion)
- parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
+ parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
+ parser->patched_cb_size);
return rc;
}
@@ -5564,13 +5579,14 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
int rc;
/*
- * The new CB should have space at the end for two MSG_PROT pkt:
- * 1. A packet that will act as a completion packet
- * 2. A packet that will generate MSI interrupt
+ * The new CB should have space at the end for two MSG_PROT packets:
+ * 1. Optional NOP padding for cacheline alignment
+ * 2. A packet that will act as a completion packet
+ * 3. A packet that will generate MSI interrupt
*/
if (parser->completion)
parser->patched_cb_size = parser->user_cb_size +
- sizeof(struct packet_msg_prot) * 2;
+ gaudi_get_patched_cb_extra_size(parser->user_cb_size);
else
parser->patched_cb_size = parser->user_cb_size;
@@ -5745,18 +5761,24 @@ static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
return gaudi_parse_cb_no_mmu(hdev, parser);
}
-static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
- void *kernel_address, u32 len,
- u64 cq_addr, u32 cq_val, u32 msi_vec,
- bool eb)
+static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
+ u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
+ u32 msi_vec, bool eb)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct packet_msg_prot *cq_pkt;
+ struct packet_nop *cq_padding;
u64 msi_addr;
u32 tmp;
+ cq_padding = kernel_address + original_len;
cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
+ while ((void *)cq_padding < (void *)cq_pkt) {
+ cq_padding->ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP);
+ cq_padding++;
+ }
+
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 64590fc55dc9..40c082cafbd7 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4166,8 +4166,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
}
void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
- u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
- bool eb)
+ u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
+ u32 msix_vec, bool eb)
{
struct packet_msg_prot *cq_pkt;
u32 tmp;
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 647f57402616..54b5b6125df5 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -230,8 +230,8 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
- u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
- bool eb);
+ u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
+ u32 msix_vec, bool eb);
int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size);
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,