diff options
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 1 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/hw_queue.c | 1 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 46 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 4 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goyaP.h | 4 |
5 files changed, 40 insertions, 16 deletions
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 931fa7092646..44752e5954ca 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1380,6 +1380,7 @@ struct hl_asic_funcs {
 				enum dma_data_direction dir);
 	void (*add_end_of_cb_packets)(struct hl_device *hdev,
 					void *kernel_address, u32 len,
+					u32 original_len,
 					u64 cq_addr, u32 cq_val, u32 msix_num,
 					bool eb);
 	void (*update_eq_ci)(struct hl_device *hdev, u32 val);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 6103e479e855..32408887dd7c 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -308,6 +308,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
 
 	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
+						job->user_cb_size,
 						cq_addr,
 						le32_to_cpu(cq_pkt.data),
 						q->msi_vec,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 172c21f262e7..453de3d27d0c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1423,6 +1423,19 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
 	return 0;
 }
 
+static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
+{
+	u32 cacheline_end, additional_commands;
+
+	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
+	additional_commands = sizeof(struct packet_msg_prot) * 2;
+
+	if (user_cb_size + additional_commands > cacheline_end)
+		return cacheline_end - user_cb_size + additional_commands;
+	else
+		return additional_commands;
+}
+
 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 		struct hl_ctx *ctx, struct hl_cs *cs,
 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
@@ -1443,7 +1456,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 	 * 1 fence packet
 	 * 4 msg short packets for monitor 2 configuration
 	 * 1 fence packet
-	 * 2 msg prot packets for completion and MSI-X
+	 * 2 msg prot packets for completion and MSI
 	 */
 	cb_size = sizeof(struct packet_msg_short) * 8 +
 		sizeof(struct packet_fence) * 2 +
@@ -5337,11 +5350,13 @@ static int gaudi_validate_cb(struct hl_device *hdev,
 
 	/*
 	 * The new CB should have space at the end for two MSG_PROT packets:
-	 * 1. A packet that will act as a completion packet
-	 * 2. A packet that will generate MSI-X interrupt
+	 * 1. Optional NOP padding for cacheline alignment
+	 * 2. A packet that will act as a completion packet
+	 * 3. A packet that will generate MSI interrupt
 	 */
 	if (parser->completion)
-		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
+		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
+			parser->patched_cb_size);
 
 	return rc;
 }
@@ -5564,13 +5579,14 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
 	int rc;
 
 	/*
-	 * The new CB should have space at the end for two MSG_PROT pkt:
-	 * 1. A packet that will act as a completion packet
-	 * 2. A packet that will generate MSI interrupt
+	 * The new CB should have space at the end for two MSG_PROT packets:
+	 * 1. Optional NOP padding for cacheline alignment
+	 * 2. A packet that will act as a completion packet
+	 * 3. A packet that will generate MSI interrupt
 	 */
 	if (parser->completion)
 		parser->patched_cb_size = parser->user_cb_size +
-				sizeof(struct packet_msg_prot) * 2;
+				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
 	else
 		parser->patched_cb_size = parser->user_cb_size;
 
@@ -5745,18 +5761,24 @@ static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
 	return gaudi_parse_cb_no_mmu(hdev, parser);
 }
 
-static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
-					void *kernel_address, u32 len,
-					u64 cq_addr, u32 cq_val, u32 msi_vec,
-					bool eb)
+static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
+				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
+				u32 msi_vec, bool eb)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	struct packet_msg_prot *cq_pkt;
+	struct packet_nop *cq_padding;
 	u64 msi_addr;
 	u32 tmp;
 
+	cq_padding = kernel_address + original_len;
 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
 
+	while ((void *)cq_padding < (void *)cq_pkt) {
+		cq_padding->ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP);
+		cq_padding++;
+	}
+
 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 64590fc55dc9..40c082cafbd7 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4166,8 +4166,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
 }
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
-				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
-				bool eb)
+				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
+				u32 msix_vec, bool eb)
 {
 	struct packet_msg_prot *cq_pkt;
 	u32 tmp;
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 647f57402616..54b5b6125df5 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -230,8 +230,8 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
-				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
-				bool eb);
+				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
+				u32 msix_vec, bool eb);
 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
 int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size);
 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,