summaryrefslogtreecommitdiff
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/altera-stapl/altera.c56
-rw-r--r--drivers/misc/bcm-vk/bcm_vk_msg.c29
-rw-r--r--drivers/misc/cardreader/alcor_pci.c6
-rw-r--r--drivers/misc/cardreader/rts5261.c115
-rw-r--r--drivers/misc/cardreader/rtsx_usb.c1
-rw-r--r--drivers/misc/fastrpc.c18
-rw-r--r--drivers/misc/habanalabs/common/Makefile2
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c413
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c89
-rw-r--r--drivers/misc/habanalabs/common/context.c4
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c304
-rw-r--r--drivers/misc/habanalabs/common/device.c280
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c86
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h415
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c44
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c108
-rw-r--r--drivers/misc/habanalabs/common/irq.c14
-rw-r--r--drivers/misc/habanalabs/common/memory.c289
-rw-r--r--drivers/misc/habanalabs/common/memory_mgr.c349
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu.c296
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu_v1.c297
-rw-r--r--drivers/misc/habanalabs/common/pci/pci.c10
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c412
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h4
-rw-r--r--drivers/misc/habanalabs/goya/goya.c363
-rw-r--r--drivers/misc/habanalabs/include/common/cpucp_if.h70
-rw-r--r--drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h10
-rw-r--r--drivers/misc/lkdtm/bugs.c96
-rw-r--r--drivers/misc/lkdtm/cfi.c145
-rw-r--r--drivers/misc/lkdtm/core.c138
-rw-r--r--drivers/misc/lkdtm/fortify.c17
-rw-r--r--drivers/misc/lkdtm/heap.c48
-rw-r--r--drivers/misc/lkdtm/lkdtm.h142
-rw-r--r--drivers/misc/lkdtm/perms.c47
-rw-r--r--drivers/misc/lkdtm/powerpc.c11
-rw-r--r--drivers/misc/lkdtm/refcount.c65
-rw-r--r--drivers/misc/lkdtm/stackleak.c13
-rw-r--r--drivers/misc/lkdtm/usercopy.c146
-rw-r--r--drivers/misc/pvpanic/pvpanic.c10
-rw-r--r--drivers/misc/vmw_balloon.c4
-rw-r--r--drivers/misc/vmw_vmci/Kconfig2
-rw-r--r--drivers/misc/vmw_vmci/vmci_context.c15
-rw-r--r--drivers/misc/vmw_vmci/vmci_guest.c4
-rw-r--r--drivers/misc/vmw_vmci/vmci_queue_pair.c12
44 files changed, 2760 insertions, 2239 deletions
diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c
index 92c0611034b0..075f3a36d512 100644
--- a/drivers/misc/altera-stapl/altera.c
+++ b/drivers/misc/altera-stapl/altera.c
@@ -530,11 +530,8 @@ exit_done:
}
break;
case OP_SWP:
- if (altera_check_stack(stack_ptr, 2, &status)) {
- long_tmp = stack[stack_ptr - 2];
- stack[stack_ptr - 2] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, 2, &status))
+ swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);
break;
case OP_ADD:
if (altera_check_stack(stack_ptr, 2, &status)) {
@@ -912,34 +909,22 @@ exit_done:
*/
/* SWP */
- if (altera_check_stack(stack_ptr, 2, &status)) {
- long_tmp = stack[stack_ptr - 2];
- stack[stack_ptr - 2] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, 2, &status))
+ swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);
/* SWPN 7 */
index = 7 + 1;
- if (altera_check_stack(stack_ptr, index, &status)) {
- long_tmp = stack[stack_ptr - index];
- stack[stack_ptr - index] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, index, &status))
+ swap(stack[stack_ptr - index], stack[stack_ptr - 1]);
/* SWP */
- if (altera_check_stack(stack_ptr, 2, &status)) {
- long_tmp = stack[stack_ptr - 2];
- stack[stack_ptr - 2] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, 2, &status))
+ swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);
/* SWPN 6 */
index = 6 + 1;
- if (altera_check_stack(stack_ptr, index, &status)) {
- long_tmp = stack[stack_ptr - index];
- stack[stack_ptr - index] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, index, &status))
+ swap(stack[stack_ptr - index], stack[stack_ptr - 1]);
/* DUPN 8 */
index = 8 + 1;
@@ -950,18 +935,12 @@ exit_done:
/* SWPN 2 */
index = 2 + 1;
- if (altera_check_stack(stack_ptr, index, &status)) {
- long_tmp = stack[stack_ptr - index];
- stack[stack_ptr - index] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, index, &status))
+ swap(stack[stack_ptr - index], stack[stack_ptr - 1]);
/* SWP */
- if (altera_check_stack(stack_ptr, 2, &status)) {
- long_tmp = stack[stack_ptr - 2];
- stack[stack_ptr - 2] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, 2, &status))
+ swap(stack[stack_ptr - 2], stack[stack_ptr - 1]);
/* DUPN 6 */
index = 6 + 1;
@@ -1075,11 +1054,8 @@ exit_done:
* to swap with top element
*/
index = (args[0]) + 1;
- if (altera_check_stack(stack_ptr, index, &status)) {
- long_tmp = stack[stack_ptr - index];
- stack[stack_ptr - index] = stack[stack_ptr - 1];
- stack[stack_ptr - 1] = long_tmp;
- }
+ if (altera_check_stack(stack_ptr, index, &status))
+ swap(stack[stack_ptr - index], stack[stack_ptr - 1]);
break;
case OP_DUPN:
/*
diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.c b/drivers/misc/bcm-vk/bcm_vk_msg.c
index 066b9ef7fcd7..3c081504f38c 100644
--- a/drivers/misc/bcm-vk/bcm_vk_msg.c
+++ b/drivers/misc/bcm-vk/bcm_vk_msg.c
@@ -757,20 +757,19 @@ static struct bcm_vk_wkent *bcm_vk_dequeue_pending(struct bcm_vk *vk,
u16 q_num,
u16 msg_id)
{
- bool found = false;
- struct bcm_vk_wkent *entry;
+ struct bcm_vk_wkent *entry = NULL, *iter;
spin_lock(&chan->pendq_lock);
- list_for_each_entry(entry, &chan->pendq[q_num], node) {
- if (get_msg_id(&entry->to_v_msg[0]) == msg_id) {
- list_del(&entry->node);
- found = true;
+ list_for_each_entry(iter, &chan->pendq[q_num], node) {
+ if (get_msg_id(&iter->to_v_msg[0]) == msg_id) {
+ list_del(&iter->node);
+ entry = iter;
bcm_vk_msgid_bitmap_clear(vk, msg_id, 1);
break;
}
}
spin_unlock(&chan->pendq_lock);
- return ((found) ? entry : NULL);
+ return entry;
}
s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk)
@@ -1010,16 +1009,14 @@ ssize_t bcm_vk_read(struct file *p_file,
miscdev);
struct device *dev = &vk->pdev->dev;
struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan;
- struct bcm_vk_wkent *entry = NULL;
+ struct bcm_vk_wkent *entry = NULL, *iter;
u32 q_num;
u32 rsp_length;
- bool found = false;
if (!bcm_vk_drv_access_ok(vk))
return -EPERM;
dev_dbg(dev, "Buf count %zu\n", count);
- found = false;
/*
* search through the pendq on the to_h chan, and return only those
@@ -1028,13 +1025,13 @@ ssize_t bcm_vk_read(struct file *p_file,
*/
spin_lock(&chan->pendq_lock);
for (q_num = 0; q_num < chan->q_nr; q_num++) {
- list_for_each_entry(entry, &chan->pendq[q_num], node) {
- if (entry->ctx->idx == ctx->idx) {
+ list_for_each_entry(iter, &chan->pendq[q_num], node) {
+ if (iter->ctx->idx == ctx->idx) {
if (count >=
- (entry->to_h_blks * VK_MSGQ_BLK_SIZE)) {
- list_del(&entry->node);
+ (iter->to_h_blks * VK_MSGQ_BLK_SIZE)) {
+ list_del(&iter->node);
atomic_dec(&ctx->pend_cnt);
- found = true;
+ entry = iter;
} else {
/* buffer not big enough */
rc = -EMSGSIZE;
@@ -1046,7 +1043,7 @@ ssize_t bcm_vk_read(struct file *p_file,
read_loop_exit:
spin_unlock(&chan->pendq_lock);
- if (found) {
+ if (entry) {
/* retrieve the passed down msg_id */
set_msg_id(&entry->to_h_msg[0], entry->usr_msg_id);
rsp_length = entry->to_h_blks * VK_MSGQ_BLK_SIZE;
diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c
index 3f514d77a843..9080f9f150a2 100644
--- a/drivers/misc/cardreader/alcor_pci.c
+++ b/drivers/misc/cardreader/alcor_pci.c
@@ -317,12 +317,15 @@ static int alcor_pci_probe(struct pci_dev *pdev,
ret = mfd_add_devices(&pdev->dev, priv->id, alcor_pci_cells,
ARRAY_SIZE(alcor_pci_cells), NULL, 0, NULL);
if (ret < 0)
- goto error_release_regions;
+ goto error_clear_drvdata;
alcor_pci_aspm_ctrl(priv, 0);
return 0;
+error_clear_drvdata:
+ pci_clear_master(pdev);
+ pci_set_drvdata(pdev, NULL);
error_release_regions:
pci_release_regions(pdev);
error_free_ida:
@@ -343,6 +346,7 @@ static void alcor_pci_remove(struct pci_dev *pdev)
ida_free(&alcor_pci_idr, priv->id);
pci_release_regions(pdev);
+ pci_clear_master(pdev);
pci_set_drvdata(pdev, NULL);
}
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index a77585ab0f30..749cc5a46d13 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -57,40 +57,6 @@ static void rts5261_fill_driving(struct rtsx_pcr *pcr, u8 voltage)
0xFF, driving[drive_sel][2]);
}
-static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr)
-{
- struct pci_dev *pdev = pcr->pci;
- u32 reg;
-
- /* 0x814~0x817 */
- pci_read_config_dword(pdev, PCR_SETTING_REG2, &reg);
- pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
-
- if (!rts5261_vendor_setting_valid(reg)) {
- /* Not support MMC default */
- pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
- pcr_dbg(pcr, "skip fetch vendor setting\n");
- return;
- }
-
- if (!rts5261_reg_check_mmc_support(reg))
- pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
-
- /* TO do: need to add rtd3 function */
- pcr->rtd3_en = rts5261_reg_to_rtd3(reg);
-
- if (rts5261_reg_check_reverse_socket(reg))
- pcr->flags |= PCR_REVERSE_SOCKET;
-
- /* 0x724~0x727 */
- pci_read_config_dword(pdev, PCR_SETTING_REG1, &reg);
- pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
-
- pcr->aspm_en = rts5261_reg_to_aspm(reg);
- pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(reg);
- pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg);
-}
-
static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime)
{
/* Set relink_time to 0 */
@@ -391,11 +357,11 @@ static void rts5261_process_ocp(struct rtsx_pcr *pcr)
}
-static int rts5261_init_from_hw(struct rtsx_pcr *pcr)
+static void rts5261_init_from_hw(struct rtsx_pcr *pcr)
{
struct pci_dev *pdev = pcr->pci;
- int retval;
- u32 lval, i;
+ u32 lval1, lval2, i;
+ u16 setting_reg1, setting_reg2;
u8 valid, efuse_valid, tmp;
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
@@ -418,26 +384,70 @@ static int rts5261_init_from_hw(struct rtsx_pcr *pcr)
efuse_valid = ((tmp & 0x0C) >> 2);
pcr_dbg(pcr, "Load efuse valid: 0x%x\n", efuse_valid);
- if (efuse_valid == 0) {
- retval = pci_read_config_dword(pdev, PCR_SETTING_REG2, &lval);
- if (retval != 0)
- pcr_dbg(pcr, "read 0x814 DW fail\n");
- pcr_dbg(pcr, "DW from 0x814: 0x%x\n", lval);
- /* 0x816 */
- valid = (u8)((lval >> 16) & 0x03);
- pcr_dbg(pcr, "0x816: %d\n", valid);
- }
+ pci_read_config_dword(pdev, PCR_SETTING_REG2, &lval2);
+ pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, lval2);
+ /* 0x816 */
+ valid = (u8)((lval2 >> 16) & 0x03);
+
rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
REG_EFUSE_POR, 0);
pcr_dbg(pcr, "Disable efuse por!\n");
- pci_read_config_dword(pdev, PCR_SETTING_REG2, &lval);
- lval = lval & 0x00FFFFFF;
- retval = pci_write_config_dword(pdev, PCR_SETTING_REG2, lval);
- if (retval != 0)
- pcr_dbg(pcr, "write config fail\n");
+ if (efuse_valid == 2 || efuse_valid == 3) {
+ if (valid == 3) {
+ /* Bypass efuse */
+ setting_reg1 = PCR_SETTING_REG1;
+ setting_reg2 = PCR_SETTING_REG2;
+ } else {
+ /* Use efuse data */
+ setting_reg1 = PCR_SETTING_REG4;
+ setting_reg2 = PCR_SETTING_REG5;
+ }
+ } else if (efuse_valid == 0) {
+ // default
+ setting_reg1 = PCR_SETTING_REG1;
+ setting_reg2 = PCR_SETTING_REG2;
+ }
+
+ pci_read_config_dword(pdev, setting_reg2, &lval2);
+ pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg2, lval2);
+
+ if (!rts5261_vendor_setting_valid(lval2)) {
+ /* Not support MMC default */
+ pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
+ pcr_dbg(pcr, "skip fetch vendor setting\n");
+ return;
+ }
+
+ if (!rts5261_reg_check_mmc_support(lval2))
+ pcr->extra_caps |= EXTRA_CAPS_NO_MMC;
- return retval;
+ pcr->rtd3_en = rts5261_reg_to_rtd3(lval2);
+
+ if (rts5261_reg_check_reverse_socket(lval2))
+ pcr->flags |= PCR_REVERSE_SOCKET;
+
+ pci_read_config_dword(pdev, setting_reg1, &lval1);
+ pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg1, lval1);
+
+ pcr->aspm_en = rts5261_reg_to_aspm(lval1);
+ pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(lval1);
+ pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(lval1);
+
+ if (setting_reg1 == PCR_SETTING_REG1) {
+ /* store setting */
+ rtsx_pci_write_register(pcr, 0xFF0C, 0xFF, (u8)(lval1 & 0xFF));
+ rtsx_pci_write_register(pcr, 0xFF0D, 0xFF, (u8)((lval1 >> 8) & 0xFF));
+ rtsx_pci_write_register(pcr, 0xFF0E, 0xFF, (u8)((lval1 >> 16) & 0xFF));
+ rtsx_pci_write_register(pcr, 0xFF0F, 0xFF, (u8)((lval1 >> 24) & 0xFF));
+ rtsx_pci_write_register(pcr, 0xFF10, 0xFF, (u8)(lval2 & 0xFF));
+ rtsx_pci_write_register(pcr, 0xFF11, 0xFF, (u8)((lval2 >> 8) & 0xFF));
+ rtsx_pci_write_register(pcr, 0xFF12, 0xFF, (u8)((lval2 >> 16) & 0xFF));
+
+ pci_write_config_dword(pdev, PCR_SETTING_REG4, lval1);
+ lval2 = lval2 & 0x00FFFFFF;
+ pci_write_config_dword(pdev, PCR_SETTING_REG5, lval2);
+ }
}
static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
@@ -636,7 +646,6 @@ static void rts5261_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
}
static const struct pcr_ops rts5261_pcr_ops = {
- .fetch_vendor_settings = rtsx5261_fetch_vendor_settings,
.turn_on_led = rts5261_turn_on_led,
.turn_off_led = rts5261_turn_off_led,
.extra_init_hw = rts5261_extra_init_hw,
diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c
index 59eda55d92a3..1ef9b61077c4 100644
--- a/drivers/misc/cardreader/rtsx_usb.c
+++ b/drivers/misc/cardreader/rtsx_usb.c
@@ -667,6 +667,7 @@ static int rtsx_usb_probe(struct usb_interface *intf,
return 0;
out_init_fail:
+ usb_set_intfdata(ucr->pusb_intf, NULL);
usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
ucr->iobuf_dma);
return ret;
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index d80ada8cac09..93ebd174d848 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1606,17 +1606,18 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl,
struct fastrpc_req_munmap *req)
{
struct fastrpc_invoke_args args[1] = { [0] = { 0 } };
- struct fastrpc_buf *buf, *b;
+ struct fastrpc_buf *buf = NULL, *iter, *b;
struct fastrpc_munmap_req_msg req_msg;
struct device *dev = fl->sctx->dev;
int err;
u32 sc;
spin_lock(&fl->lock);
- list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
- if ((buf->raddr == req->vaddrout) && (buf->size == req->size))
+ list_for_each_entry_safe(iter, b, &fl->mmaps, node) {
+ if ((iter->raddr == req->vaddrout) && (iter->size == req->size)) {
+ buf = iter;
break;
- buf = NULL;
+ }
}
spin_unlock(&fl->lock);
@@ -1747,17 +1748,18 @@ err_invoke:
static int fastrpc_req_mem_unmap_impl(struct fastrpc_user *fl, struct fastrpc_mem_unmap *req)
{
struct fastrpc_invoke_args args[1] = { [0] = { 0 } };
- struct fastrpc_map *map = NULL, *m;
+ struct fastrpc_map *map = NULL, *iter, *m;
struct fastrpc_mem_unmap_req_msg req_msg = { 0 };
int err = 0;
u32 sc;
struct device *dev = fl->sctx->dev;
spin_lock(&fl->lock);
- list_for_each_entry_safe(map, m, &fl->maps, node) {
- if ((req->fd < 0 || map->fd == req->fd) && (map->raddr == req->vaddr))
+ list_for_each_entry_safe(iter, m, &fl->maps, node) {
+ if ((req->fd < 0 || iter->fd == req->fd) && (iter->raddr == req->vaddr)) {
+ map = iter;
break;
- map = NULL;
+ }
}
spin_unlock(&fl->lock);
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
index 6ebe3c7001ff..934a3a4aedc9 100644
--- a/drivers/misc/habanalabs/common/Makefile
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o \
- common/state_dump.o
+ common/state_dump.o common/memory_mgr.o
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index a507110f6443..e13b2b39c058 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -160,24 +160,6 @@ static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
}
}
-static void cb_release(struct kref *ref)
-{
- struct hl_device *hdev;
- struct hl_cb *cb;
-
- cb = container_of(ref, struct hl_cb, refcount);
- hdev = cb->hdev;
-
- hl_debugfs_remove_cb(cb);
-
- if (cb->is_mmu_mapped)
- cb_unmap_mem(cb->ctx, cb);
-
- hl_ctx_put(cb->ctx);
-
- cb_do_release(hdev, cb);
-}
-
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
int ctx_id, bool internal_cb)
{
@@ -238,168 +220,175 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
return cb;
}
-int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
- bool map_cb, u64 *handle)
+struct hl_cb_mmap_mem_alloc_args {
+ struct hl_device *hdev;
+ struct hl_ctx *ctx;
+ u32 cb_size;
+ bool internal_cb;
+ bool map_cb;
+};
+
+static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf)
{
- struct hl_cb *cb;
- bool alloc_new_cb = true;
- int rc, ctx_id = ctx->asid;
+ struct hl_cb *cb = buf->private;
- /*
- * Can't use generic function to check this because of special case
- * where we create a CB as part of the reset process
- */
- if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
- dev_warn_ratelimited(hdev->dev,
- "Device is disabled or in reset. Can't create new CBs\n");
- rc = -EBUSY;
- goto out_err;
- }
+ hl_debugfs_remove_cb(cb);
- if (cb_size > SZ_2M) {
- dev_err(hdev->dev, "CB size %d must be less than %d\n",
- cb_size, SZ_2M);
- rc = -EINVAL;
- goto out_err;
- }
+ if (cb->is_mmu_mapped)
+ cb_unmap_mem(cb->ctx, cb);
+
+ hl_ctx_put(cb->ctx);
- if (!internal_cb) {
+ cb_do_release(cb->hdev, cb);
+}
+
+static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
+{
+ struct hl_cb_mmap_mem_alloc_args *cb_args = args;
+ struct hl_cb *cb;
+ int rc, ctx_id = cb_args->ctx->asid;
+ bool alloc_new_cb = true;
+
+ if (!cb_args->internal_cb) {
/* Minimum allocation must be PAGE SIZE */
- if (cb_size < PAGE_SIZE)
- cb_size = PAGE_SIZE;
+ if (cb_args->cb_size < PAGE_SIZE)
+ cb_args->cb_size = PAGE_SIZE;
if (ctx_id == HL_KERNEL_ASID_ID &&
- cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+ cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) {
- spin_lock(&hdev->cb_pool_lock);
- if (!list_empty(&hdev->cb_pool)) {
- cb = list_first_entry(&hdev->cb_pool,
+ spin_lock(&cb_args->hdev->cb_pool_lock);
+ if (!list_empty(&cb_args->hdev->cb_pool)) {
+ cb = list_first_entry(&cb_args->hdev->cb_pool,
typeof(*cb), pool_list);
list_del(&cb->pool_list);
- spin_unlock(&hdev->cb_pool_lock);
+ spin_unlock(&cb_args->hdev->cb_pool_lock);
alloc_new_cb = false;
} else {
- spin_unlock(&hdev->cb_pool_lock);
- dev_dbg(hdev->dev, "CB pool is empty\n");
+ spin_unlock(&cb_args->hdev->cb_pool_lock);
+ dev_dbg(cb_args->hdev->dev, "CB pool is empty\n");
}
}
}
if (alloc_new_cb) {
- cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
- if (!cb) {
- rc = -ENOMEM;
- goto out_err;
- }
+ cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb);
+ if (!cb)
+ return -ENOMEM;
}
- cb->hdev = hdev;
- cb->ctx = ctx;
- hl_ctx_get(hdev, cb->ctx);
+ cb->hdev = cb_args->hdev;
+ cb->ctx = cb_args->ctx;
+ cb->buf = buf;
+ cb->buf->mappable_size = cb->size;
+ cb->buf->private = cb;
+
+ hl_ctx_get(cb->ctx);
- if (map_cb) {
+ if (cb_args->map_cb) {
if (ctx_id == HL_KERNEL_ASID_ID) {
- dev_err(hdev->dev,
+ dev_err(cb_args->hdev->dev,
"CB mapping is not supported for kernel context\n");
rc = -EINVAL;
goto release_cb;
}
- rc = cb_map_mem(ctx, cb);
+ rc = cb_map_mem(cb_args->ctx, cb);
if (rc)
goto release_cb;
}
- spin_lock(&mgr->cb_lock);
- rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
- spin_unlock(&mgr->cb_lock);
-
- if (rc < 0) {
- dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
- goto unmap_mem;
- }
-
- cb->id = (u64) rc;
-
- kref_init(&cb->refcount);
- spin_lock_init(&cb->lock);
-
- /*
- * idr is 32-bit so we can safely OR it with a mask that is above
- * 32 bit
- */
- *handle = cb->id | HL_MMAP_TYPE_CB;
- *handle <<= PAGE_SHIFT;
-
hl_debugfs_add_cb(cb);
return 0;
-unmap_mem:
- if (cb->is_mmu_mapped)
- cb_unmap_mem(cb->ctx, cb);
release_cb:
hl_ctx_put(cb->ctx);
- cb_do_release(hdev, cb);
-out_err:
- *handle = 0;
+ cb_do_release(cb_args->hdev, cb);
return rc;
}
-int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
+static int hl_cb_mmap(struct hl_mmap_mem_buf *buf,
+ struct vm_area_struct *vma, void *args)
{
- struct hl_cb *cb;
- u32 handle;
- int rc = 0;
+ struct hl_cb *cb = buf->private;
- /*
- * handle was given to user to do mmap, I need to shift it back to
- * how the idr module gave it to me
- */
- cb_handle >>= PAGE_SHIFT;
- handle = (u32) cb_handle;
+ return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address,
+ cb->bus_address, cb->size);
+}
- spin_lock(&mgr->cb_lock);
+static struct hl_mmap_mem_buf_behavior cb_behavior = {
+ .topic = "CB",
+ .mem_id = HL_MMAP_TYPE_CB,
+ .alloc = hl_cb_mmap_mem_alloc,
+ .release = hl_cb_mmap_mem_release,
+ .mmap = hl_cb_mmap,
+};
- cb = idr_find(&mgr->cb_handles, handle);
- if (cb) {
- idr_remove(&mgr->cb_handles, handle);
- spin_unlock(&mgr->cb_lock);
- kref_put(&cb->refcount, cb_release);
- } else {
- spin_unlock(&mgr->cb_lock);
- dev_err(hdev->dev,
- "CB destroy failed, no match to handle 0x%x\n", handle);
- rc = -EINVAL;
+int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
+ struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
+ bool map_cb, u64 *handle)
+{
+ struct hl_cb_mmap_mem_alloc_args args = {
+ .hdev = hdev,
+ .ctx = ctx,
+ .cb_size = cb_size,
+ .internal_cb = internal_cb,
+ .map_cb = map_cb,
+ };
+ struct hl_mmap_mem_buf *buf;
+ int ctx_id = ctx->asid;
+
+ if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is disabled or in reset. Can't create new CBs\n");
+ return -EBUSY;
}
- return rc;
+ if (cb_size > SZ_2M) {
+ dev_err(hdev->dev, "CB size %d must be less than %d\n",
+ cb_size, SZ_2M);
+ return -EINVAL;
+ }
+
+ buf = hl_mmap_mem_buf_alloc(
+ mmg, &cb_behavior,
+ ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args);
+ if (!buf)
+ return -ENOMEM;
+
+ *handle = buf->handle;
+
+ return 0;
+}
+
+int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
+{
+ int rc;
+
+ rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle);
+ if (rc < 0)
+ return rc; /* Invalid handle */
+
+ if (rc == 0)
+ dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle);
+
+ return 0;
}
-static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u64 cb_handle, u32 flags, u32 *usage_cnt, u64 *device_va)
+static int hl_cb_info(struct hl_mem_mgr *mmg,
+ u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
struct hl_vm_va_block *va_block;
struct hl_cb *cb;
- u32 handle;
int rc = 0;
- /* The CB handle was given to user to do mmap, so need to shift it back
- * to the value which was allocated by the IDR module.
- */
- cb_handle >>= PAGE_SHIFT;
- handle = (u32) cb_handle;
-
- spin_lock(&mgr->cb_lock);
-
- cb = idr_find(&mgr->cb_handles, handle);
+ cb = hl_cb_get(mmg, handle);
if (!cb) {
- dev_err(hdev->dev,
- "CB info failed, no match to handle 0x%x\n", handle);
- rc = -EINVAL;
- goto out;
+ dev_err(mmg->dev,
+ "CB info failed, no match to handle 0x%llx\n", handle);
+ return -EINVAL;
}
if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
@@ -407,7 +396,7 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
if (va_block) {
*device_va = va_block->start;
} else {
- dev_err(hdev->dev, "CB is not mapped to the device's MMU\n");
+ dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
rc = -EINVAL;
goto out;
}
@@ -416,7 +405,7 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
}
out:
- spin_unlock(&mgr->cb_lock);
+ hl_cb_put(cb);
return rc;
}
@@ -444,7 +433,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
args->in.cb_size, HL_MAX_CB_SIZE);
rc = -EINVAL;
} else {
- rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx,
+ rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx,
args->in.cb_size, false,
!!(args->in.flags & HL_CB_FLAGS_MAP),
&handle);
@@ -455,12 +444,12 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
break;
case HL_CB_OP_DESTROY:
- rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
+ rc = hl_cb_destroy(&hpriv->mem_mgr,
args->in.cb_handle);
break;
case HL_CB_OP_INFO:
- rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle,
+ rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle,
args->in.flags,
&usage_cnt,
&device_va);
@@ -483,163 +472,20 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
return rc;
}
-static void cb_vm_close(struct vm_area_struct *vma)
+struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle)
{
- struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
- long new_mmap_size;
-
- new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start);
-
- if (new_mmap_size > 0) {
- cb->mmap_size = new_mmap_size;
- return;
- }
-
- spin_lock(&cb->lock);
- cb->mmap = false;
- spin_unlock(&cb->lock);
-
- hl_cb_put(cb);
- vma->vm_private_data = NULL;
-}
-
-static const struct vm_operations_struct cb_vm_ops = {
- .close = cb_vm_close
-};
-
-int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_cb *cb;
- u32 handle, user_cb_size;
- int rc;
-
- /* We use the page offset to hold the idr and thus we need to clear
- * it before doing the mmap itself
- */
- handle = vma->vm_pgoff;
- vma->vm_pgoff = 0;
-
- /* reference was taken here */
- cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
- if (!cb) {
- dev_err(hdev->dev,
- "CB mmap failed, no match to handle 0x%x\n", handle);
- return -EINVAL;
- }
-
- /* Validation check */
- user_cb_size = vma->vm_end - vma->vm_start;
- if (user_cb_size != ALIGN(cb->size, PAGE_SIZE)) {
- dev_err(hdev->dev,
- "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
- vma->vm_end - vma->vm_start, cb->size);
- rc = -EINVAL;
- goto put_cb;
- }
-
- if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
- user_cb_size)) {
- dev_err(hdev->dev,
- "user pointer is invalid - 0x%lx\n",
- vma->vm_start);
-
- rc = -EINVAL;
- goto put_cb;
- }
-
- spin_lock(&cb->lock);
+ struct hl_mmap_mem_buf *buf;
- if (cb->mmap) {
- dev_err(hdev->dev,
- "CB mmap failed, CB already mmaped to user\n");
- rc = -EINVAL;
- goto release_lock;
- }
-
- cb->mmap = true;
-
- spin_unlock(&cb->lock);
-
- vma->vm_ops = &cb_vm_ops;
-
- /*
- * Note: We're transferring the cb reference to
- * vma->vm_private_data here.
- */
-
- vma->vm_private_data = cb;
-
- rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
- cb->bus_address, cb->size);
- if (rc) {
- spin_lock(&cb->lock);
- cb->mmap = false;
- goto release_lock;
- }
-
- cb->mmap_size = cb->size;
- vma->vm_pgoff = handle;
-
- return 0;
-
-release_lock:
- spin_unlock(&cb->lock);
-put_cb:
- hl_cb_put(cb);
- return rc;
-}
-
-struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u32 handle)
-{
- struct hl_cb *cb;
-
- spin_lock(&mgr->cb_lock);
- cb = idr_find(&mgr->cb_handles, handle);
-
- if (!cb) {
- spin_unlock(&mgr->cb_lock);
- dev_warn(hdev->dev,
- "CB get failed, no match to handle 0x%x\n", handle);
+ buf = hl_mmap_mem_buf_get(mmg, handle);
+ if (!buf)
return NULL;
- }
-
- kref_get(&cb->refcount);
-
- spin_unlock(&mgr->cb_lock);
-
- return cb;
+ return buf->private;
}
void hl_cb_put(struct hl_cb *cb)
{
- kref_put(&cb->refcount, cb_release);
-}
-
-void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
-{
- spin_lock_init(&mgr->cb_lock);
- idr_init(&mgr->cb_handles);
-}
-
-void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
-{
- struct hl_cb *cb;
- struct idr *idp;
- u32 id;
-
- idp = &mgr->cb_handles;
-
- idr_for_each_entry(idp, cb, id) {
- if (kref_put(&cb->refcount, cb_release) != 1)
- dev_err(hdev->dev,
- "CB %d for CTX ID %d is still alive\n",
- id, cb->ctx->asid);
- }
-
- idr_destroy(&mgr->cb_handles);
+ hl_mmap_mem_buf_put(cb->buf);
}
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
@@ -649,7 +495,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
struct hl_cb *cb;
int rc;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size,
+ rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size,
internal_cb, false, &cb_handle);
if (rc) {
dev_err(hdev->dev,
@@ -657,8 +503,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
return NULL;
}
- cb_handle >>= PAGE_SHIFT;
- cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
+ cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle);
/* hl_cb_get should never fail here */
if (!cb) {
dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
@@ -669,7 +514,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
return cb;
destroy_cb:
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);
return NULL;
}
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index d93ef9f1c45c..fb30b7de4aab 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -407,8 +407,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
- bool next_entry_found = false;
- struct hl_cs *next, *first_cs;
+ struct hl_cs *next = NULL, *iter, *first_cs;
if (!cs_needs_timeout(cs))
return;
@@ -443,13 +442,13 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
spin_lock(&hdev->cs_mirror_lock);
/* queue TDR for next CS */
- list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
- if (cs_needs_timeout(next)) {
- next_entry_found = true;
+ list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node)
+ if (cs_needs_timeout(iter)) {
+ next = iter;
break;
}
- if (next_entry_found && !next->tdr_active) {
+ if (next && !next->tdr_active) {
next->tdr_active = true;
schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
}
@@ -736,11 +735,10 @@ static void cs_timedout(struct work_struct *work)
hdev = cs->ctx->hdev;
/* Save only the first CS timeout parameters */
- rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1);
+ rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_disable, 0, 1);
if (!rc) {
- hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
- hdev->last_error.cs_timeout_timestamp = ktime_get();
- hdev->last_error.cs_timeout_seq = cs->sequence;
+ hdev->last_error.cs_timeout.timestamp = ktime_get();
+ hdev->last_error.cs_timeout.seq = cs->sequence;
}
switch (cs->type) {
@@ -806,7 +804,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
}
/* increment refcnt for context */
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
cs->ctx = ctx;
cs->submitted = false;
@@ -958,9 +956,9 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
- if (pend->ts_reg_info.ts_buff) {
+ if (pend->ts_reg_info.buf) {
list_del(&pend->wait_list_node);
- hl_ts_put(pend->ts_reg_info.ts_buff);
+ hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
hl_cb_put(pend->ts_reg_info.cq_cb);
} else {
pend->fence.error = -EIO;
@@ -1072,17 +1070,14 @@ static int validate_queue_index(struct hl_device *hdev,
}
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
- struct hl_cb_mgr *cb_mgr,
+ struct hl_mem_mgr *mmg,
struct hl_cs_chunk *chunk)
{
struct hl_cb *cb;
- u32 cb_handle;
- cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
-
- cb = hl_cb_get(hdev, cb_mgr, cb_handle);
+ cb = hl_cb_get(mmg, chunk->cb_handle);
if (!cb) {
- dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
+ dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle);
return NULL;
}
@@ -1344,7 +1339,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
}
if (is_kernel_allocated_cb) {
- cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
+ cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk);
if (!cb) {
atomic64_inc(
&ctx->cs_counters.validation_drop_cnt);
@@ -1772,7 +1767,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
*/
job->patched_cb = job->user_cb;
job->job_cb_size = job->user_cb_size;
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
/* increment refcount as for external queues we get completion */
cs_get(cs);
@@ -1834,7 +1829,7 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
handle->count = count;
- hl_ctx_get(hdev, hpriv->ctx);
+ hl_ctx_get(hpriv->ctx);
handle->ctx = hpriv->ctx;
mgr = &hpriv->ctx->sig_mgr;
@@ -2528,7 +2523,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
if (timestamp)
*timestamp = 0;
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
fence = hl_ctx_get_fence(ctx, seq);
@@ -2668,7 +2663,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct multi_cs_completion *mcs_compl;
struct hl_device *hdev = hpriv->hdev;
- struct multi_cs_data mcs_data = {0};
+ struct multi_cs_data mcs_data = {};
union hl_wait_cs_args *args = data;
struct hl_ctx *ctx = hpriv->ctx;
struct hl_fence **fence_arr;
@@ -2719,7 +2714,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
mcs_data.fence_arr = fence_arr;
mcs_data.arr_len = seq_arr_len;
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
/* wait (with timeout) for the first CS to be completed */
mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
@@ -2868,12 +2863,13 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
-static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
+static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
struct hl_cb *cq_cb,
u64 ts_offset, u64 cq_offset, u64 target_value,
spinlock_t *wait_list_lock,
struct hl_user_pending_interrupt **pend)
{
+ struct hl_ts_buff *ts_buff = buf->private;
struct hl_user_pending_interrupt *requested_offset_record =
(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
ts_offset;
@@ -2885,7 +2881,7 @@ static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
/* Validate ts_offset not exceeding last max */
if (requested_offset_record > cb_last) {
- dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
+ dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
(u64)(uintptr_t)cb_last);
return -EINVAL;
}
@@ -2904,18 +2900,21 @@ start_over:
list_del(&requested_offset_record->wait_list_node);
spin_unlock_irqrestore(wait_list_lock, flags);
- hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
+ hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
- dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
+ dev_dbg(buf->mmg->dev,
+ "ts node removed from interrupt list now can re-use\n");
} else {
- dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");
+ dev_dbg(buf->mmg->dev,
+ "ts node in middle of irq handling\n");
/* irq handling in the middle give it time to finish */
spin_unlock_irqrestore(wait_list_lock, flags);
usleep_range(1, 10);
if (++iter_counter == MAX_TS_ITER_NUM) {
- dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
+ dev_err(buf->mmg->dev,
+ "handling registration interrupt took too long!!\n");
return -EINVAL;
}
@@ -2927,7 +2926,7 @@ start_over:
/* Fill up the new registration node info */
requested_offset_record->ts_reg_info.in_use = 1;
- requested_offset_record->ts_reg_info.ts_buff = ts_buff;
+ requested_offset_record->ts_reg_info.buf = buf;
requested_offset_record->ts_reg_info.cq_cb = cq_cb;
requested_offset_record->ts_reg_info.timestamp_kernel_addr =
(u64 *) ts_buff->user_buff_address + ts_offset;
@@ -2937,21 +2936,20 @@ start_over:
*pend = requested_offset_record;
- dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
+ dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB(0x%llx)\n",
(u64)(uintptr_t)requested_offset_record);
return 0;
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
- struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
+ struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
u64 target_value, struct hl_user_interrupt *interrupt,
bool register_ts_record, u64 ts_handle, u64 ts_offset,
u32 *status, u64 *timestamp)
{
- u32 cq_patched_handle, ts_patched_handle;
struct hl_user_pending_interrupt *pend;
- struct hl_ts_buff *ts_buff;
+ struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
unsigned long timeout, flags;
long completion_rc;
@@ -2959,10 +2957,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
timeout = hl_usecs64_to_jiffies(timeout_us);
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
- cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
- cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
+ cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
if (!cq_cb) {
rc = -EINVAL;
goto put_ctx;
@@ -2971,16 +2968,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
if (register_ts_record) {
dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
interrupt->interrupt_id, ts_offset, cq_counters_offset);
-
- ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
- ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
- if (!ts_buff) {
+ buf = hl_mmap_mem_buf_get(mmg, ts_handle);
+ if (!buf) {
rc = -EINVAL;
goto put_cq_cb;
}
/* Find first available record */
- rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
+ rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
cq_counters_offset, target_value,
&interrupt->wait_list_lock, &pend);
if (rc)
@@ -3087,7 +3082,7 @@ ts_registration_exit:
return rc;
put_ts_buff:
- hl_ts_put(ts_buff);
+ hl_mmap_mem_buf_put(buf);
put_cq_cb:
hl_cb_put(cq_cb);
put_ctx:
@@ -3111,7 +3106,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
timeout = hl_usecs64_to_jiffies(timeout_us);
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
@@ -3249,7 +3244,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
- rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
+ rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
args->in.interrupt_timeout_us, args->in.cq_counters_handle,
args->in.cq_counters_offset,
args->in.target, interrupt,
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index c6360e33bce8..ed2cfd0c6e99 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -262,7 +262,7 @@ err_hw_block_mem_fini:
return rc;
}
-void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
+void hl_ctx_get(struct hl_ctx *ctx)
{
kref_get(&ctx->refcount);
}
@@ -284,7 +284,7 @@ struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev)
* immediately once we find him
*/
ctx = hpriv->ctx;
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
break;
}
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index f18495545854..c6744bfc6da4 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/iommu.h>
#define MMU_ADDR_BUF_SIZE 40
#define MMU_ASID_BUF_SIZE 10
@@ -125,9 +126,9 @@ static int command_buffers_show(struct seq_file *s, void *data)
}
seq_printf(s,
" %03llu %d 0x%08x %d %d %d\n",
- cb->id, cb->ctx->asid, cb->size,
- kref_read(&cb->refcount),
- cb->mmap, atomic_read(&cb->cs_cnt));
+ cb->buf->handle, cb->ctx->asid, cb->size,
+ kref_read(&cb->buf->refcount),
+ atomic_read(&cb->buf->mmap), atomic_read(&cb->cs_cnt));
}
spin_unlock(&dev_entry->cb_spinlock);
@@ -369,8 +370,7 @@ static int userptr_lookup_show(struct seq_file *s, void *data)
if (dev_entry->userptr_lookup >= userptr->addr &&
dev_entry->userptr_lookup < userptr->addr + userptr->size) {
total_npages = 0;
- for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents,
- i) {
+ for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr);
sg_start = userptr->addr +
total_npages * PAGE_SIZE;
@@ -538,6 +538,39 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
+static ssize_t hl_memory_scrub(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u64 val = entry->memory_scrub_val;
+ int rc;
+
+ if (!hl_device_operational(hdev, NULL)) {
+ dev_warn_ratelimited(hdev->dev, "Can't scrub memory, device is not operational\n");
+ return -EIO;
+ }
+
+ mutex_lock(&hdev->fpriv_list_lock);
+ if (hdev->is_compute_ctx_active) {
+ mutex_unlock(&hdev->fpriv_list_lock);
+ dev_err(hdev->dev, "can't scrub dram, context exist\n");
+ return -EBUSY;
+ }
+ hdev->is_in_dram_scrub = true;
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ rc = hdev->asic_funcs->scrub_device_dram(hdev, val);
+
+ mutex_lock(&hdev->fpriv_list_lock);
+ hdev->is_in_dram_scrub = false;
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ if (rc)
+ return rc;
+ return count;
+}
+
static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -647,13 +680,105 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
return rc;
}
+static int hl_access_dev_mem_by_region(struct hl_device *hdev, u64 addr,
+ u64 *val, enum debugfs_access_type acc_type, bool *found)
+{
+ size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?
+ sizeof(u64) : sizeof(u32);
+ struct pci_mem_region *mem_reg;
+ int i;
+
+ for (i = 0; i < PCI_REGION_NUMBER; i++) {
+ mem_reg = &hdev->pci_mem_region[i];
+ if (!mem_reg->used)
+ continue;
+ if (addr >= mem_reg->region_base &&
+ addr <= mem_reg->region_base + mem_reg->region_size - acc_size) {
+ *found = true;
+ return hdev->asic_funcs->access_dev_mem(hdev, mem_reg, i,
+ addr, val, acc_type);
+ }
+ }
+ return 0;
+}
+
+static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val,
+ enum debugfs_access_type acc_type)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 offset = prop->device_dma_offset_for_host_access;
+
+ switch (acc_type) {
+ case DEBUGFS_READ32:
+ *val = *(u32 *) phys_to_virt(addr - offset);
+ break;
+ case DEBUGFS_WRITE32:
+ *(u32 *) phys_to_virt(addr - offset) = *val;
+ break;
+ case DEBUGFS_READ64:
+ *val = *(u64 *) phys_to_virt(addr - offset);
+ break;
+ case DEBUGFS_WRITE64:
+ *(u64 *) phys_to_virt(addr - offset) = *val;
+ break;
+ default:
+ dev_err(hdev->dev, "hostmem access-type %d id not supported\n", acc_type);
+ break;
+ }
+}
+
+static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
+ enum debugfs_access_type acc_type)
+{
+ size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?
+ sizeof(u64) : sizeof(u32);
+ u64 host_start = hdev->asic_prop.host_base_address;
+ u64 host_end = hdev->asic_prop.host_end_address;
+ bool user_address, found = false;
+ int rc;
+
+ user_address = hl_is_device_va(hdev, addr);
+ if (user_address) {
+ rc = device_va_to_pa(hdev, addr, acc_size, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed reading addr %#llx from dev mem (%d)\n",
+ addr, rc);
+ return rc;
+ }
+
+ if (found)
+ return 0;
+
+ if (!user_address || device_iommu_mapped(&hdev->pdev->dev)) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (addr >= host_start && addr <= host_end - acc_size) {
+ hl_access_host_mem(hdev, addr, val, acc_type);
+ } else {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ dev_err(hdev->dev, "invalid addr %#llx\n", addr);
+ return rc;
+}
+
static ssize_t hl_data_read32(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- u64 addr = entry->addr;
- bool user_address;
+ u64 value64, addr = entry->addr;
char tmp_buf[32];
ssize_t rc;
u32 val;
@@ -666,18 +791,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
if (*ppos)
return 0;
- user_address = hl_is_device_va(hdev, addr);
- if (user_address) {
- rc = device_va_to_pa(hdev, addr, sizeof(val), &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val);
- if (rc) {
- dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+ rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_READ32);
+ if (rc)
return rc;
- }
+
+ val = value64; /* downcast back to 32 */
sprintf(tmp_buf, "0x%08x\n", val);
return simple_read_from_buffer(buf, count, ppos, tmp_buf,
@@ -689,8 +807,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- u64 addr = entry->addr;
- bool user_address;
+ u64 value64, addr = entry->addr;
u32 value;
ssize_t rc;
@@ -703,19 +820,10 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
if (rc)
return rc;
- user_address = hl_is_device_va(hdev, addr);
- if (user_address) {
- rc = device_va_to_pa(hdev, addr, sizeof(value), &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_write32(hdev, addr, user_address, value);
- if (rc) {
- dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
- value, addr);
+ value64 = value;
+ rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_WRITE32);
+ if (rc)
return rc;
- }
return count;
}
@@ -726,7 +834,6 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf,
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 addr = entry->addr;
- bool user_address;
char tmp_buf[32];
ssize_t rc;
u64 val;
@@ -739,18 +846,9 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf,
if (*ppos)
return 0;
- user_address = hl_is_device_va(hdev, addr);
- if (user_address) {
- rc = device_va_to_pa(hdev, addr, sizeof(val), &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_read64(hdev, addr, user_address, &val);
- if (rc) {
- dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+ rc = hl_access_mem(hdev, addr, &val, DEBUGFS_READ64);
+ if (rc)
return rc;
- }
sprintf(tmp_buf, "0x%016llx\n", val);
return simple_read_from_buffer(buf, count, ppos, tmp_buf,
@@ -763,7 +861,6 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf,
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 addr = entry->addr;
- bool user_address;
u64 value;
ssize_t rc;
@@ -776,19 +873,9 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf,
if (rc)
return rc;
- user_address = hl_is_device_va(hdev, addr);
- if (user_address) {
- rc = device_va_to_pa(hdev, addr, sizeof(value), &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_write64(hdev, addr, user_address, value);
- if (rc) {
- dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
- value, addr);
+ rc = hl_access_mem(hdev, addr, &value, DEBUGFS_WRITE64);
+ if (rc)
return rc;
- }
return count;
}
@@ -829,23 +916,67 @@ static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,
}
/* Free the previous allocation, if there was any */
- entry->blob_desc.size = 0;
- vfree(entry->blob_desc.data);
+ entry->data_dma_blob_desc.size = 0;
+ vfree(entry->data_dma_blob_desc.data);
- entry->blob_desc.data = vmalloc(size);
- if (!entry->blob_desc.data)
+ entry->data_dma_blob_desc.data = vmalloc(size);
+ if (!entry->data_dma_blob_desc.data)
return -ENOMEM;
rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size,
- entry->blob_desc.data);
+ entry->data_dma_blob_desc.data);
if (rc) {
dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr);
- vfree(entry->blob_desc.data);
- entry->blob_desc.data = NULL;
+ vfree(entry->data_dma_blob_desc.data);
+ entry->data_dma_blob_desc.data = NULL;
+ return -EIO;
+ }
+
+ entry->data_dma_blob_desc.size = size;
+
+ return count;
+}
+
+static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 size, trig;
+ ssize_t rc;
+
+ if (hdev->reset_info.in_reset) {
+ dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n");
+ return 0;
+ }
+ rc = kstrtouint_from_user(buf, count, 10, &trig);
+ if (rc)
+ return rc;
+
+ if (trig != 1) {
+ dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n");
+ return -EINVAL;
+ }
+
+ size = sizeof(struct cpucp_monitor_dump);
+
+ /* Free the previous allocation, if there was any */
+ entry->mon_dump_blob_desc.size = 0;
+ vfree(entry->mon_dump_blob_desc.data);
+
+ entry->mon_dump_blob_desc.data = vmalloc(size);
+ if (!entry->mon_dump_blob_desc.data)
+ return -ENOMEM;
+
+ rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to dump monitors\n");
+ vfree(entry->mon_dump_blob_desc.data);
+ entry->mon_dump_blob_desc.data = NULL;
return -EIO;
}
- entry->blob_desc.size = size;
+ entry->mon_dump_blob_desc.size = size;
return count;
}
@@ -1218,6 +1349,11 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
return count;
}
+static const struct file_operations hl_mem_scrub_fops = {
+ .owner = THIS_MODULE,
+ .write = hl_memory_scrub,
+};
+
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
@@ -1235,6 +1371,11 @@ static const struct file_operations hl_dma_size_fops = {
.write = hl_dma_size_write
};
+static const struct file_operations hl_monitor_dump_fops = {
+ .owner = THIS_MODULE,
+ .write = hl_monitor_dump_trigger
+};
+
static const struct file_operations hl_i2c_data_fops = {
.owner = THIS_MODULE,
.read = hl_i2c_data_read,
@@ -1350,8 +1491,10 @@ void hl_debugfs_add_device(struct hl_device *hdev)
if (!dev_entry->entry_arr)
return;
- dev_entry->blob_desc.size = 0;
- dev_entry->blob_desc.data = NULL;
+ dev_entry->data_dma_blob_desc.size = 0;
+ dev_entry->data_dma_blob_desc.data = NULL;
+ dev_entry->mon_dump_blob_desc.size = 0;
+ dev_entry->mon_dump_blob_desc.data = NULL;
INIT_LIST_HEAD(&dev_entry->file_list);
INIT_LIST_HEAD(&dev_entry->cb_list);
@@ -1370,6 +1513,17 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
hl_debug_root);
+ debugfs_create_x64("memory_scrub_val",
+ 0644,
+ dev_entry->root,
+ &dev_entry->memory_scrub_val);
+
+ debugfs_create_file("memory_scrub",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_mem_scrub_fops);
+
debugfs_create_x64("addr",
0644,
dev_entry->root,
@@ -1470,7 +1624,18 @@ void hl_debugfs_add_device(struct hl_device *hdev)
debugfs_create_blob("data_dma",
0400,
dev_entry->root,
- &dev_entry->blob_desc);
+ &dev_entry->data_dma_blob_desc);
+
+ debugfs_create_file("monitor_dump_trig",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_monitor_dump_fops);
+
+ debugfs_create_blob("monitor_dump",
+ 0400,
+ dev_entry->root,
+ &dev_entry->mon_dump_blob_desc);
debugfs_create_x8("skip_reset_on_timeout",
0644,
@@ -1509,7 +1674,8 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
mutex_destroy(&entry->file_mutex);
- vfree(entry->blob_desc.data);
+ vfree(entry->data_dma_blob_desc.data);
+ vfree(entry->mon_dump_blob_desc.data);
for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
vfree(entry->state_dump[i]);
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index dc9341a64541..b4f14c6d3970 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -15,6 +15,182 @@
#define HL_RESET_DELAY_USEC 10000 /* 10ms */
+/*
+ * hl_set_dram_bar- sets the bar to allow later access to address
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @addr: the address the caller wants to access.
+ *
+ * @return: the old BAR base address on success, U64_MAX for failure.
+ * The caller should set it back to the old address after use.
+ *
+ * In case the bar space does not cover the whole address space,
+ * the bar base address should be set to allow access to a given address.
+ * This function can be called also if the bar doesn't need to be set,
+ * in that case it just won't change the base.
+ */
+static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 bar_base_addr;
+
+ bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
+
+ return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
+}
+
+
+static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
+ enum debugfs_access_type acc_type, enum pci_region region_type)
+{
+ struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
+ u64 old_base, rc;
+
+ if (region_type == PCI_REGION_DRAM) {
+ old_base = hl_set_dram_bar(hdev, addr);
+ if (old_base == U64_MAX)
+ return -EIO;
+ }
+
+ switch (acc_type) {
+ case DEBUGFS_READ8:
+ *val = readb(hdev->pcie_bar[region->bar_id] +
+ addr - region->region_base + region->offset_in_bar);
+ break;
+ case DEBUGFS_WRITE8:
+ writeb(*val, hdev->pcie_bar[region->bar_id] +
+ addr - region->region_base + region->offset_in_bar);
+ break;
+ case DEBUGFS_READ32:
+ *val = readl(hdev->pcie_bar[region->bar_id] +
+ addr - region->region_base + region->offset_in_bar);
+ break;
+ case DEBUGFS_WRITE32:
+ writel(*val, hdev->pcie_bar[region->bar_id] +
+ addr - region->region_base + region->offset_in_bar);
+ break;
+ case DEBUGFS_READ64:
+ *val = readq(hdev->pcie_bar[region->bar_id] +
+ addr - region->region_base + region->offset_in_bar);
+ break;
+ case DEBUGFS_WRITE64:
+ writeq(*val, hdev->pcie_bar[region->bar_id] +
+ addr - region->region_base + region->offset_in_bar);
+ break;
+ }
+
+ if (region_type == PCI_REGION_DRAM) {
+ rc = hl_set_dram_bar(hdev, old_base);
+ if (rc == U64_MAX)
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct scatterlist *sg;
+ int rc, i;
+
+ rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0);
+ if (rc)
+ return rc;
+
+ /* Shift to the device's base physical address of host memory if necessary */
+ if (prop->device_dma_offset_for_host_access)
+ for_each_sgtable_dma_sg(sgt, sg, i)
+ sg->dma_address += prop->device_dma_offset_for_host_access;
+
+ return 0;
+}
+
+void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct scatterlist *sg;
+ int i;
+
+ /* Cancel the device's base physical address of host memory if necessary */
+ if (prop->device_dma_offset_for_host_access)
+ for_each_sgtable_dma_sg(sgt, sg, i)
+ sg->dma_address -= prop->device_dma_offset_for_host_access;
+
+ dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0);
+}
+
+/*
+ * hl_access_cfg_region - access the config region
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @addr: the address to access
+ * @val: the value to write from or read to
+ * @acc_type: the type of access (read/write 64/32)
+ */
+int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
+ enum debugfs_access_type acc_type)
+{
+ struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG];
+ u32 val_h, val_l;
+
+ if (!IS_ALIGNED(addr, sizeof(u32))) {
+ dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32));
+ return -EINVAL;
+ }
+
+ switch (acc_type) {
+ case DEBUGFS_READ32:
+ *val = RREG32(addr - cfg_region->region_base);
+ break;
+ case DEBUGFS_WRITE32:
+ WREG32(addr - cfg_region->region_base, *val);
+ break;
+ case DEBUGFS_READ64:
+ val_l = RREG32(addr - cfg_region->region_base);
+ val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base);
+
+ *val = (((u64) val_h) << 32) | val_l;
+ break;
+ case DEBUGFS_WRITE64:
+ WREG32(addr - cfg_region->region_base, lower_32_bits(*val));
+ WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val));
+ break;
+ default:
+ dev_err(hdev->dev, "access type %d is not supported\n", acc_type);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/*
+ * hl_access_dev_mem - access device memory
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @region: the memory region the address belongs to
+ * @region_type: the type of the region the address belongs to
+ * @addr: the address to access
+ * @val: the value to write from or read to
+ * @acc_type: the type of access (r/w, 32/64)
+ */
+int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region,
+ enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type)
+{
+ switch (region_type) {
+ case PCI_REGION_CFG:
+ return hl_access_cfg_region(hdev, addr, val, acc_type);
+ case PCI_REGION_SRAM:
+ case PCI_REGION_DRAM:
+ return hl_access_sram_dram_region(hdev, addr, val, acc_type,
+ region_type);
+ default:
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
@@ -107,6 +283,14 @@ static void hpriv_release(struct kref *ref)
hdev->is_compute_ctx_active = false;
mutex_unlock(&hdev->fpriv_list_lock);
+ hdev->compute_ctx_in_release = 0;
+
+ /* release the eventfd */
+ if (hpriv->notifier_event.eventfd)
+ eventfd_ctx_put(hpriv->notifier_event.eventfd);
+
+ mutex_destroy(&hpriv->notifier_event.lock);
+
kfree(hpriv);
}
@@ -146,10 +330,11 @@ static int hl_device_release(struct inode *inode, struct file *filp)
*/
hl_release_pending_user_interrupts(hpriv->hdev);
- hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
- hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
+ hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
+ hdev->compute_ctx_in_release = 1;
+
if (!hl_hpriv_put(hpriv))
dev_notice(hdev->dev,
"User process closed FD but device still in use\n");
@@ -176,6 +361,11 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
+ /* release the eventfd */
+ if (hpriv->notifier_event.eventfd)
+ eventfd_ctx_put(hpriv->notifier_event.eventfd);
+
+ mutex_destroy(&hpriv->notifier_event.lock);
put_pid(hpriv->taskpid);
kfree(hpriv);
@@ -204,17 +394,15 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
}
vm_pgoff = vma->vm_pgoff;
- vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
- case HL_MMAP_TYPE_CB:
- return hl_cb_mmap(hpriv, vma);
-
case HL_MMAP_TYPE_BLOCK:
+ vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
return hl_hw_block_mmap(hpriv, vma);
+ case HL_MMAP_TYPE_CB:
case HL_MMAP_TYPE_TS_BUFF:
- return hl_ts_mmap(hpriv, vma);
+ return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL);
}
return -EINVAL;
@@ -424,18 +612,25 @@ static int device_early_init(struct hl_device *hdev)
goto free_eq_wq;
}
+ hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
+ if (!hdev->pf_wq) {
+ dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
+ rc = -ENOMEM;
+ goto free_ts_free_wq;
+ }
+
hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
GFP_KERNEL);
if (!hdev->hl_chip_info) {
rc = -ENOMEM;
- goto free_ts_free_wq;
+ goto free_pf_wq;
}
rc = hl_mmu_if_set_funcs(hdev);
if (rc)
goto free_chip_info;
- hl_cb_mgr_init(&hdev->kernel_cb_mgr);
+ hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);
hdev->device_reset_work.wq =
create_singlethread_workqueue("hl_device_reset");
@@ -464,9 +659,11 @@ static int device_early_init(struct hl_device *hdev)
return 0;
free_cb_mgr:
- hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
+ hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
free_chip_info:
kfree(hdev->hl_chip_info);
+free_pf_wq:
+ destroy_workqueue(hdev->pf_wq);
free_ts_free_wq:
destroy_workqueue(hdev->ts_free_obj_wq);
free_eq_wq:
@@ -503,10 +700,11 @@ static void device_early_fini(struct hl_device *hdev)
mutex_destroy(&hdev->clk_throttling.lock);
- hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
+ hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
kfree(hdev->hl_chip_info);
+ destroy_workqueue(hdev->pf_wq);
destroy_workqueue(hdev->ts_free_obj_wq);
destroy_workqueue(hdev->eq_wq);
destroy_workqueue(hdev->device_reset_work.wq);
@@ -703,6 +901,9 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev, skip_wq_flush);
+ /* flush the MMU prefetch workqueue */
+ flush_workqueue(hdev->pf_wq);
+
/* Release all pending user interrupts, each pending user interrupt
* holds a reference to user context
*/
@@ -847,10 +1048,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
put_task_struct(task);
} else {
- dev_warn(hdev->dev,
- "Can't get task struct for PID so giving up on killing process\n");
- mutex_unlock(fd_lock);
- return -ETIME;
+ /*
+ * If we got here, it means that process was killed from outside the driver
+ * right after it started looping on fd_list and before get_pid_task, thus
+ * we don't need to kill it.
+ */
+ dev_dbg(hdev->dev,
+ "Can't get task struct for user process, assuming process was killed from outside the driver\n");
}
}
@@ -1062,9 +1266,9 @@ do_reset:
if (hard_reset)
dev_info(hdev->dev, "Going to reset device\n");
else if (reset_upon_device_release)
- dev_info(hdev->dev, "Going to reset device after release by user\n");
+ dev_dbg(hdev->dev, "Going to reset device after release by user\n");
else
- dev_info(hdev->dev, "Going to reset engines of inference device\n");
+ dev_dbg(hdev->dev, "Going to reset engines of inference device\n");
}
again:
@@ -1270,7 +1474,10 @@ kill_processes:
hdev->reset_info.needs_reset = false;
- dev_notice(hdev->dev, "Successfully finished resetting the device\n");
+ if (hard_reset)
+ dev_info(hdev->dev, "Successfully finished resetting the device\n");
+ else
+ dev_dbg(hdev->dev, "Successfully finished resetting the device\n");
if (hard_reset) {
hdev->reset_info.hard_reset_cnt++;
@@ -1323,6 +1530,43 @@ out_err:
return rc;
}
+static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event)
+{
+ mutex_lock(&notifier_event->lock);
+ notifier_event->events_mask |= event;
+ if (notifier_event->eventfd)
+ eventfd_signal(notifier_event->eventfd, 1);
+
+ mutex_unlock(&notifier_event->lock);
+}
+
+/*
+ * hl_notifier_event_send_all - notify all user processes via eventfd
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @event: the occurred event
+ * Returns 0 for success or an error on failure.
+ */
+void hl_notifier_event_send_all(struct hl_device *hdev, u64 event)
+{
+ struct hl_fpriv *hpriv;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
+ hl_notifier_event_send(&hpriv->notifier_event, event);
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ /* control device */
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
+
+ list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
+ hl_notifier_event_send(&hpriv->notifier_event, event);
+
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
+}
+
/*
* hl_device_init - main initialization function for habanalabs device
*
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 3262126cc7ca..828a36af5b14 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -18,8 +18,9 @@
static char *extract_fw_ver_from_str(const char *fw_str)
{
char *str, *fw_ver, *whitespace;
+ u32 ver_offset;
- fw_ver = kmalloc(16, GFP_KERNEL);
+ fw_ver = kmalloc(VERSION_MAX_LEN, GFP_KERNEL);
if (!fw_ver)
return NULL;
@@ -29,9 +30,10 @@ static char *extract_fw_ver_from_str(const char *fw_str)
/* Skip the fw- part */
str += 3;
+ ver_offset = str - fw_str;
/* Copy until the next whitespace */
- whitespace = strnstr(str, " ", 15);
+ whitespace = strnstr(str, " ", VERSION_MAX_LEN - ver_offset);
if (!whitespace)
goto free_fw_ver;
@@ -819,6 +821,54 @@ out:
return rc;
}
+int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+ struct cpucp_monitor_dump *mon_dump_cpu_addr;
+ dma_addr_t mon_dump_dma_addr;
+ struct cpucp_packet pkt = {};
+ size_t data_size;
+ __le32 *src_ptr;
+ u32 *dst_ptr;
+ u64 result;
+ int i, rc;
+
+ data_size = sizeof(struct cpucp_monitor_dump);
+ mon_dump_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, data_size,
+ &mon_dump_dma_addr);
+ if (!mon_dump_cpu_addr) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for CPU-CP monitor-dump packet\n");
+ return -ENOMEM;
+ }
+
+ memset(mon_dump_cpu_addr, 0, data_size);
+
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_MONITOR_DUMP_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(mon_dump_dma_addr);
+ pkt.data_max_size = cpu_to_le32(data_size);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc);
+ goto out;
+ }
+
+ /* result contains the actual size */
+ src_ptr = (__le32 *) mon_dump_cpu_addr;
+ dst_ptr = data;
+ for (i = 0; i < (data_size / sizeof(u32)); i++) {
+ *dst_ptr = le32_to_cpu(*src_ptr);
+ src_ptr++;
+ dst_ptr++;
+ }
+
+out:
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr);
+
+ return rc;
+}
+
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters)
{
@@ -1539,7 +1589,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
le32_to_cpu(dyn_regs->cpu_cmd_status_to_host),
status,
FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status,
- hdev->fw_poll_interval_usec,
+ hdev->fw_comms_poll_interval_usec,
timeout);
if (rc) {
@@ -1909,7 +1959,7 @@ static int hl_fw_dynamic_request_descriptor(struct hl_device *hdev,
* @fwc: the firmware component
* @fw_version: fw component's version string
*/
-static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
+static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
enum hl_fw_component fwc,
const char *fw_version)
{
@@ -1933,23 +1983,33 @@ static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
VERSION_MAX_LEN);
if (preboot_ver && preboot_ver != prop->preboot_ver) {
strscpy(btl_ver, prop->preboot_ver,
- min((int) (preboot_ver - prop->preboot_ver),
- 31));
+ min((int) (preboot_ver - prop->preboot_ver), 31));
dev_info(hdev->dev, "%s\n", btl_ver);
}
preboot_ver = extract_fw_ver_from_str(prop->preboot_ver);
if (preboot_ver) {
- dev_info(hdev->dev, "preboot version %s\n",
- preboot_ver);
+ char major[8];
+ int rc;
+
+ dev_info(hdev->dev, "preboot version %s\n", preboot_ver);
+ sprintf(major, "%.2s", preboot_ver);
kfree(preboot_ver);
+
+ rc = kstrtou32(major, 10, &hdev->fw_major_version);
+ if (rc) {
+ dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc);
+ return rc;
+ }
}
break;
default:
dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
- return;
+ return -EINVAL;
}
+
+ return 0;
}
/**
@@ -2121,9 +2181,10 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev,
goto release_fw;
/* read preboot version */
- hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc,
+ rc = hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc,
fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
-
+ if (rc)
+ goto release_fw;
/* update state according to boot stage */
if (cur_fwc == FW_COMP_BOOT_FIT) {
@@ -2390,9 +2451,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
goto protocol_err;
/* read preboot version */
- hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
+ return hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
- return 0;
}
/* load boot fit to FW */
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 1edaf6ab67bd..b0b0f3f89865 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -21,6 +21,7 @@
#include <linux/hashtable.h>
#include <linux/debugfs.h>
#include <linux/rwsem.h>
+#include <linux/eventfd.h>
#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/sched/signal.h>
@@ -61,8 +62,10 @@
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */
#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */
+#define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
@@ -394,18 +397,8 @@ enum hl_device_hw_state {
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
* @start_addr: virtual start address of the memory region.
* @end_addr: virtual end address of the memory region.
- * @hop0_shift: shift of hop 0 mask.
- * @hop1_shift: shift of hop 1 mask.
- * @hop2_shift: shift of hop 2 mask.
- * @hop3_shift: shift of hop 3 mask.
- * @hop4_shift: shift of hop 4 mask.
- * @hop5_shift: shift of hop 5 mask.
- * @hop0_mask: mask to get the PTE address in hop 0.
- * @hop1_mask: mask to get the PTE address in hop 1.
- * @hop2_mask: mask to get the PTE address in hop 2.
- * @hop3_mask: mask to get the PTE address in hop 3.
- * @hop4_mask: mask to get the PTE address in hop 4.
- * @hop5_mask: mask to get the PTE address in hop 5.
+ * @hop_shifts: array holds HOPs shifts.
+ * @hop_masks: array holds HOPs masks.
* @last_mask: mask to get the bit indicating this is the last hop.
* @pgt_size: size for page tables.
* @page_size: default page size used to allocate memory.
@@ -418,18 +411,8 @@ enum hl_device_hw_state {
struct hl_mmu_properties {
u64 start_addr;
u64 end_addr;
- u64 hop0_shift;
- u64 hop1_shift;
- u64 hop2_shift;
- u64 hop3_shift;
- u64 hop4_shift;
- u64 hop5_shift;
- u64 hop0_mask;
- u64 hop1_mask;
- u64 hop2_mask;
- u64 hop3_mask;
- u64 hop4_mask;
- u64 hop5_mask;
+ u64 hop_shifts[MMU_HOP_MAX];
+ u64 hop_masks[MMU_HOP_MAX];
u64 last_mask;
u64 pgt_size;
u32 page_size;
@@ -486,8 +469,10 @@ struct hl_hints_range {
* the device's MMU.
* @dram_hints_align_mask: dram va hint addresses alignment mask which is used
* for hints validity check.
- * device_dma_offset_for_host_access: the offset to add to host DMA addresses
- * to enable the device to access them.
+ * @device_dma_offset_for_host_access: the offset to add to host DMA addresses
+ * to enable the device to access them.
+ * @host_base_address: host physical start address for host DMA from device
+ * @host_end_address: host physical end address for host DMA from device
* @max_freq_value: current max clk frequency.
* @clk_pll_index: clock PLL index that specify which PLL determines the clock
* we display to the user
@@ -528,6 +513,10 @@ struct hl_hints_range {
* @fw_app_cpu_boot_dev_sts1: bitmap representation of application security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS1
+ * @device_mem_alloc_default_page_size: may be different than dram_page_size only for ASICs for
+ * which the property supports_user_set_page_size is true
+ * (i.e. the DRAM supports multiple page sizes), otherwise
+ * it will shall be equal to dram_page_size.
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@@ -568,6 +557,7 @@ struct hl_hints_range {
* @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
* @supports_user_set_page_size: true if user can set the allocation page size.
+ * @dma_mask: the dma mask to be set for this device
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -599,6 +589,8 @@ struct asic_fixed_properties {
u64 cb_va_end_addr;
u64 dram_hints_align_mask;
u64 device_dma_offset_for_host_access;
+ u64 host_base_address;
+ u64 host_end_address;
u64 max_freq_value;
u32 clk_pll_index;
u32 mmu_pgt_size;
@@ -626,6 +618,7 @@ struct asic_fixed_properties {
u32 fw_bootfit_cpu_boot_dev_sts1;
u32 fw_app_cpu_boot_dev_sts0;
u32 fw_app_cpu_boot_dev_sts1;
+ u32 device_mem_alloc_default_page_size;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@@ -654,6 +647,7 @@ struct asic_fixed_properties {
u8 configurable_stop_on_err;
u8 set_max_power_on_device_init;
u8 supports_user_set_page_size;
+ u8 dma_mask;
};
/**
@@ -711,85 +705,102 @@ struct hl_cs_compl {
*/
/**
- * struct hl_cb_mgr - describes a Command Buffer Manager.
- * @cb_lock: protects cb_handles.
- * @cb_handles: an idr to hold all command buffer handles.
- */
-struct hl_cb_mgr {
- spinlock_t cb_lock;
- struct idr cb_handles; /* protected by cb_lock */
-};
-
-/**
- * struct hl_ts_mgr - describes the timestamp registration memory manager.
- * @ts_lock: protects ts_handles.
- * @ts_handles: an idr to hold all ts bufferes handles.
- */
-struct hl_ts_mgr {
- spinlock_t ts_lock;
- struct idr ts_handles;
-};
-
-/**
* struct hl_ts_buff - describes a timestamp buffer.
- * @refcount: reference counter for usage of the buffer.
- * @hdev: pointer to device this buffer belongs to.
- * @mmap: true if the buff is currently mapped to user.
* @kernel_buff_address: Holds the internal buffer's kernel virtual address.
* @user_buff_address: Holds the user buffer's kernel virtual address.
- * @id: the buffer ID.
- * @mmap_size: Holds the buffer size that was mmaped.
* @kernel_buff_size: Holds the internal kernel buffer size.
- * @user_buff_size: Holds the user buffer size.
*/
struct hl_ts_buff {
- struct kref refcount;
- struct hl_device *hdev;
- atomic_t mmap;
void *kernel_buff_address;
void *user_buff_address;
- u32 id;
- u32 mmap_size;
u32 kernel_buff_size;
- u32 user_buff_size;
+};
+
+struct hl_mmap_mem_buf;
+
+/**
+ * struct hl_mem_mgr - describes unified memory manager for mappable memory chunks.
+ * @dev: back pointer to the owning device
+ * @lock: protects handles
+ * @handles: an idr holding all active handles to the memory buffers in the system.
+ */
+struct hl_mem_mgr {
+ struct device *dev;
+ spinlock_t lock;
+ struct idr handles;
+};
+
+/**
+ * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior
+ * @topic: string identifier used for logging
+ * @mem_id: memory type identifier, embedded in the handle and used to identify
+ * the memory type by handle.
+ * @alloc: callback executed on buffer allocation, shall allocate the memory,
+ * set it under buffer private, and set mappable size.
+ * @mmap: callback executed on mmap, must map the buffer to vma
+ * @release: callback executed on release, must free the resources used by the buffer
+ */
+struct hl_mmap_mem_buf_behavior {
+ const char *topic;
+ u64 mem_id;
+
+ int (*alloc)(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args);
+ int (*mmap)(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args);
+ void (*release)(struct hl_mmap_mem_buf *buf);
+};
+
+/**
+ * struct hl_mmap_mem_buf - describes a single unified memory buffer
+ * @behavior: buffer behavior
+ * @mmg: back pointer to the unified memory manager
+ * @refcount: reference counter for buffer users
+ * @private: pointer to buffer behavior private data
+ * @mmap: atomic boolean indicating whether or not the buffer is mapped right now
+ * @real_mapped_size: the actual size of buffer mapped, after part of it may be released,
+ * may change at runtime.
+ * @mappable_size: the original mappable size of the buffer, does not change after
+ * the allocation.
+ * @handle: the buffer id in mmg handles store
+ */
+struct hl_mmap_mem_buf {
+ struct hl_mmap_mem_buf_behavior *behavior;
+ struct hl_mem_mgr *mmg;
+ struct kref refcount;
+ void *private;
+ atomic_t mmap;
+ u64 real_mapped_size;
+ u64 mappable_size;
+ u64 handle;
};
/**
* struct hl_cb - describes a Command Buffer.
- * @refcount: reference counter for usage of the CB.
* @hdev: pointer to device this CB belongs to.
* @ctx: pointer to the CB owner's context.
- * @lock: spinlock to protect mmap flows.
+ * @buf: back pointer to the parent mappable memory buffer
* @debugfs_list: node in debugfs list of command buffers.
* @pool_list: node in pool list of command buffers.
* @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
* the device's MMU.
- * @id: the CB's ID.
* @kernel_address: Holds the CB's kernel virtual address.
* @bus_address: Holds the CB's DMA address.
- * @mmap_size: Holds the CB's size that was mmaped.
* @size: holds the CB's size.
* @cs_cnt: holds number of CS that this CB participates in.
- * @mmap: true if the CB is currently mmaped to user.
* @is_pool: true if CB was acquired from the pool, false otherwise.
* @is_internal: internaly allocated
* @is_mmu_mapped: true if the CB is mapped to the device's MMU.
*/
struct hl_cb {
- struct kref refcount;
struct hl_device *hdev;
struct hl_ctx *ctx;
- spinlock_t lock;
+ struct hl_mmap_mem_buf *buf;
struct list_head debugfs_list;
struct list_head pool_list;
struct list_head va_block_list;
- u64 id;
void *kernel_address;
dma_addr_t bus_address;
- u32 mmap_size;
u32 size;
atomic_t cs_cnt;
- u8 mmap;
u8 is_pool;
u8 is_internal;
u8 is_mmu_mapped;
@@ -935,12 +946,12 @@ struct hl_user_interrupt {
* struct timestamp_reg_free_node - holds the timestamp registration free objects node
* @free_objects_node: node in the list free_obj_jobs
* @cq_cb: pointer to cq command buffer to be freed
- * @ts_buff: pointer to timestamp buffer to be freed
+ * @buf: pointer to timestamp buffer to be freed
*/
struct timestamp_reg_free_node {
struct list_head free_objects_node;
struct hl_cb *cq_cb;
- struct hl_ts_buff *ts_buff;
+ struct hl_mmap_mem_buf *buf;
};
/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
@@ -957,8 +968,8 @@ struct timestamp_reg_work_obj {
};
/* struct timestamp_reg_info - holds the timestamp registration related data.
- * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
- * relevant only when doing timestamps records registration.
+ * @buf: pointer to the timestamp buffer which include both user/kernel buffers.
+ * relevant only when doing timestamps records registration.
* @cq_cb: pointer to CQ counter CB.
* @timestamp_kernel_addr: timestamp handle address, where to set timestamp
* relevant only when doing timestamps records
@@ -969,7 +980,7 @@ struct timestamp_reg_work_obj {
* allocating records dynamically.
*/
struct timestamp_reg_info {
- struct hl_ts_buff *ts_buff;
+ struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
u64 *timestamp_kernel_addr;
u8 in_use;
@@ -1068,6 +1079,15 @@ enum div_select_defs {
DIV_SEL_DIVIDED_PLL = 3,
};
+enum debugfs_access_type {
+ DEBUGFS_READ8,
+ DEBUGFS_WRITE8,
+ DEBUGFS_READ32,
+ DEBUGFS_WRITE32,
+ DEBUGFS_READ64,
+ DEBUGFS_WRITE64,
+};
+
enum pci_region {
PCI_REGION_CFG,
PCI_REGION_SRAM,
@@ -1229,6 +1249,7 @@ struct fw_load_mgr {
* its implementation is not trivial when the driver
* is loaded in simulation mode (not upstreamed).
* @scrub_device_mem: Scrub device memory given an address and size
+ * @scrub_device_dram: Scrub the dram memory of the device.
* @get_int_queue_base: get the internal queue base address.
* @test_queues: run simple test on all queues for sanity check.
* @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
@@ -1236,18 +1257,14 @@ struct fw_load_mgr {
* @asic_dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
- * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
+ * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
* @cs_parser: parse Command Submission.
- * @asic_dma_map_sg: DMA map scatter-gather list.
+ * @asic_dma_map_sgtable: DMA map scatter-gather table.
* @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
* @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
* @update_eq_ci: update event queue CI.
* @context_switch: called upon ASID context switch.
* @restore_phase_topology: clear all SOBs amd MONs.
- * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM/Host memory.
- * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM/Host memory.
- * @debugfs_read64: debug interface for reading u64 from DRAM/SRAM/Host memory.
- * @debugfs_write64: debug interface for writing u64 to DRAM/SRAM/Host memory.
* @debugfs_read_dma: debug interface for reading up to 2MB from the device's
* internal memory via DMA engine.
* @add_device_attr: add ASIC specific device attributes.
@@ -1257,8 +1274,8 @@ struct fw_load_mgr {
* @write_pte: write MMU page table entry to DRAM.
* @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
* (L1 only) or hard (L0 & L1) flush.
- * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
- * ASID-VA-size mask.
+ * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with ASID-VA-size mask.
+ * @mmu_prefetch_cache_range: pre-fetch specific MMU STLB cache lines with ASID-VA-size mask.
* @send_heartbeat: send is-alive packet to CPU-CP and verify response.
* @debug_coresight: perform certain actions on Coresight for debugging.
* @is_device_idle: return true if device is idle, false otherwise.
@@ -1267,6 +1284,7 @@ struct fw_load_mgr {
* @hw_queues_unlock: release H/W queues lock.
* @get_pci_id: retrieve PCI ID.
* @get_eeprom_data: retrieve EEPROM data from F/W.
+ * @get_monitor_dump: retrieve monitor registers dump from F/W.
* @send_cpu_message: send message to F/W. If the message is timedout, the
* driver will eventually reset the device. The timeout can
* be determined by the calling function or it can be 0 and
@@ -1289,8 +1307,6 @@ struct fw_load_mgr {
* @gen_wait_cb: Generate a wait CB.
* @reset_sob: Reset a SOB.
* @reset_sob_group: Reset SOB group
- * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
- * firmware configuration
* @get_device_time: Get the device time.
* @collective_wait_init_cs: Generate collective master/slave packets
* and place them in the relevant cs jobs
@@ -1319,6 +1335,9 @@ struct fw_load_mgr {
* @get_stream_master_qid_arr: get pointer to stream masters QID array
* @is_valid_dram_page_size: return true if page size is supported in device
* memory allocation, otherwise false.
+ * @get_valid_dram_page_orders: get valid device memory allocation page orders
+ * @access_dev_mem: access device memory
+ * @set_dram_bar_base: set the base of the DRAM BAR
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -1342,6 +1361,7 @@ struct hl_asic_funcs {
void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle);
int (*scrub_device_mem)(struct hl_device *hdev, u64 addr, u64 size);
+ int (*scrub_device_dram)(struct hl_device *hdev, u64 val);
void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
dma_addr_t *dma_handle, u16 *queue_len);
int (*test_queues)(struct hl_device *hdev);
@@ -1353,12 +1373,11 @@ struct hl_asic_funcs {
size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr);
- void (*hl_dma_unmap_sg)(struct hl_device *hdev,
- struct scatterlist *sgl, int nents,
+ void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
+ struct sg_table *sgt,
enum dma_data_direction dir);
int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
- int (*asic_dma_map_sg)(struct hl_device *hdev,
- struct scatterlist *sgl, int nents,
+ int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
struct sg_table *sgt);
@@ -1369,14 +1388,6 @@ struct hl_asic_funcs {
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
int (*context_switch)(struct hl_device *hdev, u32 asid);
void (*restore_phase_topology)(struct hl_device *hdev);
- int (*debugfs_read32)(struct hl_device *hdev, u64 addr,
- bool user_address, u32 *val);
- int (*debugfs_write32)(struct hl_device *hdev, u64 addr,
- bool user_address, u32 val);
- int (*debugfs_read64)(struct hl_device *hdev, u64 addr,
- bool user_address, u64 *val);
- int (*debugfs_write64)(struct hl_device *hdev, u64 addr,
- bool user_address, u64 val);
int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size,
void *blob_addr);
void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
@@ -1391,6 +1402,7 @@ struct hl_asic_funcs {
u32 flags);
int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
u32 flags, u32 asid, u64 va, u64 size);
+ int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
@@ -1399,8 +1411,8 @@ struct hl_asic_funcs {
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
u32 (*get_pci_id)(struct hl_device *hdev);
- int (*get_eeprom_data)(struct hl_device *hdev, void *data,
- size_t max_size);
+ int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size);
+ int (*get_monitor_dump)(struct hl_device *hdev, void *data);
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
u16 len, u32 timeout, u64 *result);
int (*pci_bars_map)(struct hl_device *hdev);
@@ -1421,7 +1433,6 @@ struct hl_asic_funcs {
struct hl_gen_wait_properties *prop);
void (*reset_sob)(struct hl_device *hdev, void *data);
void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
- void (*set_dma_mask_from_fw)(struct hl_device *hdev);
u64 (*get_device_time)(struct hl_device *hdev);
int (*collective_wait_init_cs)(struct hl_cs *cs);
int (*collective_wait_create_jobs)(struct hl_device *hdev,
@@ -1445,6 +1456,12 @@ struct hl_asic_funcs {
void (*set_pci_memory_regions)(struct hl_device *hdev);
u32* (*get_stream_master_qid_arr)(void);
bool (*is_valid_dram_page_size)(u32 page_size);
+ int (*mmu_get_real_page_size)(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
+ u32 page_size, u32 *real_page_size, bool is_dram_addr);
+ void (*get_valid_dram_page_orders)(struct hl_info_dev_memalloc_page_sizes *info);
+ int (*access_dev_mem)(struct hl_device *hdev, struct pci_mem_region *region,
+ enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type);
+ u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
};
@@ -1915,6 +1932,18 @@ struct hl_debug_params {
bool enable;
};
+/**
+ * struct hl_notifier_event - holds the notifier data structure
+ * @eventfd: the event file descriptor to raise the notifications
+ * @lock: mutex lock to protect the notifier data flows
+ * @events_mask: indicates the bitmap events
+ */
+struct hl_notifier_event {
+ struct eventfd_ctx *eventfd;
+ struct mutex lock;
+ u64 events_mask;
+};
+
/*
* FILE PRIVATE STRUCTURE
*/
@@ -1926,25 +1955,25 @@ struct hl_debug_params {
* @taskpid: current process ID.
* @ctx: current executing context. TODO: remove for multiple ctx per process
* @ctx_mgr: context manager to handle multiple context for this FD.
- * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
- * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
+ * @mem_mgr: manager descriptor for memory exportable via mmap
+ * @notifier_event: notifier eventfd towards user process
* @debugfs_list: list of relevant ASIC debugfs.
* @dev_node: node in the device list of file private data
* @refcount: number of related contexts.
* @restore_phase_mutex: lock for context switch and restore phase.
*/
struct hl_fpriv {
- struct hl_device *hdev;
- struct file *filp;
- struct pid *taskpid;
- struct hl_ctx *ctx;
- struct hl_ctx_mgr ctx_mgr;
- struct hl_cb_mgr cb_mgr;
- struct hl_ts_mgr ts_mem_mgr;
- struct list_head debugfs_list;
- struct list_head dev_node;
- struct kref refcount;
- struct mutex restore_phase_mutex;
+ struct hl_device *hdev;
+ struct file *filp;
+ struct pid *taskpid;
+ struct hl_ctx *ctx;
+ struct hl_ctx_mgr ctx_mgr;
+ struct hl_mem_mgr mem_mgr;
+ struct hl_notifier_event notifier_event;
+ struct list_head debugfs_list;
+ struct list_head dev_node;
+ struct kref refcount;
+ struct mutex restore_phase_mutex;
};
@@ -1992,12 +2021,14 @@ struct hl_debugfs_entry {
* @userptr_spinlock: protects userptr_list.
* @ctx_mem_hash_list: list of available contexts with MMU mappings.
* @ctx_mem_hash_spinlock: protects cb_list.
- * @blob_desc: descriptor of blob
+ * @data_dma_blob_desc: data DMA descriptor of blob.
+ * @mon_dump_blob_desc: monitor dump descriptor of blob.
* @state_dump: data of the system states in case of a bad cs.
* @state_dump_sem: protects state_dump.
* @addr: next address to read/write from/to in read/write32.
* @mmu_addr: next virtual address to translate to physical address in mmu_show.
* @userptr_lookup: the target user ptr to look up for on demand.
+ * @memory_scrub_val: the value to which the dram will be scrubbed to using cb scrub_device_dram
* @mmu_asid: ASID to use while translating in mmu_show.
* @state_dump_head: index of the latest state dump
* @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
@@ -2021,12 +2052,14 @@ struct hl_dbg_device_entry {
spinlock_t userptr_spinlock;
struct list_head ctx_mem_hash_list;
spinlock_t ctx_mem_hash_spinlock;
- struct debugfs_blob_wrapper blob_desc;
+ struct debugfs_blob_wrapper data_dma_blob_desc;
+ struct debugfs_blob_wrapper mon_dump_blob_desc;
char *state_dump[HL_STATE_DUMP_HIST_LEN];
struct rw_semaphore state_dump_sem;
u64 addr;
u64 mmu_addr;
u64 userptr_lookup;
+ u64 memory_scrub_val;
u32 mmu_asid;
u32 state_dump_head;
u8 i2c_bus;
@@ -2442,6 +2475,24 @@ struct hl_mmu_funcs {
};
/**
+ * struct hl_prefetch_work - prefetch work structure handler
+ * @pf_work: actual work struct.
+ * @ctx: compute context.
+ * @va: virtual address to pre-fetch.
+ * @size: pre-fetch size.
+ * @flags: operation flags.
+ * @asid: ASID for maintenance operation.
+ */
+struct hl_prefetch_work {
+ struct work_struct pf_work;
+ struct hl_ctx *ctx;
+ u64 va;
+ u64 size;
+ u32 flags;
+ u32 asid;
+};
+
+/*
* number of user contexts allowed to call wait_for_multi_cs ioctl in
* parallel
*/
@@ -2517,37 +2568,50 @@ struct hl_clk_throttle {
};
/**
- * struct last_error_session_info - info about last session in which CS timeout or
- * razwi error occurred.
- * @open_dev_timestamp: device open timestamp.
- * @cs_timeout_timestamp: CS timeout timestamp.
- * @razwi_timestamp: razwi timestamp.
- * @cs_write_disable: if set writing to CS parameters in the structure is disabled so the
- * first (root cause) CS timeout will not be overwritten.
- * @razwi_write_disable: if set writing to razwi parameters in the structure is disabled so the
- * first (root cause) razwi will not be overwritten.
- * @cs_timeout_seq: CS timeout sequence number.
- * @razwi_addr: address that caused razwi.
- * @razwi_engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does
- * not have engine id it will be set to U16_MAX.
- * @razwi_engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible
- * engines which one them caused the razwi. In that case, it will contain the
- * second possible engine id, otherwise it will be set to U16_MAX.
- * @razwi_non_engine_initiator: in case the initiator of the razwi does not have engine id.
- * @razwi_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX.
+ * struct cs_timeout_info - info of last CS timeout occurred.
+ * @timestamp: CS timeout timestamp.
+ * @write_disable: if set writing to CS parameters in the structure is disabled so,
+ * the first (root cause) CS timeout will not be overwritten.
+ * @seq: CS timeout sequence number.
+ */
+struct cs_timeout_info {
+ ktime_t timestamp;
+ atomic_t write_disable;
+ u64 seq;
+};
+
+/**
+ * struct razwi_info - info about last razwi error occurred.
+ * @timestamp: razwi timestamp.
+ * @write_disable: if set writing to razwi parameters in the structure is disabled so the
+ * first (root cause) razwi will not be overwritten.
+ * @addr: address that caused razwi.
+ * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does
+ * not have engine id it will be set to U16_MAX.
+ * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible
+ * engines which one them caused the razwi. In that case, it will contain the
+ * second possible engine id, otherwise it will be set to U16_MAX.
+ * @non_engine_initiator: in case the initiator of the razwi does not have engine id.
+ * @type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX.
+ */
+struct razwi_info {
+ ktime_t timestamp;
+ atomic_t write_disable;
+ u64 addr;
+ u16 engine_id_1;
+ u16 engine_id_2;
+ u8 non_engine_initiator;
+ u8 type;
+};
+
+/**
+ * struct last_error_session_info - info about last session errors occurred.
+ * @cs_timeout: CS timeout error last information.
+ * @razwi: razwi last information.
*/
struct last_error_session_info {
- ktime_t open_dev_timestamp;
- ktime_t cs_timeout_timestamp;
- ktime_t razwi_timestamp;
- atomic_t cs_write_disable;
- atomic_t razwi_write_disable;
- u64 cs_timeout_seq;
- u64 razwi_addr;
- u16 razwi_engine_id_1;
- u16 razwi_engine_id_2;
- u8 razwi_non_engine_initiator;
- u8 razwi_type;
+ struct cs_timeout_info cs_timeout;
+ struct razwi_info razwi;
};
/**
@@ -2614,11 +2678,12 @@ struct hl_reset_info {
* context.
* @eq_wq: work queue of event queue for executing work in process context.
* @ts_free_obj_wq: work queue for timestamp registration objects release.
+ * @pf_wq: work queue for MMU pre-fetch operations.
* @kernel_ctx: Kernel driver context structure.
* @kernel_queues: array of hl_hw_queue.
* @cs_mirror_list: CS mirror list for TDR.
* @cs_mirror_lock: protects cs_mirror_list.
- * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
+ * @kernel_mem_mgr: memory manager for memory buffers with lifespan of driver.
* @event_queue: event queue for IRQ from CPU-CP.
* @dma_pool: DMA pool for small allocations.
* @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address.
@@ -2656,9 +2721,10 @@ struct hl_reset_info {
* @state_dump_specs: constants and dictionaries needed to dump system state.
* @multi_cs_completion: array of multi-CS completion.
* @clk_throttling: holds information about current/previous clock throttling events
- * @reset_info: holds current device reset information.
* @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @reset_info: holds current device reset information.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
+ * @fw_major_version: major version of current loaded preboot
* @dram_used_mem: current DRAM memory consumption.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2678,6 +2744,9 @@ struct hl_reset_info {
* session.
* @open_counter: number of successful device open operations.
* @fw_poll_interval_usec: FW status poll interval in usec.
+ * used for CPU boot status
+ * @fw_comms_poll_interval_usec: FW comms/protocol poll interval in usec.
+ * used for COMMs protocols cmds(COMMS_STS_*)
* @card_type: Various ASICs have several card types. This indicates the card
* type of the current device.
* @major: habanalabs kernel driver major.
@@ -2686,6 +2755,7 @@ struct hl_reset_info {
* @id_control: minor of the control device
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
* addresses.
+ * @is_in_dram_scrub: true if dram scrub operation is on going.
* @disabled: is device disabled.
* @late_init_done: is late init stage was done during initialization.
* @hwmon_initialized: is H/W monitor sensors was initialized.
@@ -2699,7 +2769,6 @@ struct hl_reset_info {
* huge pages.
* @init_done: is the initialization of the device done.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
- * @dma_mask: the dma mask that was set for this device
* @in_debug: whether the device is in a state where the profiling/tracing infrastructure
* can be used. This indication is needed because in some ASICs we need to do
* specific operations to enable that infrastructure.
@@ -2721,6 +2790,8 @@ struct hl_reset_info {
* cases where Linux was not loaded to device CPU
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
* @is_compute_ctx_active: Whether there is an active compute context executing.
+ * @compute_ctx_in_release: true if the current compute context is being released.
+ * @supports_mmu_prefetch: true if prefetch is supported, otherwise false.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -2742,11 +2813,12 @@ struct hl_device {
struct workqueue_struct **cq_wq;
struct workqueue_struct *eq_wq;
struct workqueue_struct *ts_free_obj_wq;
+ struct workqueue_struct *pf_wq;
struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues;
struct list_head cs_mirror_list;
spinlock_t cs_mirror_lock;
- struct hl_cb_mgr kernel_cb_mgr;
+ struct hl_mem_mgr kernel_mem_mgr;
struct hl_eq event_queue;
struct dma_pool *dma_pool;
void *cpu_accessible_dma_mem;
@@ -2797,6 +2869,7 @@ struct hl_device {
struct hl_reset_info reset_info;
u32 *stream_master_qid_arr;
+ u32 fw_major_version;
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
@@ -2807,12 +2880,15 @@ struct hl_device {
u64 open_counter;
u64 fw_poll_interval_usec;
ktime_t last_successful_open_ktime;
+ u64 fw_comms_poll_interval_usec;
+
enum cpucp_card_types card_type;
u32 major;
u32 high_pll;
u16 id;
u16 id_control;
u16 cpu_pci_msb_addr;
+ u8 is_in_dram_scrub;
u8 disabled;
u8 late_init_done;
u8 hwmon_initialized;
@@ -2823,7 +2899,6 @@ struct hl_device {
u8 pmmu_huge_range;
u8 init_done;
u8 device_cpu_disabled;
- u8 dma_mask;
u8 in_debug;
u8 cdev_sysfs_created;
u8 stop_on_err;
@@ -2839,6 +2914,8 @@ struct hl_device {
u8 supports_wait_for_multi_cs;
u8 stream_master_qid_arr_size;
u8 is_compute_ctx_active;
+ u8 compute_ctx_in_release;
+ u8 supports_mmu_prefetch;
/* Parameters for bring-up */
u64 nic_ports_mask;
@@ -2971,6 +3048,14 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
return ((address <= range_end_address) && (range_start_address <= end_address));
}
+uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
+int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
+void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+ enum dma_data_direction dir);
+int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
+ enum debugfs_access_type acc_type);
+int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region,
+ enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type);
int hl_device_open(struct inode *inode, struct file *filp);
int hl_device_open_ctrl(struct inode *inode, struct file *filp);
bool hl_device_operational(struct hl_device *hdev,
@@ -3013,7 +3098,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
void hl_ctx_do_release(struct kref *ref);
-void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
+void hl_ctx_get(struct hl_ctx *ctx);
int hl_ctx_put(struct hl_ctx *ctx);
struct hl_ctx *hl_get_compute_ctx(struct hl_device *hdev);
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
@@ -3034,23 +3119,21 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization);
int hl_build_hwmon_channel_info(struct hl_device *hdev,
struct cpucp_sensor *sensors_arr);
+void hl_notifier_event_send_all(struct hl_device *hdev, u64 event);
+
int hl_sysfs_init(struct hl_device *hdev);
void hl_sysfs_fini(struct hl_device *hdev);
int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);
-int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
bool map_cb, u64 *handle);
-int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
-int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle);
int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
-struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u32 handle);
+struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle);
void hl_cb_put(struct hl_cb *cb);
-void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
-void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
bool internal_cb);
int hl_cb_pool_init(struct hl_device *hdev);
@@ -3104,6 +3187,8 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
u32 page_size, bool flush_pte);
+int hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
+ u32 page_size, u32 *real_page_size, bool is_dram_addr);
int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte);
int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
@@ -3112,6 +3197,7 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags);
int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
u32 flags, u32 asid, u64 va, u64 size);
+int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size);
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte);
u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
u8 hop_idx, u64 hop_addr, u64 virt_addr);
@@ -3149,6 +3235,7 @@ int hl_fw_cpucp_handshake(struct hl_device *hdev,
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
u32 boot_err1_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
@@ -3224,11 +3311,19 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
const char *format, ...);
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
-void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
-void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
-int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
-struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
-void hl_ts_put(struct hl_ts_buff *buff);
+
+void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg);
+void hl_mem_mgr_fini(struct hl_mem_mgr *mmg);
+int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
+ void *args);
+struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg,
+ u64 handle);
+int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle);
+int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf);
+struct hl_mmap_mem_buf *
+hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
+ struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp,
+ void *args);
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index ca404ed9d9a7..37edb69a7255 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -134,13 +134,14 @@ int hl_device_open(struct inode *inode, struct file *filp)
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
+
+ mutex_init(&hpriv->notifier_event.lock);
mutex_init(&hpriv->restore_phase_mutex);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
- hl_cb_mgr_init(&hpriv->cb_mgr);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
- hl_ts_mgr_init(&hpriv->ts_mem_mgr);
+ hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
@@ -150,7 +151,28 @@ int hl_device_open(struct inode *inode, struct file *filp)
dev_err_ratelimited(hdev->dev,
"Can't open %s because it is %s\n",
dev_name(hdev->dev), hdev->status[status]);
- rc = -EPERM;
+
+ if (status == HL_DEVICE_STATUS_IN_RESET)
+ rc = -EAGAIN;
+ else
+ rc = -EPERM;
+
+ goto out_err;
+ }
+
+ if (hdev->is_in_dram_scrub) {
+ dev_dbg_ratelimited(hdev->dev,
+ "Can't open %s during dram scrub\n",
+ dev_name(hdev->dev));
+ rc = -EAGAIN;
+ goto out_err;
+ }
+
+ if (hdev->compute_ctx_in_release) {
+ dev_dbg_ratelimited(hdev->dev,
+ "Can't open %s because another user is still releasing it\n",
+ dev_name(hdev->dev));
+ rc = -EAGAIN;
goto out_err;
}
@@ -173,8 +195,8 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv);
- atomic_set(&hdev->last_error.cs_write_disable, 0);
- atomic_set(&hdev->last_error.razwi_write_disable, 0);
+ atomic_set(&hdev->last_error.cs_timeout.write_disable, 0);
+ atomic_set(&hdev->last_error.razwi.write_disable, 0);
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
@@ -184,11 +206,11 @@ int hl_device_open(struct inode *inode, struct file *filp)
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
- hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
- hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
+ hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->restore_phase_mutex);
+ mutex_destroy(&hpriv->notifier_event.lock);
put_pid(hpriv->taskpid);
kfree(hpriv);
@@ -222,9 +244,11 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
+
+ mutex_init(&hpriv->notifier_event.lock);
nonseekable_open(inode, filp);
- hpriv->taskpid = find_get_pid(current->pid);
+ hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
mutex_lock(&hdev->fpriv_ctrl_list_lock);
@@ -288,6 +312,7 @@ static int fixup_device_params(struct hl_device *hdev)
hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
+ hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
hdev->stop_on_err = true;
hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
@@ -296,9 +321,6 @@ static int fixup_device_params(struct hl_device *hdev)
/* Enable only after the initialization of the device */
hdev->disabled = true;
- /* Set default DMA mask to 32 bits */
- hdev->dma_mask = 32;
-
return 0;
}
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index c13a3c2a7013..c7864d6bb0a1 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -76,6 +76,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
if (hw_ip.dram_size > PAGE_SIZE)
hw_ip.dram_enabled = 1;
hw_ip.dram_page_size = prop->dram_page_size;
+ hw_ip.device_mem_alloc_default_page_size = prop->device_mem_alloc_default_page_size;
hw_ip.num_of_events = prop->num_of_events;
memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
@@ -115,6 +116,23 @@ static int hw_events_info(struct hl_device *hdev, bool aggregate,
return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
}
+static int events_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ u32 max_size = args->return_size;
+ u64 events_mask;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((max_size < sizeof(u64)) || (!out))
+ return -EINVAL;
+
+ mutex_lock(&hpriv->notifier_event.lock);
+ events_mask = hpriv->notifier_event.events_mask;
+ hpriv->notifier_event.events_mask = 0;
+ mutex_unlock(&hpriv->notifier_event.lock);
+
+ return copy_to_user(out, &events_mask, sizeof(u64)) ? -EFAULT : 0;
+}
+
static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
@@ -497,6 +515,8 @@ static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
open_stats_info.last_open_period_ms = jiffies64_to_msecs(
hdev->last_open_session_duration_jif);
open_stats_info.open_counter = hdev->open_counter;
+ open_stats_info.is_compute_ctx_active = hdev->is_compute_ctx_active;
+ open_stats_info.compute_ctx_in_release = hdev->compute_ctx_in_release;
return copy_to_user(out, &open_stats_info,
min((size_t) max_size, sizeof(open_stats_info))) ? -EFAULT : 0;
@@ -549,7 +569,7 @@ static int last_err_open_dev_info(struct hl_fpriv *hpriv, struct hl_info_args *a
if ((!max_size) || (!out))
return -EINVAL;
- info.timestamp = ktime_to_ns(hdev->last_error.open_dev_timestamp);
+ info.timestamp = ktime_to_ns(hdev->last_successful_open_ktime);
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -564,8 +584,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- info.seq = hdev->last_error.cs_timeout_seq;
- info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout_timestamp);
+ info.seq = hdev->last_error.cs_timeout.seq;
+ info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -580,16 +600,74 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- info.timestamp = ktime_to_ns(hdev->last_error.razwi_timestamp);
- info.addr = hdev->last_error.razwi_addr;
- info.engine_id_1 = hdev->last_error.razwi_engine_id_1;
- info.engine_id_2 = hdev->last_error.razwi_engine_id_2;
- info.no_engine_id = hdev->last_error.razwi_non_engine_initiator;
- info.error_type = hdev->last_error.razwi_type;
+ info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
+ info.addr = hdev->last_error.razwi.addr;
+ info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
+ info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
+ info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
+ info.error_type = hdev->last_error.razwi.type;
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
+static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ struct hl_info_dev_memalloc_page_sizes info = {0};
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ /*
+ * Future ASICs that will support multiple DRAM page sizes will support only "powers of 2"
+ * pages (unlike some of the ASICs before supporting multiple page sizes).
+ * For this reason for all ASICs that not support multiple page size the function will
+ * return an empty bitmask indicating that multiple page sizes is not supported.
+ */
+ hdev->asic_funcs->get_valid_dram_page_orders(&info);
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
+static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ int rc;
+
+ /* check if there is already a registered on that process */
+ mutex_lock(&hpriv->notifier_event.lock);
+ if (hpriv->notifier_event.eventfd) {
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return -EINVAL;
+ }
+
+ hpriv->notifier_event.eventfd = eventfd_ctx_fdget(args->eventfd);
+ if (IS_ERR(hpriv->notifier_event.eventfd)) {
+ rc = PTR_ERR(hpriv->notifier_event.eventfd);
+ hpriv->notifier_event.eventfd = NULL;
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return rc;
+ }
+
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return 0;
+}
+
+static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ mutex_lock(&hpriv->notifier_event.lock);
+ if (!hpriv->notifier_event.eventfd) {
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return -EINVAL;
+ }
+
+ eventfd_ctx_put(hpriv->notifier_event.eventfd);
+ hpriv->notifier_event.eventfd = NULL;
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -640,6 +718,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_RAZWI_EVENT:
return razwi_info(hpriv, args);
+ case HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES:
+ return dev_mem_alloc_page_sizes_info(hpriv, args);
+
+ case HL_INFO_GET_EVENTS:
+ return events_info(hpriv, args);
+
default:
break;
}
@@ -690,6 +774,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_DRAM_PENDING_ROWS:
return dram_pending_rows_info(hpriv, args);
+ case HL_INFO_REGISTER_EVENTFD:
+ return eventfd_register(hpriv, args);
+
+ case HL_INFO_UNREGISTER_EVENTFD:
+ return eventfd_unregister(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -EINVAL;
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index e2bc128f2291..8500e15ef743 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -152,11 +152,11 @@ static void hl_ts_free_objects(struct work_struct *work)
struct hl_device *hdev = job->hdev;
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
- dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
- free_obj->ts_buff,
+ dev_dbg(hdev->dev, "About to put refcount to buf (%p) cq_cb(%p)\n",
+ free_obj->buf,
free_obj->cq_cb);
- hl_ts_put(free_obj->ts_buff);
+ hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
kfree(free_obj);
}
@@ -210,7 +210,7 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
/* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list.
*/
- free_node->ts_buff = pend->ts_reg_info.ts_buff;
+ free_node->buf = pend->ts_reg_info.buf;
free_node->cq_cb = pend->ts_reg_info.cq_cb;
list_add(&free_node->free_objects_node, *free_list);
@@ -244,7 +244,7 @@ static void handle_user_cq(struct hl_device *hdev,
list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
- if (pend->ts_reg_info.ts_buff) {
+ if (pend->ts_reg_info.buf) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head);
@@ -282,10 +282,6 @@ irqreturn_t hl_irq_handler_user_cq(int irq, void *arg)
struct hl_user_interrupt *user_cq = arg;
struct hl_device *hdev = user_cq->hdev;
- dev_dbg(hdev->dev,
- "got user completion interrupt id %u",
- user_cq->interrupt_id);
-
/* Handle user cq interrupts registered on all interrupts */
handle_user_cq(hdev, &hdev->common_user_interrupt);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index a13506dd8119..663dd7e589d4 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -41,7 +41,7 @@ static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u
return -EINVAL;
}
} else {
- psize = hdev->asic_prop.dram_page_size;
+ psize = prop->device_mem_alloc_default_page_size;
}
*page_size = psize;
@@ -117,7 +117,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
paddr = gen_pool_alloc(vm->dram_pg_pool, total_size);
if (!paddr) {
dev_err(hdev->dev,
- "failed to allocate %llu contiguous pages with total size of %llu\n",
+ "Cannot allocate %llu contiguous pages with total size of %llu\n",
num_pgs, total_size);
return -ENOMEM;
}
@@ -156,9 +156,10 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
else
phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool,
page_size);
+
if (!phys_pg_pack->pages[i]) {
dev_err(hdev->dev,
- "Failed to allocate device memory (out of memory)\n");
+ "Cannot allocate device memory (out of memory)\n");
rc = -ENOMEM;
goto page_err;
}
@@ -237,19 +238,18 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
goto pin_err;
}
- rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents, DMA_BIDIRECTIONAL);
- if (rc) {
- dev_err(hdev->dev, "failed to map sgt with DMA region\n");
- goto dma_map_err;
- }
-
userptr->dma_mapped = true;
userptr->dir = DMA_BIDIRECTIONAL;
userptr->vm_type = VM_TYPE_USERPTR;
*p_userptr = userptr;
+ rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
+ if (rc) {
+ dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+ goto dma_map_err;
+ }
+
return 0;
dma_map_err:
@@ -900,7 +900,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
* consecutive block.
*/
total_npages = 0;
- for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr);
total_npages += npages;
@@ -929,7 +929,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
phys_pg_pack->total_size = total_npages * page_size;
j = 0;
- for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr);
/* align down to physical page size and save the offset */
@@ -1102,21 +1102,24 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
* map a device virtual block to this pages and return the start address of
* this block.
*/
-static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
- u64 *device_addr)
+static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr)
{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
+ enum hl_va_range_type va_range_type = 0;
+ struct hl_device *hdev = ctx->hdev;
struct hl_userptr *userptr = NULL;
+ u32 handle = 0, va_block_align;
struct hl_vm_hash_node *hnode;
+ struct hl_vm *vm = &hdev->vm;
struct hl_va_range *va_range;
- enum vm_type *vm_type;
+ bool is_userptr, do_prefetch;
u64 ret_vaddr, hint_addr;
- u32 handle = 0, va_block_align;
+ enum vm_type *vm_type;
int rc;
- bool is_userptr = args->flags & HL_MEM_USERPTR;
- enum hl_va_range_type va_range_type = 0;
+
+ /* set map flags */
+ is_userptr = args->flags & HL_MEM_USERPTR;
+ do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH);
/* Assume failure */
*device_addr = 0;
@@ -1241,19 +1244,27 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
if (rc) {
- mutex_unlock(&ctx->mmu_lock);
- dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
- handle);
+ dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
goto map_err;
}
rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
ctx->asid, ret_vaddr, phys_pg_pack->total_size);
+ if (rc)
+ goto map_err;
mutex_unlock(&ctx->mmu_lock);
- if (rc)
- goto map_err;
+ /*
+ * prefetch is done upon user's request. it is performed in WQ as and so can
+ * be outside the MMU lock. the operation itself is already protected by the mmu lock
+ */
+ if (do_prefetch) {
+ rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr,
+ phys_pg_pack->total_size);
+ if (rc)
+ goto map_err;
+ }
ret_vaddr += phys_pg_pack->offset;
@@ -1272,6 +1283,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc;
map_err:
+ mutex_unlock(&ctx->mmu_lock);
+
if (add_va_block(hdev, va_range, ret_vaddr,
ret_vaddr + phys_pg_pack->total_size - 1))
dev_warn(hdev->dev,
@@ -1509,7 +1522,7 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
vma->vm_ops = &hw_block_vm_ops;
vma->vm_private_data = lnode;
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
if (rc) {
@@ -1819,7 +1832,7 @@ static int export_dmabuf_common(struct hl_ctx *ctx,
}
hl_dmabuf->ctx = ctx;
- hl_ctx_get(hdev, hl_dmabuf->ctx);
+ hl_ctx_get(hl_dmabuf->ctx);
*dmabuf_fd = fd;
@@ -2076,164 +2089,34 @@ out:
return rc;
}
-static void ts_buff_release(struct kref *ref)
-{
- struct hl_ts_buff *buff;
-
- buff = container_of(ref, struct hl_ts_buff, refcount);
-
- vfree(buff->kernel_buff_address);
- vfree(buff->user_buff_address);
- kfree(buff);
-}
-
-struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
- u32 handle)
-{
- struct hl_ts_buff *buff;
-
- spin_lock(&mgr->ts_lock);
- buff = idr_find(&mgr->ts_handles, handle);
- if (!buff) {
- spin_unlock(&mgr->ts_lock);
- dev_warn(hdev->dev,
- "TS buff get failed, no match to handle 0x%x\n", handle);
- return NULL;
- }
- kref_get(&buff->refcount);
- spin_unlock(&mgr->ts_lock);
-
- return buff;
-}
-
-void hl_ts_put(struct hl_ts_buff *buff)
+static void ts_buff_release(struct hl_mmap_mem_buf *buf)
{
- kref_put(&buff->refcount, ts_buff_release);
-}
-
-static void buff_vm_close(struct vm_area_struct *vma)
-{
- struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
- long new_mmap_size;
-
- new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
+ struct hl_ts_buff *ts_buff = buf->private;
- if (new_mmap_size > 0) {
- buff->mmap_size = new_mmap_size;
- return;
- }
-
- atomic_set(&buff->mmap, 0);
- hl_ts_put(buff);
- vma->vm_private_data = NULL;
+ vfree(ts_buff->kernel_buff_address);
+ vfree(ts_buff->user_buff_address);
+ kfree(ts_buff);
}
-static const struct vm_operations_struct ts_buff_vm_ops = {
- .close = buff_vm_close
-};
-
-int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args)
{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_ts_buff *buff;
- u32 handle, user_buff_size;
- int rc;
-
- /* We use the page offset to hold the idr and thus we need to clear
- * it before doing the mmap itself
- */
- handle = vma->vm_pgoff;
- vma->vm_pgoff = 0;
-
- buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
- if (!buff) {
- dev_err(hdev->dev,
- "TS buff mmap failed, no match to handle 0x%x\n", handle);
- return -EINVAL;
- }
-
- /* Validation check */
- user_buff_size = vma->vm_end - vma->vm_start;
- if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
- dev_err(hdev->dev,
- "TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
- user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
- rc = -EINVAL;
- goto put_buff;
- }
-
-#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
- if (!access_ok(VERIFY_WRITE,
- (void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
-#else
- if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
- user_buff_size)) {
-#endif
- dev_err(hdev->dev,
- "user pointer is invalid - 0x%lx\n",
- vma->vm_start);
-
- rc = -EINVAL;
- goto put_buff;
- }
+ struct hl_ts_buff *ts_buff = buf->private;
- if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
- dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
- rc = -EINVAL;
- goto put_buff;
- }
-
- vma->vm_ops = &ts_buff_vm_ops;
- vma->vm_private_data = buff;
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
- rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
- if (rc) {
- atomic_set(&buff->mmap, 0);
- goto put_buff;
- }
-
- buff->mmap_size = buff->user_buff_size;
- vma->vm_pgoff = handle;
-
- return 0;
-
-put_buff:
- hl_ts_put(buff);
- return rc;
-}
-
-void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
-{
- spin_lock_init(&mgr->ts_lock);
- idr_init(&mgr->ts_handles);
+ return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0);
}
-void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
-{
- struct hl_ts_buff *buff;
- struct idr *idp;
- u32 id;
-
- idp = &mgr->ts_handles;
-
- idr_for_each_entry(idp, buff, id) {
- if (kref_put(&buff->refcount, ts_buff_release) != 1)
- dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
- id);
- }
-
- idr_destroy(&mgr->ts_handles);
-}
-
-static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
+static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
struct hl_ts_buff *ts_buff = NULL;
- u32 size;
+ u32 size, num_elements;
void *p;
+ num_elements = *(u32 *)args;
+
ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
if (!ts_buff)
- return NULL;
+ return -ENOMEM;
/* Allocate the user buffer */
size = num_elements * sizeof(u64);
@@ -2242,7 +2125,7 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme
goto free_mem;
ts_buff->user_buff_address = p;
- ts_buff->user_buff_size = size;
+ buf->mappable_size = size;
/* Allocate the internal kernel buffer */
size = num_elements * sizeof(struct hl_user_pending_interrupt);
@@ -2253,15 +2136,25 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme
ts_buff->kernel_buff_address = p;
ts_buff->kernel_buff_size = size;
- return ts_buff;
+ buf->private = ts_buff;
+
+ return 0;
free_user_buff:
vfree(ts_buff->user_buff_address);
free_mem:
kfree(ts_buff);
- return NULL;
+ return -ENOMEM;
}
+static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
+ .topic = "TS",
+ .mem_id = HL_MMAP_TYPE_TS_BUFF,
+ .mmap = hl_ts_mmap,
+ .alloc = hl_ts_alloc_buf,
+ .release = ts_buff_release,
+};
+
/**
* allocate_timestamps_buffers() - allocate timestamps buffers
* This function will allocate ts buffer that will later on be mapped to the user
@@ -2278,54 +2171,22 @@ free_mem:
*/
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
{
- struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
- struct hl_device *hdev = hpriv->hdev;
- struct hl_ts_buff *ts_buff;
- int rc = 0;
+ struct hl_mem_mgr *mmg = &hpriv->mem_mgr;
+ struct hl_mmap_mem_buf *buf;
if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
- dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
+ dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
args->num_of_elements, TS_MAX_ELEMENTS_NUM);
return -EINVAL;
}
- /* Allocate ts buffer object
- * This object will contain two buffers one that will be mapped to the user
- * and another internal buffer for the driver use only, which won't be mapped
- * to the user.
- */
- ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
- if (!ts_buff) {
- rc = -ENOMEM;
- goto out_err;
- }
-
- spin_lock(&ts_mgr->ts_lock);
- rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
- spin_unlock(&ts_mgr->ts_lock);
- if (rc < 0) {
- dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
- goto release_ts_buff;
- }
-
- ts_buff->id = rc;
- ts_buff->hdev = hdev;
-
- kref_init(&ts_buff->refcount);
-
- /* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
- *handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
- *handle <<= PAGE_SHIFT;
+ buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements);
+ if (!buf)
+ return -ENOMEM;
- dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
+ *handle = buf->handle;
return 0;
-
-release_ts_buff:
- kref_put(&ts_buff->refcount, ts_buff_release);
-out_err:
- *handle = 0;
- return rc;
}
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
@@ -2587,9 +2448,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped)
- hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents,
- userptr->dir);
+ hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages);
diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c
new file mode 100644
index 000000000000..ea5f2bd31b0a
--- /dev/null
+++ b/drivers/misc/habanalabs/common/memory_mgr.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2022 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+/**
+ * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
+ * the buffer descriptor.
+ *
+ * @mmg: parent unifed memory manager
+ * @handle: requested buffer handle
+ *
+ * Find the buffer in the store and return a pointer to its descriptor.
+ * Increase buffer refcount. If not found - return NULL.
+ */
+struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, u64 handle)
+{
+ struct hl_mmap_mem_buf *buf;
+
+ spin_lock(&mmg->lock);
+ buf = idr_find(&mmg->handles, lower_32_bits(handle >> PAGE_SHIFT));
+ if (!buf) {
+ spin_unlock(&mmg->lock);
+ dev_warn(mmg->dev,
+ "Buff get failed, no match to handle %#llx\n", handle);
+ return NULL;
+ }
+ kref_get(&buf->refcount);
+ spin_unlock(&mmg->lock);
+ return buf;
+}
+
+/**
+ * hl_mmap_mem_buf_destroy - destroy the unused buffer
+ *
+ * @buf: memory manager buffer descriptor
+ *
+ * Internal function, used as a final step of buffer release. Shall be invoked
+ * only when the buffer is no longer in use (removed from idr). Will call the
+ * release callback (if applicable), and free the memory.
+ */
+static void hl_mmap_mem_buf_destroy(struct hl_mmap_mem_buf *buf)
+{
+ if (buf->behavior->release)
+ buf->behavior->release(buf);
+
+ kfree(buf);
+}
+
+/**
+ * hl_mmap_mem_buf_release - release buffer
+ *
+ * @kref: kref that reached 0.
+ *
+ * Internal function, used as a kref release callback, when the last user of
+ * the buffer is released. Shall be called from an interrupt context.
+ */
+static void hl_mmap_mem_buf_release(struct kref *kref)
+{
+ struct hl_mmap_mem_buf *buf =
+ container_of(kref, struct hl_mmap_mem_buf, refcount);
+
+ spin_lock(&buf->mmg->lock);
+ idr_remove(&buf->mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT));
+ spin_unlock(&buf->mmg->lock);
+
+ hl_mmap_mem_buf_destroy(buf);
+}
+
+/**
+ * hl_mmap_mem_buf_remove_idr_locked - remove handle from idr
+ *
+ * @kref: kref that reached 0.
+ *
+ * Internal function, used for kref put by handle. Assumes mmg lock is taken.
+ * Will remove the buffer from idr, without destroying it.
+ */
+static void hl_mmap_mem_buf_remove_idr_locked(struct kref *kref)
+{
+ struct hl_mmap_mem_buf *buf =
+ container_of(kref, struct hl_mmap_mem_buf, refcount);
+
+ idr_remove(&buf->mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT));
+}
+
+/**
+ * hl_mmap_mem_buf_put - decrease the reference to the buffer
+ *
+ * @buf: memory manager buffer descriptor
+ *
+ * Decrease the reference to the buffer, and release it if it was the last one.
+ * Shall be called from an interrupt context.
+ */
+int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
+{
+ return kref_put(&buf->refcount, hl_mmap_mem_buf_release);
+}
+
+/**
+ * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the
+ * given handle.
+ *
+ * @mmg: parent unifed memory manager
+ * @handle: requested buffer handle
+ *
+ * Decrease the reference to the buffer, and release it if it was the last one.
+ * Shall not be called from an interrupt context. Return -EINVAL if handle was
+ * not found, else return the put outcome (0 or 1).
+ */
+int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle)
+{
+ struct hl_mmap_mem_buf *buf;
+
+ spin_lock(&mmg->lock);
+ buf = idr_find(&mmg->handles, lower_32_bits(handle >> PAGE_SHIFT));
+ if (!buf) {
+ spin_unlock(&mmg->lock);
+ dev_dbg(mmg->dev,
+ "Buff put failed, no match to handle %#llx\n", handle);
+ return -EINVAL;
+ }
+
+ if (kref_put(&buf->refcount, hl_mmap_mem_buf_remove_idr_locked)) {
+ spin_unlock(&mmg->lock);
+ hl_mmap_mem_buf_destroy(buf);
+ return 1;
+ }
+
+ spin_unlock(&mmg->lock);
+ return 0;
+}
+
+/**
+ * @hl_mmap_mem_buf_alloc - allocate a new mappable buffer
+ *
+ * @mmg: parent unifed memory manager
+ * @behavior: behavior object describing this buffer polymorphic behavior
+ * @gfp: gfp flags to use for the memory allocations
+ * @args: additional args passed to behavior->alloc
+ *
+ * Allocate and register a new memory buffer inside the give memory manager.
+ * Return the pointer to the new buffer on success or NULL on failure.
+ */
+struct hl_mmap_mem_buf *
+hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
+ struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp,
+ void *args)
+{
+ struct hl_mmap_mem_buf *buf;
+ int rc;
+
+ buf = kzalloc(sizeof(*buf), gfp);
+ if (!buf)
+ return NULL;
+
+ spin_lock(&mmg->lock);
+ rc = idr_alloc(&mmg->handles, buf, 1, 0, GFP_ATOMIC);
+ spin_unlock(&mmg->lock);
+ if (rc < 0) {
+ dev_err(mmg->dev,
+ "%s: Failed to allocate IDR for a new buffer, rc=%d\n",
+ behavior->topic, rc);
+ goto free_buf;
+ }
+
+ buf->mmg = mmg;
+ buf->behavior = behavior;
+ buf->handle = (((u64)rc | buf->behavior->mem_id) << PAGE_SHIFT);
+ kref_init(&buf->refcount);
+
+ rc = buf->behavior->alloc(buf, gfp, args);
+ if (rc) {
+ dev_err(mmg->dev, "%s: Failure in buffer alloc callback %d\n",
+ behavior->topic, rc);
+ goto remove_idr;
+ }
+
+ return buf;
+
+remove_idr:
+ spin_lock(&mmg->lock);
+ idr_remove(&mmg->handles, lower_32_bits(buf->handle >> PAGE_SHIFT));
+ spin_unlock(&mmg->lock);
+free_buf:
+ kfree(buf);
+ return NULL;
+}
+
+/**
+ * hl_mmap_mem_buf_vm_close - handle mmap close
+ *
+ * @vma: the vma object for which mmap was closed.
+ *
+ * Put the memory buffer if it is no longer mapped.
+ */
+static void hl_mmap_mem_buf_vm_close(struct vm_area_struct *vma)
+{
+ struct hl_mmap_mem_buf *buf =
+ (struct hl_mmap_mem_buf *)vma->vm_private_data;
+ long new_mmap_size;
+
+ new_mmap_size = buf->real_mapped_size - (vma->vm_end - vma->vm_start);
+
+ if (new_mmap_size > 0) {
+ buf->real_mapped_size = new_mmap_size;
+ return;
+ }
+
+ atomic_set(&buf->mmap, 0);
+ hl_mmap_mem_buf_put(buf);
+ vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
+ .close = hl_mmap_mem_buf_vm_close
+};
+
+/**
+ * hl_mem_mgr_mmap - map the given buffer to the user
+ *
+ * @mmg: unifed memory manager
+ * @vma: the vma object for which mmap was closed.
+ * @args: additional args passed to behavior->mmap
+ *
+ * Map the buffer specified by the vma->vm_pgoff to the given vma.
+ */
+int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
+ void *args)
+{
+ struct hl_mmap_mem_buf *buf;
+ u64 user_mem_size;
+ u64 handle;
+ int rc;
+
+ /* We use the page offset to hold the idr and thus we need to clear
+ * it before doing the mmap itself
+ */
+ handle = vma->vm_pgoff << PAGE_SHIFT;
+ vma->vm_pgoff = 0;
+
+ /* Reference was taken here */
+ buf = hl_mmap_mem_buf_get(mmg, handle);
+ if (!buf) {
+ dev_err(mmg->dev,
+ "Memory mmap failed, no match to handle %#llx\n", handle);
+ return -EINVAL;
+ }
+
+ /* Validation check */
+ user_mem_size = vma->vm_end - vma->vm_start;
+ if (user_mem_size != ALIGN(buf->mappable_size, PAGE_SIZE)) {
+ dev_err(mmg->dev,
+ "%s: Memory mmap failed, mmap VM size 0x%llx != 0x%llx allocated physical mem size\n",
+ buf->behavior->topic, user_mem_size, buf->mappable_size);
+ rc = -EINVAL;
+ goto put_mem;
+ }
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+ if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
+ user_mem_size)) {
+#else
+ if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
+ user_mem_size)) {
+#endif
+ dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n",
+ buf->behavior->topic, vma->vm_start);
+
+ rc = -EINVAL;
+ goto put_mem;
+ }
+
+ if (atomic_cmpxchg(&buf->mmap, 0, 1)) {
+ dev_err(mmg->dev,
+ "%s, Memory mmap failed, already mmaped to user\n",
+ buf->behavior->topic);
+ rc = -EINVAL;
+ goto put_mem;
+ }
+
+ vma->vm_ops = &hl_mmap_mem_buf_vm_ops;
+
+ /* Note: We're transferring the memory reference to vma->vm_private_data here. */
+
+ vma->vm_private_data = buf;
+
+ rc = buf->behavior->mmap(buf, vma, args);
+ if (rc) {
+ atomic_set(&buf->mmap, 0);
+ goto put_mem;
+ }
+
+ buf->real_mapped_size = buf->mappable_size;
+ vma->vm_pgoff = handle >> PAGE_SHIFT;
+
+ return 0;
+
+put_mem:
+ hl_mmap_mem_buf_put(buf);
+ return rc;
+}
+
+/**
+ * hl_mem_mgr_init - initialize unified memory manager
+ *
+ * @dev: owner device pointer
+ * @mmg: structure to initialize
+ *
+ * Initialize an instance of unified memory manager
+ */
+void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
+{
+ mmg->dev = dev;
+ spin_lock_init(&mmg->lock);
+ idr_init(&mmg->handles);
+}
+
+/**
+ * hl_mem_mgr_fini - release unified memory manager
+ *
+ * @mmg: parent unifed memory manager
+ *
+ * Release the unified memory manager. Shall be called from an interrupt context.
+ */
+void hl_mem_mgr_fini(struct hl_mem_mgr *mmg)
+{
+ struct hl_mmap_mem_buf *buf;
+ struct idr *idp;
+ const char *topic;
+ u32 id;
+
+ idp = &mmg->handles;
+
+ idr_for_each_entry(idp, buf, id) {
+ topic = buf->behavior->topic;
+ if (hl_mmap_mem_buf_put(buf) != 1)
+ dev_err(mmg->dev,
+ "%s: Buff handle %u for CTX is still alive\n",
+ topic, id);
+ }
+
+ /* TODO: can it happen that some buffer is still in use at this point? */
+
+ idr_destroy(&mmg->handles);
+}
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 810b73421ce1..f3734718d94f 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -9,6 +9,20 @@
#include "../habanalabs.h"
+/**
+ * hl_mmu_get_funcs() - get MMU functions structure
+ * @hdev: habanalabs device structure.
+ * @pgt_residency: page table residency.
+ * @is_dram_addr: true if we need HMMU functions
+ *
+ * @return appropriate MMU functions structure
+ */
+static struct hl_mmu_funcs *hl_mmu_get_funcs(struct hl_device *hdev, int pgt_residency,
+ bool is_dram_addr)
+{
+ return &hdev->mmu_func[pgt_residency];
+}
+
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -122,6 +136,53 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
}
/*
+ * hl_mmu_get_real_page_size - get real page size to use in map/unmap operation
+ *
+ * @hdev: pointer to device data.
+ * @mmu_prop: MMU properties.
+ * @page_size: page size
+ * @real_page_size: set here the actual page size to use for the operation
+ * @is_dram_addr: true if DRAM address, otherwise false.
+ *
+ * @return 0 on success, otherwise non 0 error code
+ *
+ * note that this is general implementation that can fit most MMU arch. but as this is used as an
+ * MMU function:
+ * 1. it shall not be called directly- only from mmu_func structure instance
+ * 2. each MMU may modify the implementation internally
+ */
+int hl_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
+ u32 page_size, u32 *real_page_size, bool is_dram_addr)
+{
+ /*
+ * The H/W handles mapping of specific page sizes. Hence if the page
+ * size is bigger, we break it to sub-pages and map them separately.
+ */
+ if ((page_size % mmu_prop->page_size) == 0) {
+ *real_page_size = mmu_prop->page_size;
+ return 0;
+ }
+
+ dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
+ page_size, mmu_prop->page_size >> 10);
+
+ return -EFAULT;
+}
+
+static struct hl_mmu_properties *hl_mmu_get_prop(struct hl_device *hdev, u32 page_size,
+ bool is_dram_addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ if (is_dram_addr)
+ return &prop->dmmu;
+ else if ((page_size % prop->pmmu_huge.page_size) == 0)
+ return &prop->pmmu_huge;
+
+ return &prop->pmmu;
+}
+
+/*
* hl_mmu_unmap_page - unmaps a virtual addr
*
* @ctx: pointer to the context structure
@@ -142,60 +203,35 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
* For optimization reasons PCI flush may be requested once after unmapping of
* large area.
*/
-int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
- bool flush_pte)
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
- u64 real_virt_addr;
+ struct hl_mmu_funcs *mmu_funcs;
+ int i, pgt_residency, rc = 0;
u32 real_page_size, npages;
- int i, rc = 0, pgt_residency;
+ u64 real_virt_addr;
bool is_dram_addr;
if (!hdev->mmu_enable)
return 0;
is_dram_addr = hl_is_dram_va(hdev, virt_addr);
-
- if (is_dram_addr)
- mmu_prop = &prop->dmmu;
- else if ((page_size % prop->pmmu_huge.page_size) == 0)
- mmu_prop = &prop->pmmu_huge;
- else
- mmu_prop = &prop->pmmu;
+ mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr);
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
- /*
- * The H/W handles mapping of specific page sizes. Hence if the page
- * size is bigger, we break it to sub-pages and unmap them separately.
- */
- if ((page_size % mmu_prop->page_size) == 0) {
- real_page_size = mmu_prop->page_size;
- } else {
- /*
- * MMU page size may differ from DRAM page size.
- * In such case work with the DRAM page size and let the MMU
- * scrambling routine to handle this mismatch when
- * calculating the address to remove from the MMU page table
- */
- if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) {
- real_page_size = prop->dram_page_size;
- } else {
- dev_err(hdev->dev,
- "page size of %u is not %uKB aligned, can't unmap\n",
- page_size, mmu_prop->page_size >> 10);
+ mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
- return -EFAULT;
- }
- }
+ rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size,
+ is_dram_addr);
+ if (rc)
+ return rc;
npages = page_size / real_page_size;
real_virt_addr = virt_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func[pgt_residency].unmap(ctx,
- real_virt_addr, is_dram_addr);
+ rc = mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr);
if (rc)
break;
@@ -203,7 +239,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
}
if (flush_pte)
- hdev->mmu_func[pgt_residency].flush(ctx);
+ mmu_funcs->flush(ctx);
return rc;
}
@@ -230,15 +266,15 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
* For optimization reasons PCI flush may be requested once after mapping of
* large area.
*/
-int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
- u32 page_size, bool flush_pte)
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+ bool flush_pte)
{
+ int i, rc, pgt_residency, mapped_cnt = 0;
struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr, real_phys_addr;
+ struct hl_mmu_funcs *mmu_funcs;
u32 real_page_size, npages;
- int i, rc, pgt_residency, mapped_cnt = 0;
bool is_dram_addr;
@@ -246,40 +282,15 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
return 0;
is_dram_addr = hl_is_dram_va(hdev, virt_addr);
-
- if (is_dram_addr)
- mmu_prop = &prop->dmmu;
- else if ((page_size % prop->pmmu_huge.page_size) == 0)
- mmu_prop = &prop->pmmu_huge;
- else
- mmu_prop = &prop->pmmu;
+ mmu_prop = hl_mmu_get_prop(hdev, page_size, is_dram_addr);
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+ mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
- /*
- * The H/W handles mapping of specific page sizes. Hence if the page
- * size is bigger, we break it to sub-pages and map them separately.
- */
- if ((page_size % mmu_prop->page_size) == 0) {
- real_page_size = mmu_prop->page_size;
- } else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) &&
- (prop->dram_page_size < mmu_prop->page_size)) {
- /*
- * MMU page size may differ from DRAM page size.
- * In such case work with the DRAM page size and let the MMU
- * scrambling routine handle this mismatch when calculating
- * the address to place in the MMU page table. (in that case
- * also make sure that the dram_page_size smaller than the
- * mmu page size)
- */
- real_page_size = prop->dram_page_size;
- } else {
- dev_err(hdev->dev,
- "page size of %u is not %uKB aligned, can't map\n",
- page_size, mmu_prop->page_size >> 10);
-
- return -EFAULT;
- }
+ rc = hdev->asic_funcs->mmu_get_real_page_size(hdev, mmu_prop, page_size, &real_page_size,
+ is_dram_addr);
+ if (rc)
+ return rc;
/*
* Verify that the phys and virt addresses are aligned with the
@@ -302,9 +313,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
real_phys_addr = phys_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func[pgt_residency].map(ctx,
- real_virt_addr, real_phys_addr,
- real_page_size, is_dram_addr);
+ rc = mmu_funcs->map(ctx, real_virt_addr, real_phys_addr, real_page_size,
+ is_dram_addr);
if (rc)
goto err;
@@ -314,22 +324,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
}
if (flush_pte)
- hdev->mmu_func[pgt_residency].flush(ctx);
+ mmu_funcs->flush(ctx);
return 0;
err:
real_virt_addr = virt_addr;
for (i = 0 ; i < mapped_cnt ; i++) {
- if (hdev->mmu_func[pgt_residency].unmap(ctx,
- real_virt_addr, is_dram_addr))
+ if (mmu_funcs->unmap(ctx, real_virt_addr, is_dram_addr))
dev_warn_ratelimited(hdev->dev,
"failed to unmap va: 0x%llx\n", real_virt_addr);
real_virt_addr += real_page_size;
}
- hdev->mmu_func[pgt_residency].flush(ctx);
+ mmu_funcs->flush(ctx);
return rc;
}
@@ -480,11 +489,9 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops,
u64 *phys_addr)
{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr;
- u32 hop0_shift_off;
- void *p;
+ struct hl_mmu_properties *mmu_prop;
/* last hop holds the phys address and flags */
if (hops->unscrambled_paddr)
@@ -493,11 +500,11 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val;
if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE)
- p = &prop->pmmu_huge;
+ mmu_prop = &prop->pmmu_huge;
else if (hops->range_type == HL_VA_RANGE_TYPE_HOST)
- p = &prop->pmmu;
+ mmu_prop = &prop->pmmu;
else /* HL_VA_RANGE_TYPE_DRAM */
- p = &prop->dmmu;
+ mmu_prop = &prop->dmmu;
if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
!is_power_of_2(prop->dram_page_size)) {
@@ -508,7 +515,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
/*
* Bit arithmetics cannot be used for non power of two page
* sizes. In addition, since bit arithmetics is not used,
- * we cannot ignore dram base. All that shall be considerd.
+ * we cannot ignore dram base. All that shall be considered.
*/
dram_page_size = prop->dram_page_size;
@@ -526,10 +533,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
* structure in order to determine the right masks
* for the page offset.
*/
- hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
- p = (char *)p + hop0_shift_off;
- p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
- hop_shift = *(u64 *)p;
+ hop_shift = mmu_prop->hop_shifts[hops->used_hops - 1];
offset_mask = (1ull << hop_shift) - 1;
addr_mask = ~(offset_mask);
*phys_addr = (tmp_phys_addr & addr_mask) |
@@ -557,40 +561,39 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops)
{
struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct asic_fixed_properties *prop;
struct hl_mmu_properties *mmu_prop;
- int rc;
+ struct hl_mmu_funcs *mmu_funcs;
+ int pgt_residency, rc;
bool is_dram_addr;
if (!hdev->mmu_enable)
return -EOPNOTSUPP;
+ prop = &hdev->asic_prop;
hops->scrambled_vaddr = virt_addr; /* assume no scrambling */
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->dmmu.start_addr,
- prop->dmmu.end_addr);
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
- /* host-residency is the same in PMMU and HPMMU, use one of them */
+ /* host-residency is the same in PMMU and PMMU huge, no need to distinguish here */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+ pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+ mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
mutex_lock(&ctx->mmu_lock);
-
- if (mmu_prop->host_resident)
- rc = hdev->mmu_func[MMU_HR_PGT].get_tlb_info(ctx,
- virt_addr, hops);
- else
- rc = hdev->mmu_func[MMU_DR_PGT].get_tlb_info(ctx,
- virt_addr, hops);
-
+ rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
mutex_unlock(&ctx->mmu_lock);
+ if (rc)
+ return rc;
+
/* add page offset to physical address */
if (hops->unscrambled_paddr)
- hl_mmu_pa_page_with_offset(ctx, virt_addr, hops,
- &hops->unscrambled_paddr);
+ hl_mmu_pa_page_with_offset(ctx, virt_addr, hops, &hops->unscrambled_paddr);
- return rc;
+ return 0;
}
int hl_mmu_if_set_funcs(struct hl_device *hdev)
@@ -662,6 +665,55 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
return rc;
}
+static void hl_mmu_prefetch_work_function(struct work_struct *work)
+{
+ struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
+ struct hl_ctx *ctx = pfw->ctx;
+
+ if (!hl_device_operational(ctx->hdev, NULL))
+ goto put_ctx;
+
+ mutex_lock(&ctx->mmu_lock);
+
+ ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid,
+ pfw->va, pfw->size);
+
+ mutex_unlock(&ctx->mmu_lock);
+
+put_ctx:
+ /*
+ * context was taken in the common mmu prefetch function- see comment there about
+ * context handling.
+ */
+ hl_ctx_put(ctx);
+ kfree(pfw);
+}
+
+int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size)
+{
+ struct hl_prefetch_work *handle_pf_work;
+
+ handle_pf_work = kmalloc(sizeof(*handle_pf_work), GFP_KERNEL);
+ if (!handle_pf_work)
+ return -ENOMEM;
+
+ INIT_WORK(&handle_pf_work->pf_work, hl_mmu_prefetch_work_function);
+ handle_pf_work->ctx = ctx;
+ handle_pf_work->va = va;
+ handle_pf_work->size = size;
+ handle_pf_work->flags = flags;
+ handle_pf_work->asid = asid;
+
+ /*
+ * as actual prefetch is done in a WQ we must get the context (and put it
+ * at the end of the work function)
+ */
+ hl_ctx_get(ctx);
+ queue_work(ctx->hdev->pf_wq, &handle_pf_work->pf_work);
+
+ return 0;
+}
+
u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
@@ -670,6 +722,7 @@ u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
/**
* hl_mmu_get_hop_pte_phys_addr() - extract PTE address from HOP
* @ctx: pointer to the context structure to initialize.
+ * @mmu_prop: MMU properties.
* @hop_idx: HOP index.
* @hop_addr: HOP address.
* @virt_addr: virtual address fro the translation.
@@ -686,33 +739,8 @@ u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *m
return U64_MAX;
}
- /* currently max number of HOPs is 6 */
- switch (hop_idx) {
- case 0:
- mask = mmu_prop->hop0_mask;
- shift = mmu_prop->hop0_shift;
- break;
- case 1:
- mask = mmu_prop->hop1_mask;
- shift = mmu_prop->hop1_shift;
- break;
- case 2:
- mask = mmu_prop->hop2_mask;
- shift = mmu_prop->hop2_shift;
- break;
- case 3:
- mask = mmu_prop->hop3_mask;
- shift = mmu_prop->hop3_shift;
- break;
- case 4:
- mask = mmu_prop->hop4_mask;
- shift = mmu_prop->hop4_shift;
- break;
- default:
- mask = mmu_prop->hop5_mask;
- shift = mmu_prop->hop5_shift;
- break;
- }
+ shift = mmu_prop->hop_shifts[hop_idx];
+ mask = mmu_prop->hop_masks[hop_idx];
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}
diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index d03786d0c407..e2d91a69acc2 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -10,6 +10,8 @@
#include <linux/slab.h>
+#define MMU_V1_MAX_HOPS (MMU_HOP4 + 1)
+
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
@@ -170,51 +172,15 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
return num_of_ptes_left;
}
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr, u64 mask, u64 shift)
-{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
- mmu_prop->hop0_shift);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
- mmu_prop->hop1_shift);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
+static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
+ u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
- mmu_prop->hop2_shift);
-}
+ u64 mask, shift;
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
- mmu_prop->hop3_shift);
-}
-
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
- mmu_prop->hop4_shift);
+ mask = mmu_prop->hop_masks[hop_idx];
+ shift = mmu_prop->hop_shifts[hop_idx];
+ return hop_addr_arr[hop_idx] +
+ ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
@@ -516,74 +482,50 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
}
}
-static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
+static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
u64 virt_addr, bool is_dram_addr)
{
+ u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
- u64 hop0_addr = 0, hop0_pte_addr = 0,
- hop1_addr = 0, hop1_pte_addr = 0,
- hop2_addr = 0, hop2_pte_addr = 0,
- hop3_addr = 0, hop3_pte_addr = 0,
- hop4_addr = 0, hop4_pte_addr = 0,
- curr_pte;
bool is_huge, clear_hop3 = true;
+ int hop_idx;
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
- hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
- hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
- if (hop1_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
- hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
- if (hop2_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
-
- hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
- if (hop3_addr == ULLONG_MAX)
- goto not_mapped;
+ for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
+ if (hop_idx == MMU_HOP0) {
+ hop_addr[hop_idx] = get_hop0_addr(ctx);
+ } else {
+ hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+ if (hop_addr[hop_idx] == ULLONG_MAX)
+ goto not_mapped;
+ }
- hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+ hop_pte_addr[hop_idx] =
+ get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
- curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
+ curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
+ }
is_huge = curr_pte & mmu_prop->last_mask;
if (is_dram_addr && !is_huge) {
- dev_err(hdev->dev,
- "DRAM unmapping should use huge pages only\n");
+ dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
return -EFAULT;
}
if (!is_huge) {
- hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
- if (hop4_addr == ULLONG_MAX)
+ hop_idx = MMU_HOP4;
+ hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+ if (hop_addr[hop_idx] == ULLONG_MAX)
goto not_mapped;
- hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
- virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
-
+ hop_pte_addr[hop_idx] =
+ get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
+ curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
clear_hop3 = false;
}
@@ -605,39 +547,33 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
goto not_mapped;
}
- write_final_pte(ctx, hop3_pte_addr, default_pte);
- put_pte(ctx, hop3_addr);
+ hop_idx = MMU_HOP3;
+ write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
+ put_pte(ctx, hop_addr[hop_idx]);
} else {
if (!(curr_pte & PAGE_PRESENT_MASK))
goto not_mapped;
- if (hop4_addr)
- clear_pte(ctx, hop4_pte_addr);
+ if (hop_addr[MMU_HOP4])
+ clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
else
- clear_pte(ctx, hop3_pte_addr);
+ clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
- if (hop4_addr && !put_pte(ctx, hop4_addr))
+ if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
clear_hop3 = true;
if (!clear_hop3)
goto mapped;
- clear_pte(ctx, hop3_pte_addr);
+ for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
+ clear_pte(ctx, hop_pte_addr[hop_idx]);
- if (put_pte(ctx, hop3_addr))
- goto mapped;
+ if (hop_idx == MMU_HOP0)
+ break;
- clear_pte(ctx, hop2_pte_addr);
-
- if (put_pte(ctx, hop2_addr))
- goto mapped;
-
- clear_pte(ctx, hop1_pte_addr);
-
- if (put_pte(ctx, hop1_addr))
- goto mapped;
-
- clear_pte(ctx, hop0_pte_addr);
+ if (put_pte(ctx, hop_addr[hop_idx]))
+ goto mapped;
+ }
}
mapped:
@@ -650,21 +586,15 @@ not_mapped:
return -EINVAL;
}
-static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
u32 page_size, bool is_dram_addr)
{
+ u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
- u64 hop0_addr = 0, hop0_pte_addr = 0,
- hop1_addr = 0, hop1_pte_addr = 0,
- hop2_addr = 0, hop2_pte_addr = 0,
- hop3_addr = 0, hop3_pte_addr = 0,
- hop4_addr = 0, hop4_pte_addr = 0,
- curr_pte = 0;
- bool hop1_new = false, hop2_new = false, hop3_new = false,
- hop4_new = false, is_huge;
- int rc = -ENOMEM;
+ bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
+ int num_hops, hop_idx, prev_hop, rc = -ENOMEM;
/*
* This mapping function can map a page or a huge page. For huge page
@@ -684,39 +614,21 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
is_huge = false;
}
- hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
- hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
- if (hop1_addr == ULLONG_MAX)
- goto err;
-
- hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
- hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
- if (hop2_addr == ULLONG_MAX)
- goto err;
-
- hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
+ num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;
- hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
- if (hop3_addr == ULLONG_MAX)
- goto err;
-
- hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
-
- if (!is_huge) {
- hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
- if (hop4_addr == ULLONG_MAX)
- goto err;
+ for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
+ if (hop_idx == MMU_HOP0) {
+ hop_addr[hop_idx] = get_hop0_addr(ctx);
+ } else {
+ hop_addr[hop_idx] =
+ get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
+ if (hop_addr[hop_idx] == ULLONG_MAX)
+ goto err;
+ }
- hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
- virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
+ hop_pte_addr[hop_idx] =
+ get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
+ curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
}
if (hdev->dram_default_page_mapping && is_dram_addr) {
@@ -732,30 +644,22 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
goto err;
}
- if (hop1_new || hop2_new || hop3_new || hop4_new) {
- dev_err(hdev->dev,
- "DRAM mapping should not allocate more hops\n");
- rc = -EFAULT;
- goto err;
+ for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
+ if (hop_new[hop_idx]) {
+ dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
+ rc = -EFAULT;
+ goto err;
+ }
}
} else if (curr_pte & PAGE_PRESENT_MASK) {
dev_err(hdev->dev,
"mapping already exists for virt_addr 0x%llx\n",
virt_addr);
- dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
- dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
- dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
- dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
-
- if (!is_huge)
- dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop4_pte_addr,
- hop4_pte_addr);
+ for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
+ dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
+ *(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
+ hop_pte_addr[hop_idx]);
rc = -EINVAL;
goto err;
@@ -764,53 +668,28 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
| PAGE_PRESENT_MASK;
- if (is_huge)
- write_final_pte(ctx, hop3_pte_addr, curr_pte);
- else
- write_final_pte(ctx, hop4_pte_addr, curr_pte);
+ write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
- if (hop1_new) {
- curr_pte =
- (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop0_pte_addr, curr_pte);
- }
- if (hop2_new) {
- curr_pte =
- (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop1_pte_addr, curr_pte);
- get_pte(ctx, hop1_addr);
- }
- if (hop3_new) {
- curr_pte =
- (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop2_pte_addr, curr_pte);
- get_pte(ctx, hop2_addr);
- }
+ for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
+ prev_hop = hop_idx - 1;
- if (!is_huge) {
- if (hop4_new) {
- curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
- PAGE_PRESENT_MASK;
- write_pte(ctx, hop3_pte_addr, curr_pte);
- get_pte(ctx, hop3_addr);
+ if (hop_new[hop_idx]) {
+ curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
+ if (hop_idx != MMU_HOP1)
+ get_pte(ctx, hop_addr[prev_hop]);
}
-
- get_pte(ctx, hop4_addr);
- } else {
- get_pte(ctx, hop3_addr);
}
+ get_pte(ctx, hop_addr[num_hops - 1]);
+
return 0;
err:
- if (hop4_new)
- free_hop(ctx, hop4_addr);
- if (hop3_new)
- free_hop(ctx, hop3_addr);
- if (hop2_new)
- free_hop(ctx, hop2_addr);
- if (hop1_new)
- free_hop(ctx, hop1_addr);
+ for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) {
+ if (hop_new[hop_idx])
+ free_hop(ctx, hop_addr[hop_idx]);
+ }
return rc;
}
@@ -928,8 +807,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
mmu->fini = hl_mmu_v1_fini;
mmu->ctx_init = hl_mmu_v1_ctx_init;
mmu->ctx_fini = hl_mmu_v1_ctx_fini;
- mmu->map = _hl_mmu_v1_map;
- mmu->unmap = _hl_mmu_v1_unmap;
+ mmu->map = hl_mmu_v1_map;
+ mmu->unmap = hl_mmu_v1_unmap;
mmu->flush = flush;
mmu->swap_out = hl_mmu_v1_swap_out;
mmu->swap_in = hl_mmu_v1_swap_in;
diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index bb9ce22bafc4..610acd4a8057 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -392,6 +392,7 @@ enum pci_region hl_get_pci_memory_region(struct hl_device *hdev, u64 addr)
*/
int hl_pci_init(struct hl_device *hdev)
{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pci_dev *pdev = hdev->pdev;
int rc;
@@ -419,17 +420,14 @@ int hl_pci_init(struct hl_device *hdev)
}
/* Driver must sleep in order for FW to finish the iATU configuration */
- if (hdev->asic_prop.iatu_done_by_fw) {
+ if (hdev->asic_prop.iatu_done_by_fw)
usleep_range(2000, 3000);
- hdev->asic_funcs->set_dma_mask_from_fw(hdev);
- }
- rc = dma_set_mask_and_coherent(&pdev->dev,
- DMA_BIT_MASK(hdev->dma_mask));
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(prop->dma_mask));
if (rc) {
dev_err(hdev->dev,
"Failed to set dma mask to %d bits, error %d\n",
- hdev->dma_mask, rc);
+ prop->dma_mask, rc);
goto unmap_pci_bars;
}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 21c2b678ff72..fba322241096 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -95,7 +95,7 @@
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
-#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
+#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
BIT(GAUDI_ENGINE_ID_MME_0) |\
@@ -557,6 +557,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
}
prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
+ prop->host_base_address = HOST_PHYS_BASE;
+ prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->collective_first_sob = 0;
prop->collective_first_mon = 0;
@@ -595,18 +597,19 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
+ prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = false;
- prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
- prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
- prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
- prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
- prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
- prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
- prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
- prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
- prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
- prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
+ prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
+ prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
+ prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
+ prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
+ prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
+ prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
prop->pmmu.start_addr = VA_HOST_SPACE_START;
prop->pmmu.end_addr =
(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
@@ -673,6 +676,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->set_max_power_on_device_init = true;
+ prop->dma_mask = 48;
+
return 0;
}
@@ -754,8 +759,6 @@ static int gaudi_init_iatu(struct hl_device *hdev)
if (rc)
goto done;
- hdev->asic_funcs->set_dma_mask_from_fw(hdev);
-
/* Outbound Region 0 - Point to Host */
outbound_region.addr = HOST_PHYS_BASE;
outbound_region.size = HOST_PHYS_SIZE;
@@ -1008,7 +1011,7 @@ free_job:
release_cb:
hl_cb_put(cb);
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
return rc;
}
@@ -1470,7 +1473,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
job->patched_cb = NULL;
job->job_cb_size = job->user_cb_size;
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
/* increment refcount as for external queues we get completion */
if (hw_queue_prop->type == QUEUE_TYPE_EXT)
@@ -2808,9 +2811,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
QMAN_EXTERNAL_MAKE_TRUSTED);
@@ -2987,9 +2989,8 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
@@ -3124,9 +3125,8 @@ static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
@@ -3258,9 +3258,8 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
@@ -3409,9 +3408,8 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
@@ -3792,9 +3790,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_
{
u32 wait_timeout_ms;
- dev_info(hdev->dev,
- "Halting compute engines and disabling interrupts\n");
-
if (hdev->pldm)
wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
else
@@ -4212,7 +4207,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset
}
if (fw_reset) {
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Firmware performs HARD reset, going to wait %dms\n",
reset_timeout_ms);
@@ -4304,11 +4299,11 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Issued HARD reset command, going to wait %dms\n",
reset_timeout_ms);
} else {
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Firmware performs HARD reset, going to wait %dms\n",
reset_timeout_ms);
}
@@ -4745,12 +4740,11 @@ static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}
-static int gaudi_hbm_scrubbing(struct hl_device *hdev)
+static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 cur_addr = DRAM_BASE_ADDR_USER;
- u32 val;
- u32 chunk_size;
+ u32 chunk_size, busy;
int rc, dma_id;
while (cur_addr < prop->dram_end_address) {
@@ -4764,8 +4758,10 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
cur_addr, cur_addr + chunk_size);
- WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
- WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
+ WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
+ lower_32_bits(val));
+ WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
+ upper_32_bits(val));
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
lower_32_bits(cur_addr));
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
@@ -4788,8 +4784,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
rc = hl_poll_timeout(
hdev,
mmDMA0_CORE_STS0 + dma_offset,
- val,
- ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
+ busy,
+ ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
1000,
HBM_SCRUBBING_TIMEOUT_US);
@@ -4843,7 +4839,7 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
}
/* Scrub HBM using all DMA channels in parallel */
- rc = gaudi_hbm_scrubbing(hdev);
+ rc = gaudi_scrub_device_dram(hdev, 0xdeadbeaf);
if (rc)
dev_err(hdev->dev,
"Failed to clear HBM in mem scrub all\n");
@@ -5038,37 +5034,7 @@ static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
-static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
- return -ENOMEM;
-
- /* Shift to the device's base physical address of host memory */
- for_each_sg(sgl, sg, nents, i)
- sg->dma_address += HOST_PHYS_BASE;
-
- return 0;
-}
-
-static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- /* Cancel the device's base physical address of host memory */
- for_each_sg(sgl, sg, nents, i)
- sg->dma_address -= HOST_PHYS_BASE;
-
- dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
-}
-
-static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
- struct sg_table *sgt)
+static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
struct scatterlist *sg, *sg_next_iter;
u32 count, dma_desc_cnt;
@@ -5077,8 +5043,7 @@ static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
dma_desc_cnt = 0;
- for_each_sg(sgt->sgl, sg, sgt->nents, count) {
-
+ for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg);
addr = sg_dma_address(sg);
@@ -5132,8 +5097,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
- rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents, dir);
+ rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
@@ -5408,7 +5372,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
sgt = userptr->sgt;
dma_desc_cnt = 0;
- for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg);
dma_addr = sg_dma_address(sg);
@@ -5562,7 +5526,7 @@ static int gaudi_patch_cb(struct hl_device *hdev,
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
struct hl_cs_parser *parser)
{
- u64 patched_cb_handle;
+ u64 handle;
u32 patched_cb_size;
struct hl_cb *user_cb;
int rc;
@@ -5578,9 +5542,9 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
else
parser->patched_cb_size = parser->user_cb_size;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
parser->patched_cb_size, false, false,
- &patched_cb_handle);
+ &handle);
if (rc) {
dev_err(hdev->dev,
@@ -5589,13 +5553,10 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
return rc;
}
- patched_cb_handle >>= PAGE_SHIFT;
- parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
- (u32) patched_cb_handle);
+ parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
/* hl_cb_get should never fail */
if (!parser->patched_cb) {
- dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
- (u32) patched_cb_handle);
+ dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
rc = -EFAULT;
goto out;
}
@@ -5635,8 +5596,7 @@ out:
* cb_put will release it, but here we want to remove it from the
* idr
*/
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
- patched_cb_handle << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
return rc;
}
@@ -5644,7 +5604,7 @@ out:
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
struct hl_cs_parser *parser)
{
- u64 patched_cb_handle;
+ u64 handle;
int rc;
rc = gaudi_validate_cb(hdev, parser, false);
@@ -5652,22 +5612,19 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
if (rc)
goto free_userptr;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
parser->patched_cb_size, false, false,
- &patched_cb_handle);
+ &handle);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate patched CB for DMA CS %d\n", rc);
goto free_userptr;
}
- patched_cb_handle >>= PAGE_SHIFT;
- parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
- (u32) patched_cb_handle);
+ parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
/* hl_cb_get should never fail here */
if (!parser->patched_cb) {
- dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
- (u32) patched_cb_handle);
+ dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
rc = -EFAULT;
goto out;
}
@@ -5684,8 +5641,7 @@ out:
* cb_put will release it, but here we want to remove it from the
* idr
*/
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
- patched_cb_handle << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
free_userptr:
if (rc)
@@ -5798,7 +5754,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
struct hl_cs_job *job;
u32 cb_size, ctl, err_cause;
struct hl_cb *cb;
- u64 id;
int rc;
cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
@@ -5865,9 +5820,8 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
}
release_cb:
- id = cb->id;
hl_cb_put(cb);
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
return rc;
}
@@ -5930,7 +5884,7 @@ static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
release_cb:
hl_cb_put(cb);
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
return rc;
}
@@ -6101,184 +6055,6 @@ static void gaudi_restore_phase_topology(struct hl_device *hdev)
}
-static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
- bool user_address, u32 *val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
-
- *val = RREG32(addr - CFG_BASE);
-
- } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
-
- *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
- bool user_address, u32 val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
-
- WREG32(addr - CFG_BASE, val);
-
- } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
-
- writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
- bool user_address, u64 *val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
-
- u32 val_l = RREG32(addr - CFG_BASE);
- u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
-
- *val = (((u64) val_h) << 32) | val_l;
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
-
- *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
- bool user_address, u64 val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
-
- WREG32(addr - CFG_BASE, lower_32_bits(val));
- WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
-
- writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
u32 size_to_dma, dma_addr_t dma_addr)
{
@@ -7628,19 +7404,18 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
/* In case it's the first razwi, save its parameters*/
- rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
+ rc = atomic_cmpxchg(&hdev->last_error.razwi.write_disable, 0, 1);
if (!rc) {
- hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
- hdev->last_error.razwi_timestamp = ktime_get();
- hdev->last_error.razwi_addr = razwi_addr;
- hdev->last_error.razwi_engine_id_1 = engine_id_1;
- hdev->last_error.razwi_engine_id_2 = engine_id_2;
+ hdev->last_error.razwi.timestamp = ktime_get();
+ hdev->last_error.razwi.addr = razwi_addr;
+ hdev->last_error.razwi.engine_id_1 = engine_id_1;
+ hdev->last_error.razwi.engine_id_2 = engine_id_2;
/*
* If first engine id holds non valid value the razwi initiator
* does not have engine id
*/
- hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
- hdev->last_error.razwi_type = razwi_type;
+ hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
+ hdev->last_error.razwi.type = razwi_type;
}
}
@@ -8103,7 +7878,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_MMU_PAGE_FAULT:
case GAUDI_EVENT_MMU_WR_PERM:
case GAUDI_EVENT_RAZWI_OR_ADC:
- case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
fallthrough;
@@ -8123,6 +7897,19 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;
+ case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
+ gaudi_print_irq_info(hdev, event_type, true);
+ gaudi_handle_qman_err(hdev, event_type);
+ hl_fw_unmask_irq(hdev, event_type);
+
+ /* In TPC QM event, notify on TPC assertion. While there isn't
+ * a specific event for assertion yet, the FW generates QM event.
+ * The SW upper layer will inspect an internal mapped area to indicate
+ * if the event is a tpc assertion or tpc QM.
+ */
+ hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT);
+ break;
+
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
gaudi_print_irq_info(hdev, event_type, true);
goto reset_device;
@@ -8328,8 +8115,6 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
set_default_power_values(hdev);
- hdev->max_power = prop->max_power_default;
-
return 0;
}
@@ -8501,6 +8286,16 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
return hl_fw_get_eeprom_data(hdev, data, max_size);
}
+static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+ struct gaudi_device *gaudi = hdev->asic_specific;
+
+ if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
+ return 0;
+
+ return hl_fw_get_monitor_dump(hdev, data);
+}
+
/*
* this function should be used only during initialization and/or after reset,
* when there are no active users.
@@ -9066,11 +8861,6 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data)
kref_init(&hw_sob->kref);
}
-static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
-{
- hdev->dma_mask = 48;
-}
-
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
@@ -9132,7 +8922,7 @@ static int gaudi_add_sync_to_engine_map_entry(
*/
if (reg_value == 0 || reg_value == 0xffffffff)
return 0;
- reg_value -= (u32)CFG_BASE;
+ reg_value -= lower_32_bits(CFG_BASE);
/* create a new hash entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -9377,6 +9167,12 @@ static u32 *gaudi_get_stream_master_qid_arr(void)
return gaudi_stream_master;
}
+static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info)
+{
+ /* set 0 since multiple pages are not supported */
+ info->page_order_bitmask = 0;
+}
+
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
@@ -9418,24 +9214,21 @@ static const struct hl_asic_funcs gaudi_funcs = {
.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
.asic_dma_free_coherent = gaudi_dma_free_coherent,
.scrub_device_mem = gaudi_scrub_device_mem,
+ .scrub_device_dram = gaudi_scrub_device_dram,
.get_int_queue_base = gaudi_get_int_queue_base,
.test_queues = gaudi_test_queues,
.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
.asic_dma_pool_free = gaudi_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
- .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
+ .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.cs_parser = gaudi_cs_parser,
- .asic_dma_map_sg = gaudi_dma_map_sg,
+ .asic_dma_map_sgtable = hl_dma_map_sgtable,
.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
.update_eq_ci = gaudi_update_eq_ci,
.context_switch = gaudi_context_switch,
.restore_phase_topology = gaudi_restore_phase_topology,
- .debugfs_read32 = gaudi_debugfs_read32,
- .debugfs_write32 = gaudi_debugfs_write32,
- .debugfs_read64 = gaudi_debugfs_read64,
- .debugfs_write64 = gaudi_debugfs_write64,
.debugfs_read_dma = gaudi_debugfs_read_dma,
.add_device_attr = gaudi_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
@@ -9444,6 +9237,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.write_pte = gaudi_write_pte,
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
+ .mmu_prefetch_cache_range = NULL,
.send_heartbeat = gaudi_send_heartbeat,
.debug_coresight = gaudi_debug_coresight,
.is_device_idle = gaudi_is_device_idle,
@@ -9452,6 +9246,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.hw_queues_unlock = gaudi_hw_queues_unlock,
.get_pci_id = gaudi_get_pci_id,
.get_eeprom_data = gaudi_get_eeprom_data,
+ .get_monitor_dump = gaudi_get_monitor_dump,
.send_cpu_message = gaudi_send_cpu_message,
.pci_bars_map = gaudi_pci_bars_map,
.init_iatu = gaudi_init_iatu,
@@ -9469,7 +9264,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
.gen_wait_cb = gaudi_gen_wait_cb,
.reset_sob = gaudi_reset_sob,
.reset_sob_group = gaudi_reset_sob_group,
- .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
.get_device_time = gaudi_get_device_time,
.collective_wait_init_cs = gaudi_collective_wait_init_cs,
.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
@@ -9486,7 +9280,11 @@ static const struct hl_asic_funcs gaudi_funcs = {
.get_sob_addr = gaudi_get_sob_addr,
.set_pci_memory_regions = gaudi_set_pci_memory_regions,
.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
- .is_valid_dram_page_size = NULL
+ .is_valid_dram_page_size = NULL,
+ .mmu_get_real_page_size = hl_mmu_get_real_page_size,
+ .get_valid_dram_page_orders = gaudi_get_valid_dram_page_orders,
+ .access_dev_mem = hl_access_dev_mem,
+ .set_dram_bar_base = gaudi_set_hbm_bar_base,
};
/**
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 54de7c599072..4fbcf3f0afe5 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -148,14 +148,14 @@
#define MME_QMAN_LENGTH 1024
#define MME_QMAN_SIZE_IN_BYTES (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
-#define HBM_DMA_QMAN_LENGTH 1024
+#define HBM_DMA_QMAN_LENGTH 4096
#define HBM_DMA_QMAN_SIZE_IN_BYTES \
(HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
#define TPC_QMAN_LENGTH 1024
#define TPC_QMAN_SIZE_IN_BYTES (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
-#define NIC_QMAN_LENGTH 1024
+#define NIC_QMAN_LENGTH 4096
#define NIC_QMAN_SIZE_IN_BYTES (NIC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ec9358bcbf0b..4cde505a7416 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -390,6 +390,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
}
prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
+ prop->host_base_address = HOST_PHYS_BASE;
+ prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->dram_base_address = DRAM_PHYS_BASE;
@@ -413,18 +415,19 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
+ prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = true;
- prop->dmmu.hop0_shift = MMU_V1_0_HOP0_SHIFT;
- prop->dmmu.hop1_shift = MMU_V1_0_HOP1_SHIFT;
- prop->dmmu.hop2_shift = MMU_V1_0_HOP2_SHIFT;
- prop->dmmu.hop3_shift = MMU_V1_0_HOP3_SHIFT;
- prop->dmmu.hop4_shift = MMU_V1_0_HOP4_SHIFT;
- prop->dmmu.hop0_mask = MMU_V1_0_HOP0_MASK;
- prop->dmmu.hop1_mask = MMU_V1_0_HOP1_MASK;
- prop->dmmu.hop2_mask = MMU_V1_0_HOP2_MASK;
- prop->dmmu.hop3_mask = MMU_V1_0_HOP3_MASK;
- prop->dmmu.hop4_mask = MMU_V1_0_HOP4_MASK;
+ prop->dmmu.hop_shifts[MMU_HOP0] = MMU_V1_0_HOP0_SHIFT;
+ prop->dmmu.hop_shifts[MMU_HOP1] = MMU_V1_0_HOP1_SHIFT;
+ prop->dmmu.hop_shifts[MMU_HOP2] = MMU_V1_0_HOP2_SHIFT;
+ prop->dmmu.hop_shifts[MMU_HOP3] = MMU_V1_0_HOP3_SHIFT;
+ prop->dmmu.hop_shifts[MMU_HOP4] = MMU_V1_0_HOP4_SHIFT;
+ prop->dmmu.hop_masks[MMU_HOP0] = MMU_V1_0_HOP0_MASK;
+ prop->dmmu.hop_masks[MMU_HOP1] = MMU_V1_0_HOP1_MASK;
+ prop->dmmu.hop_masks[MMU_HOP2] = MMU_V1_0_HOP2_MASK;
+ prop->dmmu.hop_masks[MMU_HOP3] = MMU_V1_0_HOP3_MASK;
+ prop->dmmu.hop_masks[MMU_HOP4] = MMU_V1_0_HOP4_MASK;
prop->dmmu.start_addr = VA_DDR_SPACE_START;
prop->dmmu.end_addr = VA_DDR_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
@@ -487,6 +490,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->set_max_power_on_device_init = true;
+ prop->dma_mask = 48;
+
return 0;
}
@@ -574,8 +579,6 @@ static int goya_init_iatu(struct hl_device *hdev)
if (rc)
goto done;
- hdev->asic_funcs->set_dma_mask_from_fw(hdev);
-
/* Outbound Region 0 - Point to Host */
outbound_region.addr = HOST_PHYS_BASE;
outbound_region.size = HOST_PHYS_SIZE;
@@ -2479,9 +2482,6 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_r
{
u32 wait_timeout_ms;
- dev_info(hdev->dev,
- "Halting compute engines and disabling interrupts\n");
-
if (hdev->pldm)
wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
else
@@ -2825,12 +2825,12 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
goya_set_pll_refclk(hdev);
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Issued HARD reset command, going to wait %dms\n",
reset_timeout_ms);
} else {
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Issued SOFT reset command, going to wait %dms\n",
reset_timeout_ms);
}
@@ -3311,35 +3311,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
-static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
- return -ENOMEM;
-
- /* Shift to the device's base physical address of host memory */
- for_each_sg(sgl, sg, nents, i)
- sg->dma_address += HOST_PHYS_BASE;
-
- return 0;
-}
-
-static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- /* Cancel the device's base physical address of host memory */
- for_each_sg(sgl, sg, nents, i)
- sg->dma_address -= HOST_PHYS_BASE;
-
- dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
-}
-
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
struct scatterlist *sg, *sg_next_iter;
@@ -3349,8 +3320,7 @@ u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
dma_desc_cnt = 0;
- for_each_sg(sgt->sgl, sg, sgt->nents, count) {
-
+ for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg);
addr = sg_dma_address(sg);
@@ -3404,8 +3374,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
- rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents, dir);
+ rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
@@ -3869,7 +3838,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
sgt = userptr->sgt;
dma_desc_cnt = 0;
- for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg);
dma_addr = sg_dma_address(sg);
@@ -4032,7 +4001,7 @@ static int goya_patch_cb(struct hl_device *hdev,
static int goya_parse_cb_mmu(struct hl_device *hdev,
struct hl_cs_parser *parser)
{
- u64 patched_cb_handle;
+ u64 handle;
u32 patched_cb_size;
struct hl_cb *user_cb;
int rc;
@@ -4045,9 +4014,9 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
parser->patched_cb_size = parser->user_cb_size +
sizeof(struct packet_msg_prot) * 2;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
parser->patched_cb_size, false, false,
- &patched_cb_handle);
+ &handle);
if (rc) {
dev_err(hdev->dev,
@@ -4056,13 +4025,10 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
return rc;
}
- patched_cb_handle >>= PAGE_SHIFT;
- parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
- (u32) patched_cb_handle);
+ parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
/* hl_cb_get should never fail here */
if (!parser->patched_cb) {
- dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
- (u32) patched_cb_handle);
+ dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
rc = -EFAULT;
goto out;
}
@@ -4102,8 +4068,7 @@ out:
* cb_put will release it, but here we want to remove it from the
* idr
*/
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
- patched_cb_handle << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
return rc;
}
@@ -4111,7 +4076,7 @@ out:
static int goya_parse_cb_no_mmu(struct hl_device *hdev,
struct hl_cs_parser *parser)
{
- u64 patched_cb_handle;
+ u64 handle;
int rc;
rc = goya_validate_cb(hdev, parser, false);
@@ -4119,22 +4084,19 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
if (rc)
goto free_userptr;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
parser->patched_cb_size, false, false,
- &patched_cb_handle);
+ &handle);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate patched CB for DMA CS %d\n", rc);
goto free_userptr;
}
- patched_cb_handle >>= PAGE_SHIFT;
- parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
- (u32) patched_cb_handle);
+ parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
/* hl_cb_get should never fail here */
if (!parser->patched_cb) {
- dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
- (u32) patched_cb_handle);
+ dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
rc = -EFAULT;
goto out;
}
@@ -4151,8 +4113,7 @@ out:
* cb_put will release it, but here we want to remove it from the
* idr
*/
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
- patched_cb_handle << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
free_userptr:
if (rc)
@@ -4259,224 +4220,7 @@ static void goya_clear_sm_regs(struct hl_device *hdev)
i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}
-/*
- * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
- * address.
- *
- * @hdev: pointer to hl_device structure
- * @addr: device or host mapped address
- * @val: returned value
- *
- * In case of DDR address that is not mapped into the default aperture that
- * the DDR bar exposes, the function will configure the iATU so that the DDR
- * bar will be positioned at a base address that allows reading from the
- * required address. Configuring the iATU during normal operation can
- * lead to undefined behavior and therefore, should be done with extreme care
- *
- */
-static int goya_debugfs_read32(struct hl_device *hdev, u64 addr,
- bool user_address, u32 *val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
- *val = RREG32(addr - CFG_BASE);
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
-
- *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
- (addr - SRAM_BASE_ADDR));
-
- } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE +
- (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
- if (ddr_bar_addr != U64_MAX) {
- *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
- (addr - bar_base_addr));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev,
- ddr_bar_addr);
- }
- if (ddr_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
- *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-/*
- * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
- * address.
- *
- * @hdev: pointer to hl_device structure
- * @addr: device or host mapped address
- * @val: returned value
- *
- * In case of DDR address that is not mapped into the default aperture that
- * the DDR bar exposes, the function will configure the iATU so that the DDR
- * bar will be positioned at a base address that allows writing to the
- * required address. Configuring the iATU during normal operation can
- * lead to undefined behavior and therefore, should be done with extreme care
- *
- */
-static int goya_debugfs_write32(struct hl_device *hdev, u64 addr,
- bool user_address, u32 val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
- WREG32(addr - CFG_BASE, val);
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
-
- writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
- (addr - SRAM_BASE_ADDR));
-
- } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE +
- (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
- if (ddr_bar_addr != U64_MAX) {
- writel(val, hdev->pcie_bar[DDR_BAR_ID] +
- (addr - bar_base_addr));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev,
- ddr_bar_addr);
- }
- if (ddr_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
- *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int goya_debugfs_read64(struct hl_device *hdev, u64 addr,
- bool user_address, u64 *val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
- u32 val_l = RREG32(addr - CFG_BASE);
- u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
-
- *val = (((u64) val_h) << 32) | val_l;
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
-
- *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
- (addr - SRAM_BASE_ADDR));
-
- } else if (addr <=
- DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE +
- (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
- if (ddr_bar_addr != U64_MAX) {
- *val = readq(hdev->pcie_bar[DDR_BAR_ID] +
- (addr - bar_base_addr));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev,
- ddr_bar_addr);
- }
- if (ddr_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
- *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int goya_debugfs_write64(struct hl_device *hdev, u64 addr,
- bool user_address, u64 val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 ddr_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
- WREG32(addr - CFG_BASE, lower_32_bits(val));
- WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
-
- writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
- (addr - SRAM_BASE_ADDR));
-
- } else if (addr <=
- DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE +
- (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
- if (ddr_bar_addr != U64_MAX) {
- writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
- (addr - bar_base_addr));
-
- ddr_bar_addr = goya_set_ddr_bar_base(hdev,
- ddr_bar_addr);
- }
- if (ddr_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
- *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
- void *blob_addr)
+static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
dev_err(hdev->dev, "Reading via DMA is unimplemented yet\n");
return -EPERM;
@@ -5101,7 +4845,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
release_cb:
hl_cb_put(cb);
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
return rc;
}
@@ -5561,11 +5305,6 @@ static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
}
-static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
-{
- hdev->dma_mask = 48;
-}
-
u64 goya_get_device_time(struct hl_device *hdev)
{
u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
@@ -5678,6 +5417,22 @@ static u32 *goya_get_stream_master_qid_arr(void)
return NULL;
}
+static void goya_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info)
+{
+ /* set 0 since multiple pages are not supported */
+ info->page_order_bitmask = 0;
+}
+
+static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+ return -EOPNOTSUPP;
+}
+
+static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
+{
+ return -EOPNOTSUPP;
+}
+
static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init,
.early_fini = goya_early_fini,
@@ -5696,24 +5451,21 @@ static const struct hl_asic_funcs goya_funcs = {
.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
.asic_dma_free_coherent = goya_dma_free_coherent,
.scrub_device_mem = goya_scrub_device_mem,
+ .scrub_device_dram = goya_scrub_device_dram,
.get_int_queue_base = goya_get_int_queue_base,
.test_queues = goya_test_queues,
.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
.asic_dma_pool_free = goya_dma_pool_free,
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
- .hl_dma_unmap_sg = goya_dma_unmap_sg,
+ .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.cs_parser = goya_cs_parser,
- .asic_dma_map_sg = goya_dma_map_sg,
+ .asic_dma_map_sgtable = hl_dma_map_sgtable,
.get_dma_desc_list_size = goya_get_dma_desc_list_size,
.add_end_of_cb_packets = goya_add_end_of_cb_packets,
.update_eq_ci = goya_update_eq_ci,
.context_switch = goya_context_switch,
.restore_phase_topology = goya_restore_phase_topology,
- .debugfs_read32 = goya_debugfs_read32,
- .debugfs_write32 = goya_debugfs_write32,
- .debugfs_read64 = goya_debugfs_read64,
- .debugfs_write64 = goya_debugfs_write64,
.debugfs_read_dma = goya_debugfs_read_dma,
.add_device_attr = goya_add_device_attr,
.handle_eqe = goya_handle_eqe,
@@ -5722,6 +5474,7 @@ static const struct hl_asic_funcs goya_funcs = {
.write_pte = goya_write_pte,
.mmu_invalidate_cache = goya_mmu_invalidate_cache,
.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
+ .mmu_prefetch_cache_range = NULL,
.send_heartbeat = goya_send_heartbeat,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
@@ -5730,6 +5483,7 @@ static const struct hl_asic_funcs goya_funcs = {
.hw_queues_unlock = goya_hw_queues_unlock,
.get_pci_id = goya_get_pci_id,
.get_eeprom_data = goya_get_eeprom_data,
+ .get_monitor_dump = goya_get_monitor_dump,
.send_cpu_message = goya_send_cpu_message,
.pci_bars_map = goya_pci_bars_map,
.init_iatu = goya_init_iatu,
@@ -5747,7 +5501,6 @@ static const struct hl_asic_funcs goya_funcs = {
.gen_wait_cb = goya_gen_wait_cb,
.reset_sob = goya_reset_sob,
.reset_sob_group = goya_reset_sob_group,
- .set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
.get_device_time = goya_get_device_time,
.collective_wait_init_cs = goya_collective_wait_init_cs,
.collective_wait_create_jobs = goya_collective_wait_create_jobs,
@@ -5764,7 +5517,11 @@ static const struct hl_asic_funcs goya_funcs = {
.get_sob_addr = &goya_get_sob_addr,
.set_pci_memory_regions = goya_set_pci_memory_regions,
.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
- .is_valid_dram_page_size = NULL
+ .is_valid_dram_page_size = NULL,
+ .mmu_get_real_page_size = hl_mmu_get_real_page_size,
+ .get_valid_dram_page_orders = goya_get_valid_dram_page_orders,
+ .access_dev_mem = hl_access_dev_mem,
+ .set_dram_bar_base = goya_set_ddr_bar_base,
};
/*
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 65668dac6a5f..38e44b6cf581 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -389,6 +389,14 @@ enum pq_init_status {
*
* CPUCP_PACKET_ENGINE_CORE_ASID_SET -
* Packet to perform engine core ASID configuration
+ *
+ * CPUCP_PACKET_MONITOR_DUMP_GET -
+ * Get monitors registers dump from the CpuCP kernel.
+ * The CPU will put the registers dump in the a buffer allocated by the driver
+ * which address is passed via the CpuCp packet. In addition, the host's driver
+ * passes the max size it allows the CpuCP to write to the structure, to prevent
+ * data corruption in case of mismatched driver/FW versions.
+ * Relevant only to Gaudi.
*/
enum cpucp_packet_id {
@@ -439,6 +447,11 @@ enum cpucp_packet_id {
CPUCP_PACKET_POWER_SET, /* internal */
CPUCP_PACKET_RESERVED, /* not used */
CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */
+ CPUCP_PACKET_RESERVED2, /* not used */
+ CPUCP_PACKET_RESERVED3, /* not used */
+ CPUCP_PACKET_RESERVED4, /* not used */
+ CPUCP_PACKET_RESERVED5, /* not used */
+ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
};
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -555,6 +568,12 @@ struct cpucp_array_data_packet {
__le32 data[];
};
+enum cpucp_led_index {
+ CPUCP_LED0_INDEX = 0,
+ CPUCP_LED1_INDEX,
+ CPUCP_LED2_INDEX
+};
+
enum cpucp_packet_rc {
cpucp_packet_success,
cpucp_packet_invalid,
@@ -576,7 +595,10 @@ enum cpucp_temp_type {
cpucp_temp_offset = 19,
cpucp_temp_lowest = 21,
cpucp_temp_highest = 22,
- cpucp_temp_reset_history = 23
+ cpucp_temp_reset_history = 23,
+ cpucp_temp_warn = 24,
+ cpucp_temp_max_crit = 25,
+ cpucp_temp_max_warn = 26,
};
enum cpucp_in_attributes {
@@ -686,6 +708,7 @@ enum pll_index {
enum rl_index {
TPC_RL = 0,
MME_RL,
+ EDMA_RL,
};
enum pvt_index {
@@ -820,6 +843,7 @@ enum cpucp_serdes_type {
TYPE_2_SERDES_TYPE,
HLS1_SERDES_TYPE,
HLS1H_SERDES_TYPE,
+ HLS2_SERDES_TYPE,
UNKNOWN_SERDES_TYPE,
MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE
};
@@ -833,9 +857,28 @@ struct cpucp_nic_info {
__u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
__le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
__le16 serdes_type; /* enum cpucp_serdes_type */
+ __le16 tx_swap_map[CPUCP_MAX_NICS];
__u8 reserved[6];
};
+#define PAGE_DISCARD_MAX 64
+
+struct page_discard_info {
+ __u8 num_entries;
+ __u8 reserved[7];
+ __le32 mmu_page_idx[PAGE_DISCARD_MAX];
+};
+
+/*
+ * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp".
+ * @integer: the integer part of the SER value;
+ * @exp: the exponent part of the SER value.
+ */
+struct ser_val {
+ __le16 integer;
+ __le16 exp;
+};
+
/*
* struct cpucp_nic_status - describes the status of a NIC port.
* @port: NIC port index.
@@ -889,4 +932,29 @@ struct cpucp_hbm_row_replaced_rows_info {
struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX];
};
+/*
+ * struct dcore_monitor_regs_data - DCORE monitor regs data.
+ * the structure follows sync manager block layout. relevant only to Gaudi.
+ * @mon_pay_addrl: array of payload address low bits.
+ * @mon_pay_addrh: array of payload address high bits.
+ * @mon_pay_data: array of payload data.
+ * @mon_arm: array of monitor arm.
+ * @mon_status: array of monitor status.
+ */
+struct dcore_monitor_regs_data {
+ __le32 mon_pay_addrl[512];
+ __le32 mon_pay_addrh[512];
+ __le32 mon_pay_data[512];
+ __le32 mon_arm[512];
+ __le32 mon_status[512];
+};
+
+/* contains SM data for each SYNC_MNGR (relevant only to Gaudi) */
+struct cpucp_monitor_dump {
+ struct dcore_monitor_regs_data sync_mngr_w_s;
+ struct dcore_monitor_regs_data sync_mngr_e_s;
+ struct dcore_monitor_regs_data sync_mngr_w_n;
+ struct dcore_monitor_regs_data sync_mngr_e_n;
+};
+
#endif /* CPUCP_IF_H */
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
index 758f246627f8..cae8ac8bc5b1 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -34,4 +34,14 @@
#define MMU_CONFIG_TIMEOUT_USEC 2000 /* 2 ms */
+enum mmu_hop_num {
+ MMU_HOP0,
+ MMU_HOP1,
+ MMU_HOP2,
+ MMU_HOP3,
+ MMU_HOP4,
+ MMU_HOP5,
+ MMU_HOP_MAX,
+};
+
#endif /* INCLUDE_MMU_GENERAL_H_ */
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index f21854ac5cc2..009239ad1d8a 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -68,40 +68,40 @@ void __init lkdtm_bugs_init(int *recur_param)
recur_count = *recur_param;
}
-void lkdtm_PANIC(void)
+static void lkdtm_PANIC(void)
{
panic("dumptest");
}
-void lkdtm_BUG(void)
+static void lkdtm_BUG(void)
{
BUG();
}
static int warn_counter;
-void lkdtm_WARNING(void)
+static void lkdtm_WARNING(void)
{
WARN_ON(++warn_counter);
}
-void lkdtm_WARNING_MESSAGE(void)
+static void lkdtm_WARNING_MESSAGE(void)
{
WARN(1, "Warning message trigger count: %d\n", ++warn_counter);
}
-void lkdtm_EXCEPTION(void)
+static void lkdtm_EXCEPTION(void)
{
*((volatile int *) 0) = 0;
}
-void lkdtm_LOOP(void)
+static void lkdtm_LOOP(void)
{
for (;;)
;
}
-void lkdtm_EXHAUST_STACK(void)
+static void lkdtm_EXHAUST_STACK(void)
{
pr_info("Calling function with %lu frame size to depth %d ...\n",
REC_STACK_SIZE, recur_count);
@@ -115,7 +115,7 @@ static noinline void __lkdtm_CORRUPT_STACK(void *stack)
}
/* This should trip the stack canary, not corrupt the return address. */
-noinline void lkdtm_CORRUPT_STACK(void)
+static noinline void lkdtm_CORRUPT_STACK(void)
{
/* Use default char array length that triggers stack protection. */
char data[8] __aligned(sizeof(void *));
@@ -125,7 +125,7 @@ noinline void lkdtm_CORRUPT_STACK(void)
}
/* Same as above but will only get a canary with -fstack-protector-strong */
-noinline void lkdtm_CORRUPT_STACK_STRONG(void)
+static noinline void lkdtm_CORRUPT_STACK_STRONG(void)
{
union {
unsigned short shorts[4];
@@ -139,7 +139,7 @@ noinline void lkdtm_CORRUPT_STACK_STRONG(void)
static pid_t stack_pid;
static unsigned long stack_addr;
-void lkdtm_REPORT_STACK(void)
+static void lkdtm_REPORT_STACK(void)
{
volatile uintptr_t magic;
pid_t pid = task_pid_nr(current);
@@ -222,7 +222,7 @@ static noinline void __lkdtm_REPORT_STACK_CANARY(void *stack)
}
}
-void lkdtm_REPORT_STACK_CANARY(void)
+static void lkdtm_REPORT_STACK_CANARY(void)
{
/* Use default char array length that triggers stack protection. */
char data[8] __aligned(sizeof(void *)) = { };
@@ -230,7 +230,7 @@ void lkdtm_REPORT_STACK_CANARY(void)
__lkdtm_REPORT_STACK_CANARY((void *)&data);
}
-void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void)
+static void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void)
{
static u8 data[5] __attribute__((aligned(4))) = {1, 2, 3, 4, 5};
u32 *p;
@@ -245,21 +245,21 @@ void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void)
pr_err("XFAIL: arch has CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS\n");
}
-void lkdtm_SOFTLOCKUP(void)
+static void lkdtm_SOFTLOCKUP(void)
{
preempt_disable();
for (;;)
cpu_relax();
}
-void lkdtm_HARDLOCKUP(void)
+static void lkdtm_HARDLOCKUP(void)
{
local_irq_disable();
for (;;)
cpu_relax();
}
-void lkdtm_SPINLOCKUP(void)
+static void lkdtm_SPINLOCKUP(void)
{
/* Must be called twice to trigger. */
spin_lock(&lock_me_up);
@@ -267,7 +267,7 @@ void lkdtm_SPINLOCKUP(void)
__release(&lock_me_up);
}
-void lkdtm_HUNG_TASK(void)
+static void lkdtm_HUNG_TASK(void)
{
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
@@ -276,7 +276,7 @@ void lkdtm_HUNG_TASK(void)
volatile unsigned int huge = INT_MAX - 2;
volatile unsigned int ignored;
-void lkdtm_OVERFLOW_SIGNED(void)
+static void lkdtm_OVERFLOW_SIGNED(void)
{
int value;
@@ -291,7 +291,7 @@ void lkdtm_OVERFLOW_SIGNED(void)
}
-void lkdtm_OVERFLOW_UNSIGNED(void)
+static void lkdtm_OVERFLOW_UNSIGNED(void)
{
unsigned int value;
@@ -319,7 +319,7 @@ struct array_bounds {
int three;
};
-void lkdtm_ARRAY_BOUNDS(void)
+static void lkdtm_ARRAY_BOUNDS(void)
{
struct array_bounds_flex_array *not_checked;
struct array_bounds *checked;
@@ -327,6 +327,11 @@ void lkdtm_ARRAY_BOUNDS(void)
not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL);
checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL);
+ if (!not_checked || !checked) {
+ kfree(not_checked);
+ kfree(checked);
+ return;
+ }
pr_info("Array access within bounds ...\n");
/* For both, touch all bytes in the actual member size. */
@@ -346,10 +351,13 @@ void lkdtm_ARRAY_BOUNDS(void)
kfree(not_checked);
kfree(checked);
pr_err("FAIL: survived array bounds overflow!\n");
- pr_expected_config(CONFIG_UBSAN_BOUNDS);
+ if (IS_ENABLED(CONFIG_UBSAN_BOUNDS))
+ pr_expected_config(CONFIG_UBSAN_TRAP);
+ else
+ pr_expected_config(CONFIG_UBSAN_BOUNDS);
}
-void lkdtm_CORRUPT_LIST_ADD(void)
+static void lkdtm_CORRUPT_LIST_ADD(void)
{
/*
* Initially, an empty list via LIST_HEAD:
@@ -389,7 +397,7 @@ void lkdtm_CORRUPT_LIST_ADD(void)
}
}
-void lkdtm_CORRUPT_LIST_DEL(void)
+static void lkdtm_CORRUPT_LIST_DEL(void)
{
LIST_HEAD(test_head);
struct lkdtm_list item;
@@ -417,7 +425,7 @@ void lkdtm_CORRUPT_LIST_DEL(void)
}
/* Test that VMAP_STACK is actually allocating with a leading guard page */
-void lkdtm_STACK_GUARD_PAGE_LEADING(void)
+static void lkdtm_STACK_GUARD_PAGE_LEADING(void)
{
const unsigned char *stack = task_stack_page(current);
const unsigned char *ptr = stack - 1;
@@ -431,7 +439,7 @@ void lkdtm_STACK_GUARD_PAGE_LEADING(void)
}
/* Test that VMAP_STACK is actually allocating with a trailing guard page */
-void lkdtm_STACK_GUARD_PAGE_TRAILING(void)
+static void lkdtm_STACK_GUARD_PAGE_TRAILING(void)
{
const unsigned char *stack = task_stack_page(current);
const unsigned char *ptr = stack + THREAD_SIZE;
@@ -444,7 +452,7 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void)
pr_err("FAIL: accessed page after stack! (byte: %x)\n", byte);
}
-void lkdtm_UNSET_SMEP(void)
+static void lkdtm_UNSET_SMEP(void)
{
#if IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_UML)
#define MOV_CR4_DEPTH 64
@@ -510,7 +518,7 @@ void lkdtm_UNSET_SMEP(void)
#endif
}
-void lkdtm_DOUBLE_FAULT(void)
+static void lkdtm_DOUBLE_FAULT(void)
{
#if IS_ENABLED(CONFIG_X86_32) && !IS_ENABLED(CONFIG_UML)
/*
@@ -558,7 +566,7 @@ static noinline void change_pac_parameters(void)
}
#endif
-noinline void lkdtm_CORRUPT_PAC(void)
+static noinline void lkdtm_CORRUPT_PAC(void)
{
#ifdef CONFIG_ARM64
#define CORRUPT_PAC_ITERATE 10
@@ -586,3 +594,37 @@ noinline void lkdtm_CORRUPT_PAC(void)
pr_err("XFAIL: this test is arm64-only\n");
#endif
}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(PANIC),
+ CRASHTYPE(BUG),
+ CRASHTYPE(WARNING),
+ CRASHTYPE(WARNING_MESSAGE),
+ CRASHTYPE(EXCEPTION),
+ CRASHTYPE(LOOP),
+ CRASHTYPE(EXHAUST_STACK),
+ CRASHTYPE(CORRUPT_STACK),
+ CRASHTYPE(CORRUPT_STACK_STRONG),
+ CRASHTYPE(REPORT_STACK),
+ CRASHTYPE(REPORT_STACK_CANARY),
+ CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
+ CRASHTYPE(SOFTLOCKUP),
+ CRASHTYPE(HARDLOCKUP),
+ CRASHTYPE(SPINLOCKUP),
+ CRASHTYPE(HUNG_TASK),
+ CRASHTYPE(OVERFLOW_SIGNED),
+ CRASHTYPE(OVERFLOW_UNSIGNED),
+ CRASHTYPE(ARRAY_BOUNDS),
+ CRASHTYPE(CORRUPT_LIST_ADD),
+ CRASHTYPE(CORRUPT_LIST_DEL),
+ CRASHTYPE(STACK_GUARD_PAGE_LEADING),
+ CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
+ CRASHTYPE(UNSET_SMEP),
+ CRASHTYPE(DOUBLE_FAULT),
+ CRASHTYPE(CORRUPT_PAC),
+};
+
+struct crashtype_category bugs_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c
index c9aeddef1044..666a7f4bc137 100644
--- a/drivers/misc/lkdtm/cfi.c
+++ b/drivers/misc/lkdtm/cfi.c
@@ -3,6 +3,7 @@
* This is for all the tests relating directly to Control Flow Integrity.
*/
#include "lkdtm.h"
+#include <asm/page.h>
static int called_count;
@@ -22,7 +23,7 @@ static noinline int lkdtm_increment_int(int *counter)
/*
* This tries to call an indirect function with a mismatched prototype.
*/
-void lkdtm_CFI_FORWARD_PROTO(void)
+static void lkdtm_CFI_FORWARD_PROTO(void)
{
/*
* Matches lkdtm_increment_void()'s prototype, but not
@@ -41,3 +42,145 @@ void lkdtm_CFI_FORWARD_PROTO(void)
pr_err("FAIL: survived mismatched prototype function call!\n");
pr_expected_config(CONFIG_CFI_CLANG);
}
+
+/*
+ * This can stay local to LKDTM, as there should not be a production reason
+ * to disable PAC && SCS.
+ */
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+# ifdef CONFIG_ARM64_BTI_KERNEL
+# define __no_pac "branch-protection=bti"
+# else
+# define __no_pac "branch-protection=none"
+# endif
+# define __no_ret_protection __noscs __attribute__((__target__(__no_pac)))
+#else
+# define __no_ret_protection __noscs
+#endif
+
+#define no_pac_addr(addr) \
+ ((__force __typeof__(addr))((uintptr_t)(addr) | PAGE_OFFSET))
+
+/* The ultimate ROP gadget. */
+static noinline __no_ret_protection
+void set_return_addr_unchecked(unsigned long *expected, unsigned long *addr)
+{
+ /* Use of volatile is to make sure final write isn't seen as a dead store. */
+ unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1;
+
+ /* Make sure we've found the right place on the stack before writing it. */
+ if (no_pac_addr(*ret_addr) == expected)
+ *ret_addr = (addr);
+ else
+ /* Check architecture, stack layout, or compiler behavior... */
+ pr_warn("Eek: return address mismatch! %px != %px\n",
+ *ret_addr, addr);
+}
+
+static noinline
+void set_return_addr(unsigned long *expected, unsigned long *addr)
+{
+ /* Use of volatile is to make sure final write isn't seen as a dead store. */
+ unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1;
+
+ /* Make sure we've found the right place on the stack before writing it. */
+ if (no_pac_addr(*ret_addr) == expected)
+ *ret_addr = (addr);
+ else
+ /* Check architecture, stack layout, or compiler behavior... */
+ pr_warn("Eek: return address mismatch! %px != %px\n",
+ *ret_addr, addr);
+}
+
+static volatile int force_check;
+
+static void lkdtm_CFI_BACKWARD(void)
+{
+ /* Use calculated gotos to keep labels addressable. */
+ void *labels[] = {0, &&normal, &&redirected, &&check_normal, &&check_redirected};
+
+ pr_info("Attempting unchecked stack return address redirection ...\n");
+
+ /* Always false */
+ if (force_check) {
+ /*
+ * Prepare to call with NULLs to avoid parameters being treated as
+ * constants in -02.
+ */
+ set_return_addr_unchecked(NULL, NULL);
+ set_return_addr(NULL, NULL);
+ if (force_check)
+ goto *labels[1];
+ if (force_check)
+ goto *labels[2];
+ if (force_check)
+ goto *labels[3];
+ if (force_check)
+ goto *labels[4];
+ return;
+ }
+
+ /*
+ * Use fallthrough switch case to keep basic block ordering between
+ * set_return_addr*() and the label after it.
+ */
+ switch (force_check) {
+ case 0:
+ set_return_addr_unchecked(&&normal, &&redirected);
+ fallthrough;
+ case 1:
+normal:
+ /* Always true */
+ if (!force_check) {
+ pr_err("FAIL: stack return address manipulation failed!\n");
+ /* If we can't redirect "normally", we can't test mitigations. */
+ return;
+ }
+ break;
+ default:
+redirected:
+ pr_info("ok: redirected stack return address.\n");
+ break;
+ }
+
+ pr_info("Attempting checked stack return address redirection ...\n");
+
+ switch (force_check) {
+ case 0:
+ set_return_addr(&&check_normal, &&check_redirected);
+ fallthrough;
+ case 1:
+check_normal:
+ /* Always true */
+ if (!force_check) {
+ pr_info("ok: control flow unchanged.\n");
+ return;
+ }
+
+check_redirected:
+ pr_err("FAIL: stack return address was redirected!\n");
+ break;
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) {
+ pr_expected_config(CONFIG_ARM64_PTR_AUTH_KERNEL);
+ return;
+ }
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) {
+ pr_expected_config(CONFIG_SHADOW_CALL_STACK);
+ return;
+ }
+ pr_warn("This is probably expected, since this %s was built *without* %s=y nor %s=y\n",
+ lkdtm_kernel_info,
+ "CONFIG_ARM64_PTR_AUTH_KERNEL", "CONFIG_SHADOW_CALL_STACK");
+}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(CFI_FORWARD_PROTO),
+ CRASHTYPE(CFI_BACKWARD),
+};
+
+struct crashtype_category cfi_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index e2228b6fc09b..b4712ff196b4 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -86,109 +86,21 @@ static struct crashpoint crashpoints[] = {
#endif
};
-
-/* Crash types. */
-struct crashtype {
- const char *name;
- void (*func)(void);
-};
-
-#define CRASHTYPE(_name) \
- { \
- .name = __stringify(_name), \
- .func = lkdtm_ ## _name, \
- }
-
-/* Define the possible types of crashes that can be triggered. */
-static const struct crashtype crashtypes[] = {
- CRASHTYPE(PANIC),
- CRASHTYPE(BUG),
- CRASHTYPE(WARNING),
- CRASHTYPE(WARNING_MESSAGE),
- CRASHTYPE(EXCEPTION),
- CRASHTYPE(LOOP),
- CRASHTYPE(EXHAUST_STACK),
- CRASHTYPE(CORRUPT_STACK),
- CRASHTYPE(CORRUPT_STACK_STRONG),
- CRASHTYPE(REPORT_STACK),
- CRASHTYPE(REPORT_STACK_CANARY),
- CRASHTYPE(CORRUPT_LIST_ADD),
- CRASHTYPE(CORRUPT_LIST_DEL),
- CRASHTYPE(STACK_GUARD_PAGE_LEADING),
- CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
- CRASHTYPE(UNSET_SMEP),
- CRASHTYPE(CORRUPT_PAC),
- CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
- CRASHTYPE(SLAB_LINEAR_OVERFLOW),
- CRASHTYPE(VMALLOC_LINEAR_OVERFLOW),
- CRASHTYPE(WRITE_AFTER_FREE),
- CRASHTYPE(READ_AFTER_FREE),
- CRASHTYPE(WRITE_BUDDY_AFTER_FREE),
- CRASHTYPE(READ_BUDDY_AFTER_FREE),
- CRASHTYPE(SLAB_INIT_ON_ALLOC),
- CRASHTYPE(BUDDY_INIT_ON_ALLOC),
- CRASHTYPE(SLAB_FREE_DOUBLE),
- CRASHTYPE(SLAB_FREE_CROSS),
- CRASHTYPE(SLAB_FREE_PAGE),
- CRASHTYPE(SOFTLOCKUP),
- CRASHTYPE(HARDLOCKUP),
- CRASHTYPE(SPINLOCKUP),
- CRASHTYPE(HUNG_TASK),
- CRASHTYPE(OVERFLOW_SIGNED),
- CRASHTYPE(OVERFLOW_UNSIGNED),
- CRASHTYPE(ARRAY_BOUNDS),
- CRASHTYPE(EXEC_DATA),
- CRASHTYPE(EXEC_STACK),
- CRASHTYPE(EXEC_KMALLOC),
- CRASHTYPE(EXEC_VMALLOC),
- CRASHTYPE(EXEC_RODATA),
- CRASHTYPE(EXEC_USERSPACE),
- CRASHTYPE(EXEC_NULL),
- CRASHTYPE(ACCESS_USERSPACE),
- CRASHTYPE(ACCESS_NULL),
- CRASHTYPE(WRITE_RO),
- CRASHTYPE(WRITE_RO_AFTER_INIT),
- CRASHTYPE(WRITE_KERN),
- CRASHTYPE(WRITE_OPD),
- CRASHTYPE(REFCOUNT_INC_OVERFLOW),
- CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
- CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW),
- CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_OVERFLOW),
- CRASHTYPE(REFCOUNT_DEC_ZERO),
- CRASHTYPE(REFCOUNT_DEC_NEGATIVE),
- CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE),
- CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE),
- CRASHTYPE(REFCOUNT_INC_ZERO),
- CRASHTYPE(REFCOUNT_ADD_ZERO),
- CRASHTYPE(REFCOUNT_INC_SATURATED),
- CRASHTYPE(REFCOUNT_DEC_SATURATED),
- CRASHTYPE(REFCOUNT_ADD_SATURATED),
- CRASHTYPE(REFCOUNT_INC_NOT_ZERO_SATURATED),
- CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_SATURATED),
- CRASHTYPE(REFCOUNT_DEC_AND_TEST_SATURATED),
- CRASHTYPE(REFCOUNT_SUB_AND_TEST_SATURATED),
- CRASHTYPE(REFCOUNT_TIMING),
- CRASHTYPE(ATOMIC_TIMING),
- CRASHTYPE(USERCOPY_HEAP_SIZE_TO),
- CRASHTYPE(USERCOPY_HEAP_SIZE_FROM),
- CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO),
- CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM),
- CRASHTYPE(USERCOPY_STACK_FRAME_TO),
- CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
- CRASHTYPE(USERCOPY_STACK_BEYOND),
- CRASHTYPE(USERCOPY_KERNEL),
- CRASHTYPE(STACKLEAK_ERASING),
- CRASHTYPE(CFI_FORWARD_PROTO),
- CRASHTYPE(FORTIFIED_OBJECT),
- CRASHTYPE(FORTIFIED_SUBOBJECT),
- CRASHTYPE(FORTIFIED_STRSCPY),
- CRASHTYPE(DOUBLE_FAULT),
+/* List of possible types for crashes that can be triggered. */
+static const struct crashtype_category *crashtype_categories[] = {
+ &bugs_crashtypes,
+ &heap_crashtypes,
+ &perms_crashtypes,
+ &refcount_crashtypes,
+ &usercopy_crashtypes,
+ &stackleak_crashtypes,
+ &cfi_crashtypes,
+ &fortify_crashtypes,
#ifdef CONFIG_PPC_64S_HASH_MMU
- CRASHTYPE(PPC_SLB_MULTIHIT),
+ &powerpc_crashtypes,
#endif
};
-
/* Global kprobe entry and crashtype. */
static struct kprobe *lkdtm_kprobe;
static struct crashpoint *lkdtm_crashpoint;
@@ -223,11 +135,16 @@ char *lkdtm_kernel_info;
/* Return the crashtype number or NULL if the name is invalid */
static const struct crashtype *find_crashtype(const char *name)
{
- int i;
+ int cat, idx;
+
+ for (cat = 0; cat < ARRAY_SIZE(crashtype_categories); cat++) {
+ for (idx = 0; idx < crashtype_categories[cat]->len; idx++) {
+ struct crashtype *crashtype;
- for (i = 0; i < ARRAY_SIZE(crashtypes); i++) {
- if (!strcmp(name, crashtypes[i].name))
- return &crashtypes[i];
+ crashtype = &crashtype_categories[cat]->crashtypes[idx];
+ if (!strcmp(name, crashtype->name))
+ return crashtype;
+ }
}
return NULL;
@@ -347,17 +264,24 @@ static ssize_t lkdtm_debugfs_entry(struct file *f,
static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf,
size_t count, loff_t *off)
{
+ int n, cat, idx;
+ ssize_t out;
char *buf;
- int i, n, out;
buf = (char *)__get_free_page(GFP_KERNEL);
if (buf == NULL)
return -ENOMEM;
n = scnprintf(buf, PAGE_SIZE, "Available crash types:\n");
- for (i = 0; i < ARRAY_SIZE(crashtypes); i++) {
- n += scnprintf(buf + n, PAGE_SIZE - n, "%s\n",
- crashtypes[i].name);
+
+ for (cat = 0; cat < ARRAY_SIZE(crashtype_categories); cat++) {
+ for (idx = 0; idx < crashtype_categories[cat]->len; idx++) {
+ struct crashtype *crashtype;
+
+ crashtype = &crashtype_categories[cat]->crashtypes[idx];
+ n += scnprintf(buf + n, PAGE_SIZE - n, "%s\n",
+ crashtype->name);
+ }
}
buf[n] = '\0';
diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c
index ab33bb5e2e7a..080293fa3c52 100644
--- a/drivers/misc/lkdtm/fortify.c
+++ b/drivers/misc/lkdtm/fortify.c
@@ -10,7 +10,7 @@
static volatile int fortify_scratch_space;
-void lkdtm_FORTIFIED_OBJECT(void)
+static void lkdtm_FORTIFIED_OBJECT(void)
{
struct target {
char a[10];
@@ -31,7 +31,7 @@ void lkdtm_FORTIFIED_OBJECT(void)
pr_expected_config(CONFIG_FORTIFY_SOURCE);
}
-void lkdtm_FORTIFIED_SUBOBJECT(void)
+static void lkdtm_FORTIFIED_SUBOBJECT(void)
{
struct target {
char a[10];
@@ -67,7 +67,7 @@ void lkdtm_FORTIFIED_SUBOBJECT(void)
* strscpy and generate a panic because there is a write overflow (i.e. src
* length is greater than dst length).
*/
-void lkdtm_FORTIFIED_STRSCPY(void)
+static void lkdtm_FORTIFIED_STRSCPY(void)
{
char *src;
char dst[5];
@@ -134,3 +134,14 @@ void lkdtm_FORTIFIED_STRSCPY(void)
kfree(src);
}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(FORTIFIED_OBJECT),
+ CRASHTYPE(FORTIFIED_SUBOBJECT),
+ CRASHTYPE(FORTIFIED_STRSCPY),
+};
+
+struct crashtype_category fortify_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c
index 8a92f5a800fa..62516078a619 100644
--- a/drivers/misc/lkdtm/heap.c
+++ b/drivers/misc/lkdtm/heap.c
@@ -22,8 +22,11 @@ static volatile int __offset = 1;
/*
* If there aren't guard pages, it's likely that a consecutive allocation will
* let us overflow into the second allocation without overwriting something real.
+ *
+ * This should always be caught because there is an unconditional unmapped
+ * page after vmap allocations.
*/
-void lkdtm_VMALLOC_LINEAR_OVERFLOW(void)
+static void lkdtm_VMALLOC_LINEAR_OVERFLOW(void)
{
char *one, *two;
@@ -41,8 +44,11 @@ void lkdtm_VMALLOC_LINEAR_OVERFLOW(void)
* This tries to stay within the next largest power-of-2 kmalloc cache
* to avoid actually overwriting anything important if it's not detected
* correctly.
+ *
+ * This should get caught by either memory tagging, KASan, or by using
+ * CONFIG_SLUB_DEBUG=y and slub_debug=ZF (or CONFIG_SLUB_DEBUG_ON=y).
*/
-void lkdtm_SLAB_LINEAR_OVERFLOW(void)
+static void lkdtm_SLAB_LINEAR_OVERFLOW(void)
{
size_t len = 1020;
u32 *data = kmalloc(len, GFP_KERNEL);
@@ -50,11 +56,12 @@ void lkdtm_SLAB_LINEAR_OVERFLOW(void)
return;
pr_info("Attempting slab linear overflow ...\n");
+ OPTIMIZER_HIDE_VAR(data);
data[1024 / sizeof(u32)] = 0x12345678;
kfree(data);
}
-void lkdtm_WRITE_AFTER_FREE(void)
+static void lkdtm_WRITE_AFTER_FREE(void)
{
int *base, *again;
size_t len = 1024;
@@ -80,7 +87,7 @@ void lkdtm_WRITE_AFTER_FREE(void)
pr_info("Hmm, didn't get the same memory range.\n");
}
-void lkdtm_READ_AFTER_FREE(void)
+static void lkdtm_READ_AFTER_FREE(void)
{
int *base, *val, saw;
size_t len = 1024;
@@ -124,7 +131,7 @@ void lkdtm_READ_AFTER_FREE(void)
kfree(val);
}
-void lkdtm_WRITE_BUDDY_AFTER_FREE(void)
+static void lkdtm_WRITE_BUDDY_AFTER_FREE(void)
{
unsigned long p = __get_free_page(GFP_KERNEL);
if (!p) {
@@ -144,7 +151,7 @@ void lkdtm_WRITE_BUDDY_AFTER_FREE(void)
schedule();
}
-void lkdtm_READ_BUDDY_AFTER_FREE(void)
+static void lkdtm_READ_BUDDY_AFTER_FREE(void)
{
unsigned long p = __get_free_page(GFP_KERNEL);
int saw, *val;
@@ -181,7 +188,7 @@ void lkdtm_READ_BUDDY_AFTER_FREE(void)
kfree(val);
}
-void lkdtm_SLAB_INIT_ON_ALLOC(void)
+static void lkdtm_SLAB_INIT_ON_ALLOC(void)
{
u8 *first;
u8 *val;
@@ -213,7 +220,7 @@ void lkdtm_SLAB_INIT_ON_ALLOC(void)
kfree(val);
}
-void lkdtm_BUDDY_INIT_ON_ALLOC(void)
+static void lkdtm_BUDDY_INIT_ON_ALLOC(void)
{
u8 *first;
u8 *val;
@@ -246,7 +253,7 @@ void lkdtm_BUDDY_INIT_ON_ALLOC(void)
free_page((unsigned long)val);
}
-void lkdtm_SLAB_FREE_DOUBLE(void)
+static void lkdtm_SLAB_FREE_DOUBLE(void)
{
int *val;
@@ -263,7 +270,7 @@ void lkdtm_SLAB_FREE_DOUBLE(void)
kmem_cache_free(double_free_cache, val);
}
-void lkdtm_SLAB_FREE_CROSS(void)
+static void lkdtm_SLAB_FREE_CROSS(void)
{
int *val;
@@ -279,7 +286,7 @@ void lkdtm_SLAB_FREE_CROSS(void)
kmem_cache_free(b_cache, val);
}
-void lkdtm_SLAB_FREE_PAGE(void)
+static void lkdtm_SLAB_FREE_PAGE(void)
{
unsigned long p = __get_free_page(GFP_KERNEL);
@@ -313,3 +320,22 @@ void __exit lkdtm_heap_exit(void)
kmem_cache_destroy(a_cache);
kmem_cache_destroy(b_cache);
}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(SLAB_LINEAR_OVERFLOW),
+ CRASHTYPE(VMALLOC_LINEAR_OVERFLOW),
+ CRASHTYPE(WRITE_AFTER_FREE),
+ CRASHTYPE(READ_AFTER_FREE),
+ CRASHTYPE(WRITE_BUDDY_AFTER_FREE),
+ CRASHTYPE(READ_BUDDY_AFTER_FREE),
+ CRASHTYPE(SLAB_INIT_ON_ALLOC),
+ CRASHTYPE(BUDDY_INIT_ON_ALLOC),
+ CRASHTYPE(SLAB_FREE_DOUBLE),
+ CRASHTYPE(SLAB_FREE_CROSS),
+ CRASHTYPE(SLAB_FREE_PAGE),
+};
+
+struct crashtype_category heap_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 305fc2ec3f25..015e0484026b 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -9,19 +9,19 @@
extern char *lkdtm_kernel_info;
#define pr_expected_config(kconfig) \
-{ \
+do { \
if (IS_ENABLED(kconfig)) \
pr_err("Unexpected! This %s was built with " #kconfig "=y\n", \
lkdtm_kernel_info); \
else \
pr_warn("This is probably expected, since this %s was built *without* " #kconfig "=y\n", \
lkdtm_kernel_info); \
-}
+} while (0)
#ifndef MODULE
int lkdtm_check_bool_cmdline(const char *param);
#define pr_expected_config_param(kconfig, param) \
-{ \
+do { \
if (IS_ENABLED(kconfig)) { \
switch (lkdtm_check_bool_cmdline(param)) { \
case 0: \
@@ -52,119 +52,49 @@ int lkdtm_check_bool_cmdline(const char *param);
break; \
} \
} \
-}
+} while (0)
#else
#define pr_expected_config_param(kconfig, param) pr_expected_config(kconfig)
#endif
-/* bugs.c */
+/* Crash types. */
+struct crashtype {
+ const char *name;
+ void (*func)(void);
+};
+
+#define CRASHTYPE(_name) \
+ { \
+ .name = __stringify(_name), \
+ .func = lkdtm_ ## _name, \
+ }
+
+/* Category's collection of crashtypes. */
+struct crashtype_category {
+ struct crashtype *crashtypes;
+ size_t len;
+};
+
+/* Each category's crashtypes list. */
+extern struct crashtype_category bugs_crashtypes;
+extern struct crashtype_category heap_crashtypes;
+extern struct crashtype_category perms_crashtypes;
+extern struct crashtype_category refcount_crashtypes;
+extern struct crashtype_category usercopy_crashtypes;
+extern struct crashtype_category stackleak_crashtypes;
+extern struct crashtype_category cfi_crashtypes;
+extern struct crashtype_category fortify_crashtypes;
+extern struct crashtype_category powerpc_crashtypes;
+
+/* Each category's init/exit routines. */
void __init lkdtm_bugs_init(int *recur_param);
-void lkdtm_PANIC(void);
-void lkdtm_BUG(void);
-void lkdtm_WARNING(void);
-void lkdtm_WARNING_MESSAGE(void);
-void lkdtm_EXCEPTION(void);
-void lkdtm_LOOP(void);
-void lkdtm_EXHAUST_STACK(void);
-void lkdtm_CORRUPT_STACK(void);
-void lkdtm_CORRUPT_STACK_STRONG(void);
-void lkdtm_REPORT_STACK(void);
-void lkdtm_REPORT_STACK_CANARY(void);
-void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void);
-void lkdtm_SOFTLOCKUP(void);
-void lkdtm_HARDLOCKUP(void);
-void lkdtm_SPINLOCKUP(void);
-void lkdtm_HUNG_TASK(void);
-void lkdtm_OVERFLOW_SIGNED(void);
-void lkdtm_OVERFLOW_UNSIGNED(void);
-void lkdtm_ARRAY_BOUNDS(void);
-void lkdtm_CORRUPT_LIST_ADD(void);
-void lkdtm_CORRUPT_LIST_DEL(void);
-void lkdtm_STACK_GUARD_PAGE_LEADING(void);
-void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
-void lkdtm_UNSET_SMEP(void);
-void lkdtm_DOUBLE_FAULT(void);
-void lkdtm_CORRUPT_PAC(void);
-
-/* heap.c */
void __init lkdtm_heap_init(void);
void __exit lkdtm_heap_exit(void);
-void lkdtm_VMALLOC_LINEAR_OVERFLOW(void);
-void lkdtm_SLAB_LINEAR_OVERFLOW(void);
-void lkdtm_WRITE_AFTER_FREE(void);
-void lkdtm_READ_AFTER_FREE(void);
-void lkdtm_WRITE_BUDDY_AFTER_FREE(void);
-void lkdtm_READ_BUDDY_AFTER_FREE(void);
-void lkdtm_SLAB_INIT_ON_ALLOC(void);
-void lkdtm_BUDDY_INIT_ON_ALLOC(void);
-void lkdtm_SLAB_FREE_DOUBLE(void);
-void lkdtm_SLAB_FREE_CROSS(void);
-void lkdtm_SLAB_FREE_PAGE(void);
-
-/* perms.c */
void __init lkdtm_perms_init(void);
-void lkdtm_WRITE_RO(void);
-void lkdtm_WRITE_RO_AFTER_INIT(void);
-void lkdtm_WRITE_KERN(void);
-void lkdtm_WRITE_OPD(void);
-void lkdtm_EXEC_DATA(void);
-void lkdtm_EXEC_STACK(void);
-void lkdtm_EXEC_KMALLOC(void);
-void lkdtm_EXEC_VMALLOC(void);
-void lkdtm_EXEC_RODATA(void);
-void lkdtm_EXEC_USERSPACE(void);
-void lkdtm_EXEC_NULL(void);
-void lkdtm_ACCESS_USERSPACE(void);
-void lkdtm_ACCESS_NULL(void);
-
-/* refcount.c */
-void lkdtm_REFCOUNT_INC_OVERFLOW(void);
-void lkdtm_REFCOUNT_ADD_OVERFLOW(void);
-void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void);
-void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void);
-void lkdtm_REFCOUNT_DEC_ZERO(void);
-void lkdtm_REFCOUNT_DEC_NEGATIVE(void);
-void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void);
-void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void);
-void lkdtm_REFCOUNT_INC_ZERO(void);
-void lkdtm_REFCOUNT_ADD_ZERO(void);
-void lkdtm_REFCOUNT_INC_SATURATED(void);
-void lkdtm_REFCOUNT_DEC_SATURATED(void);
-void lkdtm_REFCOUNT_ADD_SATURATED(void);
-void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void);
-void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void);
-void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void);
-void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void);
-void lkdtm_REFCOUNT_TIMING(void);
-void lkdtm_ATOMIC_TIMING(void);
-
-/* rodata.c */
-void lkdtm_rodata_do_nothing(void);
-
-/* usercopy.c */
void __init lkdtm_usercopy_init(void);
void __exit lkdtm_usercopy_exit(void);
-void lkdtm_USERCOPY_HEAP_SIZE_TO(void);
-void lkdtm_USERCOPY_HEAP_SIZE_FROM(void);
-void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void);
-void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void);
-void lkdtm_USERCOPY_STACK_FRAME_TO(void);
-void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
-void lkdtm_USERCOPY_STACK_BEYOND(void);
-void lkdtm_USERCOPY_KERNEL(void);
-
-/* stackleak.c */
-void lkdtm_STACKLEAK_ERASING(void);
-
-/* cfi.c */
-void lkdtm_CFI_FORWARD_PROTO(void);
-/* fortify.c */
-void lkdtm_FORTIFIED_OBJECT(void);
-void lkdtm_FORTIFIED_SUBOBJECT(void);
-void lkdtm_FORTIFIED_STRSCPY(void);
-
-/* powerpc.c */
-void lkdtm_PPC_SLB_MULTIHIT(void);
+/* Special declaration for function-in-rodata. */
+void lkdtm_rodata_do_nothing(void);
#endif
diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 2c6aba3ff32b..b93404d65650 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -103,7 +103,7 @@ static void execute_user_location(void *dst)
pr_err("FAIL: func returned\n");
}
-void lkdtm_WRITE_RO(void)
+static void lkdtm_WRITE_RO(void)
{
/* Explicitly cast away "const" for the test and make volatile. */
volatile unsigned long *ptr = (unsigned long *)&rodata;
@@ -113,7 +113,7 @@ void lkdtm_WRITE_RO(void)
pr_err("FAIL: survived bad write\n");
}
-void lkdtm_WRITE_RO_AFTER_INIT(void)
+static void lkdtm_WRITE_RO_AFTER_INIT(void)
{
volatile unsigned long *ptr = &ro_after_init;
@@ -132,7 +132,7 @@ void lkdtm_WRITE_RO_AFTER_INIT(void)
pr_err("FAIL: survived bad write\n");
}
-void lkdtm_WRITE_KERN(void)
+static void lkdtm_WRITE_KERN(void)
{
size_t size;
volatile unsigned char *ptr;
@@ -149,7 +149,7 @@ void lkdtm_WRITE_KERN(void)
do_overwritten();
}
-void lkdtm_WRITE_OPD(void)
+static void lkdtm_WRITE_OPD(void)
{
size_t size = sizeof(func_desc_t);
void (*func)(void) = do_nothing;
@@ -166,38 +166,38 @@ void lkdtm_WRITE_OPD(void)
func();
}
-void lkdtm_EXEC_DATA(void)
+static void lkdtm_EXEC_DATA(void)
{
execute_location(data_area, CODE_WRITE);
}
-void lkdtm_EXEC_STACK(void)
+static void lkdtm_EXEC_STACK(void)
{
u8 stack_area[EXEC_SIZE];
execute_location(stack_area, CODE_WRITE);
}
-void lkdtm_EXEC_KMALLOC(void)
+static void lkdtm_EXEC_KMALLOC(void)
{
u32 *kmalloc_area = kmalloc(EXEC_SIZE, GFP_KERNEL);
execute_location(kmalloc_area, CODE_WRITE);
kfree(kmalloc_area);
}
-void lkdtm_EXEC_VMALLOC(void)
+static void lkdtm_EXEC_VMALLOC(void)
{
u32 *vmalloc_area = vmalloc(EXEC_SIZE);
execute_location(vmalloc_area, CODE_WRITE);
vfree(vmalloc_area);
}
-void lkdtm_EXEC_RODATA(void)
+static void lkdtm_EXEC_RODATA(void)
{
execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing),
CODE_AS_IS);
}
-void lkdtm_EXEC_USERSPACE(void)
+static void lkdtm_EXEC_USERSPACE(void)
{
unsigned long user_addr;
@@ -212,12 +212,12 @@ void lkdtm_EXEC_USERSPACE(void)
vm_munmap(user_addr, PAGE_SIZE);
}
-void lkdtm_EXEC_NULL(void)
+static void lkdtm_EXEC_NULL(void)
{
execute_location(NULL, CODE_AS_IS);
}
-void lkdtm_ACCESS_USERSPACE(void)
+static void lkdtm_ACCESS_USERSPACE(void)
{
unsigned long user_addr, tmp = 0;
unsigned long *ptr;
@@ -250,7 +250,7 @@ void lkdtm_ACCESS_USERSPACE(void)
vm_munmap(user_addr, PAGE_SIZE);
}
-void lkdtm_ACCESS_NULL(void)
+static void lkdtm_ACCESS_NULL(void)
{
unsigned long tmp;
volatile unsigned long *ptr = (unsigned long *)NULL;
@@ -270,3 +270,24 @@ void __init lkdtm_perms_init(void)
/* Make sure we can write to __ro_after_init values during __init */
ro_after_init |= 0xAA;
}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(WRITE_RO),
+ CRASHTYPE(WRITE_RO_AFTER_INIT),
+ CRASHTYPE(WRITE_KERN),
+ CRASHTYPE(WRITE_OPD),
+ CRASHTYPE(EXEC_DATA),
+ CRASHTYPE(EXEC_STACK),
+ CRASHTYPE(EXEC_KMALLOC),
+ CRASHTYPE(EXEC_VMALLOC),
+ CRASHTYPE(EXEC_RODATA),
+ CRASHTYPE(EXEC_USERSPACE),
+ CRASHTYPE(EXEC_NULL),
+ CRASHTYPE(ACCESS_USERSPACE),
+ CRASHTYPE(ACCESS_NULL),
+};
+
+struct crashtype_category perms_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c
index 077c9f9ed8d0..be385449911a 100644
--- a/drivers/misc/lkdtm/powerpc.c
+++ b/drivers/misc/lkdtm/powerpc.c
@@ -100,7 +100,7 @@ static void insert_dup_slb_entry_0(void)
preempt_enable();
}
-void lkdtm_PPC_SLB_MULTIHIT(void)
+static void lkdtm_PPC_SLB_MULTIHIT(void)
{
if (!radix_enabled()) {
pr_info("Injecting SLB multihit errors\n");
@@ -118,3 +118,12 @@ void lkdtm_PPC_SLB_MULTIHIT(void)
pr_err("XFAIL: This test is for ppc64 and with hash mode MMU only\n");
}
}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(PPC_SLB_MULTIHIT),
+};
+
+struct crashtype_category powerpc_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c
index de7c5ab528d9..5cd488f54cfa 100644
--- a/drivers/misc/lkdtm/refcount.c
+++ b/drivers/misc/lkdtm/refcount.c
@@ -24,7 +24,7 @@ static void overflow_check(refcount_t *ref)
* A refcount_inc() above the maximum value of the refcount implementation,
* should at least saturate, and at most also WARN.
*/
-void lkdtm_REFCOUNT_INC_OVERFLOW(void)
+static void lkdtm_REFCOUNT_INC_OVERFLOW(void)
{
refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1);
@@ -40,7 +40,7 @@ void lkdtm_REFCOUNT_INC_OVERFLOW(void)
}
/* refcount_add() should behave just like refcount_inc() above. */
-void lkdtm_REFCOUNT_ADD_OVERFLOW(void)
+static void lkdtm_REFCOUNT_ADD_OVERFLOW(void)
{
refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1);
@@ -58,7 +58,7 @@ void lkdtm_REFCOUNT_ADD_OVERFLOW(void)
}
/* refcount_inc_not_zero() should behave just like refcount_inc() above. */
-void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void)
+static void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void)
{
refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX);
@@ -70,7 +70,7 @@ void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void)
}
/* refcount_add_not_zero() should behave just like refcount_inc() above. */
-void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void)
+static void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void)
{
refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX);
@@ -103,7 +103,7 @@ static void check_zero(refcount_t *ref)
* zero it should either saturate (when inc-from-zero isn't protected)
* or stay at zero (when inc-from-zero is protected) and should WARN for both.
*/
-void lkdtm_REFCOUNT_DEC_ZERO(void)
+static void lkdtm_REFCOUNT_DEC_ZERO(void)
{
refcount_t zero = REFCOUNT_INIT(2);
@@ -142,7 +142,7 @@ static void check_negative(refcount_t *ref, int start)
}
/* A refcount_dec() going negative should saturate and may WARN. */
-void lkdtm_REFCOUNT_DEC_NEGATIVE(void)
+static void lkdtm_REFCOUNT_DEC_NEGATIVE(void)
{
refcount_t neg = REFCOUNT_INIT(0);
@@ -156,7 +156,7 @@ void lkdtm_REFCOUNT_DEC_NEGATIVE(void)
* A refcount_dec_and_test() should act like refcount_dec() above when
* going negative.
*/
-void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void)
+static void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void)
{
refcount_t neg = REFCOUNT_INIT(0);
@@ -171,7 +171,7 @@ void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void)
* A refcount_sub_and_test() should act like refcount_dec_and_test()
* above when going negative.
*/
-void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void)
+static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void)
{
refcount_t neg = REFCOUNT_INIT(3);
@@ -203,7 +203,7 @@ static void check_from_zero(refcount_t *ref)
/*
* A refcount_inc() from zero should pin to zero or saturate and may WARN.
*/
-void lkdtm_REFCOUNT_INC_ZERO(void)
+static void lkdtm_REFCOUNT_INC_ZERO(void)
{
refcount_t zero = REFCOUNT_INIT(0);
@@ -228,7 +228,7 @@ void lkdtm_REFCOUNT_INC_ZERO(void)
* A refcount_add() should act like refcount_inc() above when starting
* at zero.
*/
-void lkdtm_REFCOUNT_ADD_ZERO(void)
+static void lkdtm_REFCOUNT_ADD_ZERO(void)
{
refcount_t zero = REFCOUNT_INIT(0);
@@ -267,7 +267,7 @@ static void check_saturated(refcount_t *ref)
* A refcount_inc() from a saturated value should at most warn about
* being saturated already.
*/
-void lkdtm_REFCOUNT_INC_SATURATED(void)
+static void lkdtm_REFCOUNT_INC_SATURATED(void)
{
refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
@@ -278,7 +278,7 @@ void lkdtm_REFCOUNT_INC_SATURATED(void)
}
/* Should act like refcount_inc() above from saturated. */
-void lkdtm_REFCOUNT_DEC_SATURATED(void)
+static void lkdtm_REFCOUNT_DEC_SATURATED(void)
{
refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
@@ -289,7 +289,7 @@ void lkdtm_REFCOUNT_DEC_SATURATED(void)
}
/* Should act like refcount_inc() above from saturated. */
-void lkdtm_REFCOUNT_ADD_SATURATED(void)
+static void lkdtm_REFCOUNT_ADD_SATURATED(void)
{
refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
@@ -300,7 +300,7 @@ void lkdtm_REFCOUNT_ADD_SATURATED(void)
}
/* Should act like refcount_inc() above from saturated. */
-void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void)
+static void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void)
{
refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
@@ -312,7 +312,7 @@ void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void)
}
/* Should act like refcount_inc() above from saturated. */
-void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void)
+static void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void)
{
refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
@@ -324,7 +324,7 @@ void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void)
}
/* Should act like refcount_inc() above from saturated. */
-void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void)
+static void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void)
{
refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
@@ -336,7 +336,7 @@ void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void)
}
/* Should act like refcount_inc() above from saturated. */
-void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void)
+static void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void)
{
refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
@@ -348,7 +348,7 @@ void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void)
}
/* Used to time the existing atomic_t when used for reference counting */
-void lkdtm_ATOMIC_TIMING(void)
+static void lkdtm_ATOMIC_TIMING(void)
{
unsigned int i;
atomic_t count = ATOMIC_INIT(1);
@@ -373,7 +373,7 @@ void lkdtm_ATOMIC_TIMING(void)
* cd /sys/kernel/debug/provoke-crash
* perf stat -B -- cat <(echo REFCOUNT_TIMING) > DIRECT
*/
-void lkdtm_REFCOUNT_TIMING(void)
+static void lkdtm_REFCOUNT_TIMING(void)
{
unsigned int i;
refcount_t count = REFCOUNT_INIT(1);
@@ -390,3 +390,30 @@ void lkdtm_REFCOUNT_TIMING(void)
else
pr_info("refcount timing: done\n");
}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(REFCOUNT_INC_OVERFLOW),
+ CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
+ CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW),
+ CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_OVERFLOW),
+ CRASHTYPE(REFCOUNT_DEC_ZERO),
+ CRASHTYPE(REFCOUNT_DEC_NEGATIVE),
+ CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE),
+ CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE),
+ CRASHTYPE(REFCOUNT_INC_ZERO),
+ CRASHTYPE(REFCOUNT_ADD_ZERO),
+ CRASHTYPE(REFCOUNT_INC_SATURATED),
+ CRASHTYPE(REFCOUNT_DEC_SATURATED),
+ CRASHTYPE(REFCOUNT_ADD_SATURATED),
+ CRASHTYPE(REFCOUNT_INC_NOT_ZERO_SATURATED),
+ CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_SATURATED),
+ CRASHTYPE(REFCOUNT_DEC_AND_TEST_SATURATED),
+ CRASHTYPE(REFCOUNT_SUB_AND_TEST_SATURATED),
+ CRASHTYPE(ATOMIC_TIMING),
+ CRASHTYPE(REFCOUNT_TIMING),
+};
+
+struct crashtype_category refcount_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
index 82369c6f889e..025b133297a6 100644
--- a/drivers/misc/lkdtm/stackleak.c
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -115,7 +115,7 @@ out:
}
}
-void lkdtm_STACKLEAK_ERASING(void)
+static void lkdtm_STACKLEAK_ERASING(void)
{
unsigned long flags;
@@ -124,7 +124,7 @@ void lkdtm_STACKLEAK_ERASING(void)
local_irq_restore(flags);
}
#else /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */
-void lkdtm_STACKLEAK_ERASING(void)
+static void lkdtm_STACKLEAK_ERASING(void)
{
if (IS_ENABLED(CONFIG_HAVE_ARCH_STACKLEAK)) {
pr_err("XFAIL: stackleak is not enabled (CONFIG_GCC_PLUGIN_STACKLEAK=n)\n");
@@ -133,3 +133,12 @@ void lkdtm_STACKLEAK_ERASING(void)
}
}
#endif /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(STACKLEAK_ERASING),
+};
+
+struct crashtype_category stackleak_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c
index 9161ce7ed47a..6215ec995cd3 100644
--- a/drivers/misc/lkdtm/usercopy.c
+++ b/drivers/misc/lkdtm/usercopy.c
@@ -5,6 +5,7 @@
*/
#include "lkdtm.h"
#include <linux/slab.h>
+#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/sched/task_stack.h>
#include <linux/mman.h>
@@ -30,12 +31,12 @@ static const unsigned char test_text[] = "This is a test.\n";
*/
static noinline unsigned char *trick_compiler(unsigned char *stack)
{
- return stack + 0;
+ return stack + unconst;
}
static noinline unsigned char *do_usercopy_stack_callee(int value)
{
- unsigned char buf[32];
+ unsigned char buf[128];
int i;
/* Exercise stack to avoid everything living in registers. */
@@ -43,7 +44,12 @@ static noinline unsigned char *do_usercopy_stack_callee(int value)
buf[i] = value & 0xff;
}
- return trick_compiler(buf);
+ /*
+ * Put the target buffer in the middle of stack allocation
+ * so that we don't step on future stack users regardless
+ * of stack growth direction.
+ */
+ return trick_compiler(&buf[(128/2)-32]);
}
static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
@@ -66,6 +72,12 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
bad_stack -= sizeof(unsigned long);
}
+#ifdef ARCH_HAS_CURRENT_STACK_POINTER
+ pr_info("stack : %px\n", (void *)current_stack_pointer);
+#endif
+ pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack));
+ pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack));
+
user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANONYMOUS | MAP_PRIVATE, 0);
@@ -119,7 +131,7 @@ free_user:
* This checks for whole-object size validation with hardened usercopy,
* with or without usercopy whitelisting.
*/
-static void do_usercopy_heap_size(bool to_user)
+static void do_usercopy_slab_size(bool to_user)
{
unsigned long user_addr;
unsigned char *one, *two;
@@ -185,9 +197,9 @@ free_kernel:
/*
* This checks for the specific whitelist window within an object. If this
- * test passes, then do_usercopy_heap_size() tests will pass too.
+ * test passes, then do_usercopy_slab_size() tests will pass too.
*/
-static void do_usercopy_heap_whitelist(bool to_user)
+static void do_usercopy_slab_whitelist(bool to_user)
{
unsigned long user_alloc;
unsigned char *buf = NULL;
@@ -261,42 +273,42 @@ free_alloc:
}
/* Callable tests. */
-void lkdtm_USERCOPY_HEAP_SIZE_TO(void)
+static void lkdtm_USERCOPY_SLAB_SIZE_TO(void)
{
- do_usercopy_heap_size(true);
+ do_usercopy_slab_size(true);
}
-void lkdtm_USERCOPY_HEAP_SIZE_FROM(void)
+static void lkdtm_USERCOPY_SLAB_SIZE_FROM(void)
{
- do_usercopy_heap_size(false);
+ do_usercopy_slab_size(false);
}
-void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void)
+static void lkdtm_USERCOPY_SLAB_WHITELIST_TO(void)
{
- do_usercopy_heap_whitelist(true);
+ do_usercopy_slab_whitelist(true);
}
-void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void)
+static void lkdtm_USERCOPY_SLAB_WHITELIST_FROM(void)
{
- do_usercopy_heap_whitelist(false);
+ do_usercopy_slab_whitelist(false);
}
-void lkdtm_USERCOPY_STACK_FRAME_TO(void)
+static void lkdtm_USERCOPY_STACK_FRAME_TO(void)
{
do_usercopy_stack(true, true);
}
-void lkdtm_USERCOPY_STACK_FRAME_FROM(void)
+static void lkdtm_USERCOPY_STACK_FRAME_FROM(void)
{
do_usercopy_stack(false, true);
}
-void lkdtm_USERCOPY_STACK_BEYOND(void)
+static void lkdtm_USERCOPY_STACK_BEYOND(void)
{
do_usercopy_stack(true, false);
}
-void lkdtm_USERCOPY_KERNEL(void)
+static void lkdtm_USERCOPY_KERNEL(void)
{
unsigned long user_addr;
@@ -330,6 +342,86 @@ free_user:
vm_munmap(user_addr, PAGE_SIZE);
}
+/*
+ * This expects "kaddr" to point to a PAGE_SIZE allocation, which means
+ * a more complete test that would include copy_from_user() would risk
+ * memory corruption. Just test copy_to_user() here, as that exercises
+ * almost exactly the same code paths.
+ */
+static void do_usercopy_page_span(const char *name, void *kaddr)
+{
+ unsigned long uaddr;
+
+ uaddr = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0);
+ if (uaddr >= TASK_SIZE) {
+ pr_warn("Failed to allocate user memory\n");
+ return;
+ }
+
+ /* Initialize contents. */
+ memset(kaddr, 0xAA, PAGE_SIZE);
+
+ /* Bump the kaddr forward to detect a page-spanning overflow. */
+ kaddr += PAGE_SIZE / 2;
+
+ pr_info("attempting good copy_to_user() from kernel %s: %px\n",
+ name, kaddr);
+ if (copy_to_user((void __user *)uaddr, kaddr,
+ unconst + (PAGE_SIZE / 2))) {
+ pr_err("copy_to_user() failed unexpectedly?!\n");
+ goto free_user;
+ }
+
+ pr_info("attempting bad copy_to_user() from kernel %s: %px\n",
+ name, kaddr);
+ if (copy_to_user((void __user *)uaddr, kaddr, unconst + PAGE_SIZE)) {
+ pr_warn("Good, copy_to_user() failed, but lacked Oops(?!)\n");
+ goto free_user;
+ }
+
+ pr_err("FAIL: bad copy_to_user() not detected!\n");
+ pr_expected_config_param(CONFIG_HARDENED_USERCOPY, "hardened_usercopy");
+
+free_user:
+ vm_munmap(uaddr, PAGE_SIZE);
+}
+
+static void lkdtm_USERCOPY_VMALLOC(void)
+{
+ void *addr;
+
+ addr = vmalloc(PAGE_SIZE);
+ if (!addr) {
+ pr_err("vmalloc() failed!?\n");
+ return;
+ }
+ do_usercopy_page_span("vmalloc", addr);
+ vfree(addr);
+}
+
+static void lkdtm_USERCOPY_FOLIO(void)
+{
+ struct folio *folio;
+ void *addr;
+
+ /*
+ * FIXME: Folio checking currently misses 0-order allocations, so
+ * allocate and bump forward to the last page.
+ */
+ folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 1);
+ if (!folio) {
+ pr_err("folio_alloc() failed!?\n");
+ return;
+ }
+ addr = folio_address(folio);
+ if (addr)
+ do_usercopy_page_span("folio", addr + PAGE_SIZE);
+ else
+ pr_err("folio_address() failed?!\n");
+ folio_put(folio);
+}
+
void __init lkdtm_usercopy_init(void)
{
/* Prepare cache that lacks SLAB_USERCOPY flag. */
@@ -345,3 +437,21 @@ void __exit lkdtm_usercopy_exit(void)
{
kmem_cache_destroy(whitelist_cache);
}
+
+static struct crashtype crashtypes[] = {
+ CRASHTYPE(USERCOPY_SLAB_SIZE_TO),
+ CRASHTYPE(USERCOPY_SLAB_SIZE_FROM),
+ CRASHTYPE(USERCOPY_SLAB_WHITELIST_TO),
+ CRASHTYPE(USERCOPY_SLAB_WHITELIST_FROM),
+ CRASHTYPE(USERCOPY_STACK_FRAME_TO),
+ CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
+ CRASHTYPE(USERCOPY_STACK_BEYOND),
+ CRASHTYPE(USERCOPY_VMALLOC),
+ CRASHTYPE(USERCOPY_FOLIO),
+ CRASHTYPE(USERCOPY_KERNEL),
+};
+
+struct crashtype_category usercopy_crashtypes = {
+ .crashtypes = crashtypes,
+ .len = ARRAY_SIZE(crashtypes),
+};
diff --git a/drivers/misc/pvpanic/pvpanic.c b/drivers/misc/pvpanic/pvpanic.c
index 4b8f1c7d726d..049a12006348 100644
--- a/drivers/misc/pvpanic/pvpanic.c
+++ b/drivers/misc/pvpanic/pvpanic.c
@@ -34,7 +34,9 @@ pvpanic_send_event(unsigned int event)
{
struct pvpanic_instance *pi_cur;
- spin_lock(&pvpanic_lock);
+ if (!spin_trylock(&pvpanic_lock))
+ return;
+
list_for_each_entry(pi_cur, &pvpanic_list, list) {
if (event & pi_cur->capability & pi_cur->events)
iowrite8(event, pi_cur->base);
@@ -55,9 +57,13 @@ pvpanic_panic_notify(struct notifier_block *nb, unsigned long code, void *unused
return NOTIFY_DONE;
}
+/*
+ * Call our notifier very early on panic, deferring the
+ * action taken to the hypervisor.
+ */
static struct notifier_block pvpanic_panic_nb = {
.notifier_call = pvpanic_panic_notify,
- .priority = 1, /* let this called before broken drm_fb_helper() */
+ .priority = INT_MAX,
};
static void pvpanic_remove(void *param)
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index f1d8ba6d4857..086ce77d9074 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -1452,10 +1452,10 @@ static void vmballoon_reset(struct vmballoon *b)
error = vmballoon_vmci_init(b);
if (error)
- pr_err("failed to initialize vmci doorbell\n");
+ pr_err_once("failed to initialize vmci doorbell\n");
if (vmballoon_send_guest_id(b))
- pr_err("failed to send guest ID to the host\n");
+ pr_err_once("failed to send guest ID to the host\n");
unlock:
up_write(&b->conf_sem);
diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig
index 605794aadf11..b6d4d7fd686a 100644
--- a/drivers/misc/vmw_vmci/Kconfig
+++ b/drivers/misc/vmw_vmci/Kconfig
@@ -5,7 +5,7 @@
config VMWARE_VMCI
tristate "VMware VMCI Driver"
- depends on X86 && PCI
+ depends on (X86 || ARM64) && !CPU_BIG_ENDIAN && PCI
help
This is VMware's Virtual Machine Communication Interface. It enables
high-speed communication between host and guest in a virtual
diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c
index 6cf3e21c7604..172696abce31 100644
--- a/drivers/misc/vmw_vmci/vmci_context.c
+++ b/drivers/misc/vmw_vmci/vmci_context.c
@@ -665,9 +665,8 @@ int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
{
struct vmci_ctx *context;
- struct vmci_handle_list *notifier, *tmp;
+ struct vmci_handle_list *notifier = NULL, *iter, *tmp;
struct vmci_handle handle;
- bool found = false;
context = vmci_ctx_get(context_id);
if (!context)
@@ -676,23 +675,23 @@ int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
spin_lock(&context->lock);
- list_for_each_entry_safe(notifier, tmp,
+ list_for_each_entry_safe(iter, tmp,
&context->notifier_list, node) {
- if (vmci_handle_is_equal(notifier->handle, handle)) {
- list_del_rcu(&notifier->node);
+ if (vmci_handle_is_equal(iter->handle, handle)) {
+ list_del_rcu(&iter->node);
context->n_notifiers--;
- found = true;
+ notifier = iter;
break;
}
}
spin_unlock(&context->lock);
- if (found)
+ if (notifier)
kvfree_rcu(notifier);
vmci_ctx_put(context);
- return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
+ return notifier ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
}
static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 57a6157209a1..aa7b05de97dd 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -614,6 +614,10 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
}
if (!mmio_base) {
+ if (IS_ENABLED(CONFIG_ARM64)) {
+ dev_err(&pdev->dev, "MMIO base is invalid\n");
+ return -ENXIO;
+ }
error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
if (error) {
dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 94ebf7f3fd58..8f2de1893245 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -2577,6 +2577,12 @@ static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
if (result < VMCI_SUCCESS)
return result;
+ /*
+ * This virt_wmb() ensures that data written to the queue
+ * is observable before the new producer_tail is.
+ */
+ virt_wmb();
+
vmci_q_header_add_producer_tail(produce_q->q_header, written,
produce_q_size);
return written;
@@ -2620,6 +2626,12 @@ static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
if (buf_ready < VMCI_SUCCESS)
return (ssize_t) buf_ready;
+ /*
+ * This virt_rmb() ensures that data from the queue will be read
+ * after we have determined how much is ready to be consumed.
+ */
+ virt_rmb();
+
read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
head = vmci_q_header_consumer_head(produce_q->q_header);
if (likely(head + read < consume_q_size)) {