/* * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct nvdla_config nvdla_config_os_initial = { .atom_size = 32, .bdma_enable = true, .rubik_enable = true, .weight_compress_support = true, }; static struct nvdla_config nvdla_config_small = { .atom_size = 8, .bdma_enable = false, .rubik_enable = false, .weight_compress_support = false, }; static struct nvdla_config nvdla_config_large = { .atom_size = 32, .bdma_enable = false, .rubik_enable = false, .weight_compress_support = false, }; void dla_debug(const char *str, ...) { va_list args; va_start(args, str); vprintk(pr_fmt(str), args); va_end(args); } void dla_info(const char *str, ...) { va_list args; va_start(args, str); vprintk(str, args); va_end(args); } void dla_warn(const char *str, ...) { va_list args; va_start(args, str); vprintk(str, args); va_end(args); } void dla_error(const char *str, ...) { va_list args; va_start(args, str); vprintk(str, args); va_end(args); } void *dla_memset(void *src, int ch, uint64_t len) { memset(src, ch, len); return src; } void *dla_memcpy(void *dest, const void *src, uint64_t len) { return memcpy(dest, src, len); } int64_t dla_get_time_us(void) { return ktime_get_ns() / NSEC_PER_USEC; } void dla_reg_write(void *driver_context, uint32_t addr, uint32_t reg) { struct nvdla_device *nvdla_dev = (struct nvdla_device *)driver_context; if (!nvdla_dev) return; writel(reg, nvdla_dev->base + addr); } uint32_t dla_reg_read(void *driver_context, uint32_t addr) { struct nvdla_device *nvdla_dev = (struct nvdla_device *)driver_context; if (!nvdla_dev) return 0; return readl(nvdla_dev->base + addr); } static irqreturn_t nvdla_engine_isr(int32_t irq, void *data) { unsigned long flags; struct nvdla_device *nvdla_dev = (struct nvdla_device *)data; if (!nvdla_dev) return IRQ_NONE; spin_lock_irqsave(&nvdla_dev->nvdla_lock, flags); dla_isr_handler(nvdla_dev->engine_context); complete(&nvdla_dev->event_notifier); spin_unlock_irqrestore(&nvdla_dev->nvdla_lock, flags); return IRQ_HANDLED; } static int32_t dla_read_dma_address(void *driver_context, void *task_data, int16_t index, void *dst) { int32_t ret = 0; struct nvdla_mem_handle *handles; dma_addr_t *phys_addr = (dma_addr_t *)(dst); struct nvdla_device *nvdla_dev = (struct nvdla_device *)driver_context; struct nvdla_task *task = (struct nvdla_task *)task_data; if (index == -1 || index > task->num_addresses) return -EINVAL; handles = (struct nvdla_mem_handle *)task->address_list; ret = nvdla_gem_dma_addr(nvdla_dev->drm, task->file, handles[index].handle, phys_addr); /* Add offset to IOVA address */ *phys_addr = *phys_addr + handles[index].offset; return ret; } static int32_t dla_read_cpu_address(void *driver_context, void *task_data, int16_t index, void *dst) { uint64_t *temp = (uint64_t *)dst; struct nvdla_task *task = (struct nvdla_task *)task_data; if (index == -1 || index > task->num_addresses) return -EINVAL; *temp = (uint64_t)index; return 0; } int32_t dla_get_dma_address(void *driver_context, void *task_data, int16_t index, void *dst_ptr, uint32_t destination) { int32_t ret = 0; if (destination == DESTINATION_PROCESSOR) { ret = dla_read_cpu_address(driver_context, task_data, index, dst_ptr); } else if (destination == DESTINATION_DMA) { ret = dla_read_dma_address(driver_context, task_data, index, dst_ptr); } else { ret = -EINVAL; } return ret; } int32_t dla_data_write(void *driver_context, void *task_data, void *src, uint64_t dst, uint32_t size, uint64_t offset) { int32_t ret; void *ptr = NULL; struct dma_buf *buf; struct dma_buf_map map; struct nvdla_mem_handle *handles; struct nvdla_task *task = (struct nvdla_task *)task_data; uint64_t dma_addr = 0; dla_get_dma_address(driver_context, task_data,dst, (void *)&dma_addr, DESTINATION_DMA); handles = task->address_list; buf = dma_buf_get(handles[dst].handle); if (IS_ERR(buf)) { pr_err("%s: Failed get dma_buf for handle=%d\n", __func__, handles[dst].handle); return -EFAULT; } ret = dma_buf_begin_cpu_access(buf, DMA_BIDIRECTIONAL); if (ret) goto put_dma_buf; ret = dma_buf_vmap(buf, &map); ptr = ret ? NULL : map.vaddr; if (!ptr) { pr_err("%s: Failed to vmap dma_buf for handle=%d\n", __func__, handles[dst].handle); ret = -ENOMEM; goto end_cpu_access; } memcpy((void *)((uint8_t *)ptr + offset), src, size); dma_buf_vunmap(buf, &map); end_cpu_access: dma_buf_end_cpu_access(buf, DMA_BIDIRECTIONAL); put_dma_buf: dma_buf_put(buf); return ret; } int32_t dla_data_read(void *driver_context, void *task_data, uint64_t src, void *dst, uint32_t size, uint64_t offset) { int32_t ret; void *ptr = NULL; struct dma_buf *buf; struct dma_buf_map map; struct nvdla_mem_handle *handles; struct nvdla_task *task = (struct nvdla_task *)task_data; uint64_t dma_addr = 0; dla_get_dma_address(driver_context, task_data, src, (void *)&dma_addr, DESTINATION_DMA); handles = task->address_list; buf = dma_buf_get(handles[src].handle); if (IS_ERR(buf)) { pr_err("%s: Failed get dma_buf for handle=%d\n", __func__, handles[src].handle); return -EFAULT; } ret = dma_buf_begin_cpu_access(buf, DMA_BIDIRECTIONAL); if (ret) goto put_dma_buf; ret = dma_buf_vmap(buf, &map); ptr = ret ? NULL : map.vaddr; if (!ptr) { pr_err("%s: Failed to vmap dma_buf for handle=%d\n", __func__, handles[src].handle); ret = -ENOMEM; goto end_cpu_access; } memcpy(dst, (void *)(((uint8_t *)ptr) + offset), size); dma_buf_vunmap(buf, &map); end_cpu_access: dma_buf_end_cpu_access(buf, DMA_BIDIRECTIONAL); put_dma_buf: dma_buf_put(buf); return ret; } int32_t nvdla_task_submit(struct nvdla_device *nvdla_dev, struct nvdla_task *task) { int32_t err = 0; uint32_t task_complete = 0; nvdla_dev->task = task; err = dla_execute_task(nvdla_dev->engine_context, (void *)task, nvdla_dev->config_data); if (err) { pr_err("Task execution failed\n"); return err; } pr_debug("Wait for task complete\n"); while (1) { unsigned long flags; wait_for_completion(&nvdla_dev->event_notifier); spin_lock_irqsave(&nvdla_dev->nvdla_lock, flags); err = dla_process_events(nvdla_dev->engine_context, &task_complete); spin_unlock_irqrestore(&nvdla_dev->nvdla_lock, flags); if (err || task_complete) break; } pr_debug("Task complete\n"); dla_clear_task(nvdla_dev->engine_context); return err; } /* driver probe and init */ static const struct of_device_id nvdla_of_match[] = { { .compatible = "nvidia,nvdla_os_initial", .data = &nvdla_config_os_initial, }, { .compatible = "nvidia,nv_small", .data = &nvdla_config_small, }, { .compatible = "nvidia,nv_large", .data = &nvdla_config_large, }, { }, }; static int32_t nvdla_probe(struct platform_device *pdev) { int32_t err = 0; struct resource *res; struct nvdla_device *nvdla_dev; struct device *dev = &pdev->dev; const struct of_device_id *match; if (!pdev->dev.of_node) return -EINVAL; match = of_match_device(nvdla_of_match, &pdev->dev); if (!match) { pr_err("Missing DT entry!\n"); return -EINVAL; } pr_err("Probe NVDLA config %s\n", match->compatible); nvdla_dev = devm_kzalloc(dev, sizeof(*nvdla_dev), GFP_KERNEL); if (!nvdla_dev) return -ENOMEM; platform_set_drvdata(pdev, nvdla_dev); nvdla_dev->pdev = pdev; nvdla_dev->config_data = (struct nvdla_config *)match->data; init_completion(&nvdla_dev->event_notifier); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); nvdla_dev->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(nvdla_dev->base)) return PTR_ERR(nvdla_dev->base); res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res) { dev_err(&pdev->dev, "no irq resource\n"); return -EINVAL; } nvdla_dev->irq = res->start; err = devm_request_irq(&pdev->dev, nvdla_dev->irq, nvdla_engine_isr, 0, dev_name(&pdev->dev), nvdla_dev); if (err) return err; dla_register_driver(&nvdla_dev->engine_context, (void *)nvdla_dev); dla_clear_task(nvdla_dev->engine_context); err = nvdla_drm_probe(nvdla_dev); if (err) dev_err(&pdev->dev, "failed to register drm device\n"); return err; } static int32_t __exit nvdla_remove(struct platform_device *pdev) { struct nvdla_device *nvdla_dev = dev_get_drvdata(&pdev->dev); nvdla_drm_remove(nvdla_dev); return 0; } static struct platform_driver nvdla_driver = { .probe = nvdla_probe, .remove = __exit_p(nvdla_remove), .driver = { .owner = THIS_MODULE, .name = "NVDLA", .of_match_table = of_match_ptr(nvdla_of_match), }, }; module_platform_driver(nvdla_driver); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("NVIDIA"); MODULE_DESCRIPTION("Nvidia Deep Learning Accelerator driver");