summaryrefslogtreecommitdiff
path: root/drivers/nvdla/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvdla/common.c')
-rw-r--r--drivers/nvdla/common.c324
1 files changed, 324 insertions, 0 deletions
diff --git a/drivers/nvdla/common.c b/drivers/nvdla/common.c
new file mode 100644
index 000000000000..c36c6ff3cc78
--- /dev/null
+++ b/drivers/nvdla/common.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of NVIDIA CORPORATION nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <opendla.h>
+#include <dla_debug.h>
+#include <dla_err.h>
+#include <dla_interface.h>
+
+#include "common.h"
+#include "dla_engine_internal.h"
+
+static const uint8_t map_lut_method[] = {
+ FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_LE_FUNCTION, EXPONENT),
+ FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_LE_FUNCTION, LINEAR)
+};
+static const uint8_t map_lut_out[] = {
+ FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_UFLOW_PRIORITY, LE),
+ FIELD_ENUM(CDP_S_LUT_CFG_0, LUT_UFLOW_PRIORITY, LO)
+};
+
+static const uint16_t access_data_offset[] = {
+ CDP_S_LUT_ACCESS_DATA_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_ACCESS_DATA_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t lut_cfg_offset[] = {
+ CDP_S_LUT_CFG_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_CFG_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t lut_info_offset[] = {
+ CDP_S_LUT_INFO_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_INFO_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t le_start_offset[] = {
+ CDP_S_LUT_LE_START_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LE_START_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t le_end_offset[] = {
+ CDP_S_LUT_LE_END_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LE_END_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t lo_start_offset[] = {
+ CDP_S_LUT_LO_START_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LO_START_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t lo_end_offset[] = {
+ CDP_S_LUT_LO_END_LOW_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LO_END_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t le_slope_scale_offset[] = {
+ CDP_S_LUT_LE_SLOPE_SCALE_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LE_SLOPE_SCALE_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t le_slope_shift_offset[] = {
+ CDP_S_LUT_LE_SLOPE_SHIFT_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LE_SLOPE_SHIFT_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t lo_slope_scale_offset[] = {
+ CDP_S_LUT_LO_SLOPE_SCALE_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LO_SLOPE_SCALE_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+static const uint16_t lo_slope_shift_offset[] = {
+ CDP_S_LUT_LO_SLOPE_SHIFT_0 - CDP_S_LUT_ACCESS_CFG_0,
+ SDP_S_LUT_LO_SLOPE_SHIFT_0 - SDP_S_LUT_ACCESS_CFG_0,
+};
+
+void update_lut(uint32_t reg_base, struct dla_lut_param *lut,
+ uint8_t precision)
+{
+ int32_t i;
+ uint32_t reg;
+ uint32_t high, low;
+ int32_t is_sdp = reg_base == SDP_S_LUT_ACCESS_CFG_0;
+ struct dla_engine *engine = dla_get_engine();
+
+ /* program raw table */
+ reg = (FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID, LE)
+ << SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID)) |
+ (FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE, WRITE)
+ << SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE));
+ reg_write(reg_base, reg);
+
+ for (i = 0; i < (1<<LUT_LINEAR_EXP_TABLE_ENTRY_LOG2)+1; i++) {
+ dla_reg_write(engine->driver_context,
+ reg_base + access_data_offset[is_sdp],
+ lut->linear_exp_table[i]);
+ }
+
+ /* program density table */
+ reg = (FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID, LO)
+ << SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_TABLE_ID)) |
+ (FIELD_ENUM(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE, WRITE)
+ << SHIFT(CDP_S_LUT_ACCESS_CFG_0, LUT_ACCESS_TYPE));
+ dla_reg_write(engine->driver_context, reg_base, reg);
+
+ for (i = 0; i < (1<<LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2)+1; i++) {
+ dla_reg_write(engine->driver_context,
+ reg_base + access_data_offset[is_sdp],
+ lut->linear_only_table[i]);
+ }
+
+ /* program other configurations */
+ reg = (map_lut_method[lut->method] <<
+ SHIFT(CDP_S_LUT_CFG_0, LUT_LE_FUNCTION)) |
+ (map_lut_out[lut->hybrid_priority] <<
+ SHIFT(CDP_S_LUT_CFG_0, LUT_HYBRID_PRIORITY)) |
+ (map_lut_out[lut->underflow_priority] <<
+ SHIFT(CDP_S_LUT_CFG_0, LUT_UFLOW_PRIORITY)) |
+ (map_lut_out[lut->overflow_priority] <<
+ SHIFT(CDP_S_LUT_CFG_0, LUT_OFLOW_PRIORITY));
+ dla_reg_write(engine->driver_context,
+ reg_base + lut_cfg_offset[is_sdp], reg);
+
+ if (lut->method == FIELD_ENUM(CDP_S_LUT_CFG_0,
+ LUT_LE_FUNCTION, EXPONENT)) {
+ reg = ((((uint32_t)lut->linear_exp_offset.exp_offset) <<
+ SHIFT(CDP_S_LUT_INFO_0, LUT_LE_INDEX_OFFSET))&
+ MASK(CDP_S_LUT_INFO_0, LUT_LE_INDEX_OFFSET)) |
+ ((((uint32_t)lut->linear_only_offset.frac_bits) <<
+ SHIFT(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT))&
+ MASK(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT));
+ } else {
+ reg = ((((uint32_t)lut->linear_exp_offset.frac_bits) <<
+ SHIFT(CDP_S_LUT_INFO_0, LUT_LE_INDEX_SELECT))&
+ MASK(CDP_S_LUT_INFO_0, LUT_LE_INDEX_SELECT)) |
+ ((((uint32_t)lut->linear_only_offset.frac_bits) <<
+ SHIFT(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT))&
+ MASK(CDP_S_LUT_INFO_0, LUT_LO_INDEX_SELECT));
+ }
+ dla_reg_write(engine->driver_context,
+ reg_base + lut_info_offset[is_sdp], reg);
+ high = HIGH32BITS(lut->linear_exp_start);
+ low = LOW32BITS(lut->linear_exp_start);
+ dla_reg_write(engine->driver_context,
+ reg_base + le_start_offset[is_sdp], low);
+ if (!is_sdp)
+ dla_reg_write(engine->driver_context,
+ reg_base + le_start_offset[is_sdp] + 4, high);
+
+ high = HIGH32BITS(lut->linear_exp_end);
+ low = LOW32BITS(lut->linear_exp_end);
+ dla_reg_write(engine->driver_context,
+ reg_base + le_end_offset[is_sdp], low);
+ if (!is_sdp)
+ dla_reg_write(engine->driver_context,
+ reg_base + le_end_offset[is_sdp] + 4, high);
+
+ high = HIGH32BITS(lut->linear_only_start);
+ low = LOW32BITS(lut->linear_only_start);
+ dla_reg_write(engine->driver_context,
+ reg_base + lo_start_offset[is_sdp], low);
+ if (!is_sdp)
+ dla_reg_write(engine->driver_context,
+ reg_base + lo_start_offset[is_sdp] + 4, high);
+
+ high = HIGH32BITS(lut->linear_only_end);
+ low = LOW32BITS(lut->linear_only_end);
+ dla_reg_write(engine->driver_context,
+ reg_base + lo_end_offset[is_sdp], low);
+ if (!is_sdp)
+ dla_reg_write(engine->driver_context,
+ reg_base + lo_end_offset[is_sdp] + 4, high);
+
+ if (precision == PRECISION_FP16) {
+ reg = (lut->linear_exp_underflow_slope.data_f <<
+ SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
+ LUT_LE_SLOPE_UFLOW_SCALE)) |
+ (lut->linear_exp_overflow_slope.data_f <<
+ SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
+ LUT_LE_SLOPE_OFLOW_SCALE));
+ dla_reg_write(engine->driver_context,
+ reg_base + le_slope_scale_offset[is_sdp], reg);
+
+ reg = (lut->linear_only_underflow_slope.data_f <<
+ SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
+ LUT_LO_SLOPE_UFLOW_SCALE)) |
+ (lut->linear_only_overflow_slope.data_f <<
+ SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
+ LUT_LO_SLOPE_OFLOW_SCALE));
+ dla_reg_write(engine->driver_context,
+ reg_base + lo_slope_scale_offset[is_sdp], reg);
+ } else {
+ union dla_slope *oslope;
+ union dla_slope *uslope;
+
+ uslope = &lut->linear_exp_underflow_slope;
+ oslope = &lut->linear_exp_overflow_slope;
+ reg = ((((uint32_t)uslope->data_i.scale)
+ << SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
+ LUT_LE_SLOPE_UFLOW_SCALE))&
+ MASK(CDP_S_LUT_LE_SLOPE_SCALE_0,
+ LUT_LE_SLOPE_UFLOW_SCALE)) |
+ ((((uint32_t)oslope->data_i.scale)
+ << SHIFT(CDP_S_LUT_LE_SLOPE_SCALE_0,
+ LUT_LE_SLOPE_OFLOW_SCALE))&
+ MASK(CDP_S_LUT_LE_SLOPE_SCALE_0,
+ LUT_LE_SLOPE_OFLOW_SCALE));
+ dla_reg_write(engine->driver_context,
+ reg_base + le_slope_scale_offset[is_sdp], reg);
+
+ reg = ((((uint32_t)uslope->data_i.shifter) <<
+ SHIFT(CDP_S_LUT_LE_SLOPE_SHIFT_0,
+ LUT_LE_SLOPE_UFLOW_SHIFT))&
+ MASK(CDP_S_LUT_LE_SLOPE_SHIFT_0,
+ LUT_LE_SLOPE_UFLOW_SHIFT)) |
+ ((((uint32_t)oslope->data_i.shifter) <<
+ SHIFT(CDP_S_LUT_LE_SLOPE_SHIFT_0,
+ LUT_LE_SLOPE_OFLOW_SHIFT))&
+ MASK(CDP_S_LUT_LE_SLOPE_SHIFT_0,
+ LUT_LE_SLOPE_OFLOW_SHIFT));
+ dla_reg_write(engine->driver_context,
+ reg_base + le_slope_shift_offset[is_sdp], reg);
+
+ uslope = &lut->linear_only_underflow_slope;
+ oslope = &lut->linear_only_overflow_slope;
+ reg = ((((uint32_t)uslope->data_i.scale) <<
+ SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
+ LUT_LO_SLOPE_UFLOW_SCALE))&
+ MASK(CDP_S_LUT_LO_SLOPE_SCALE_0,
+ LUT_LO_SLOPE_UFLOW_SCALE)) |
+ ((((uint32_t)oslope->data_i.scale) <<
+ SHIFT(CDP_S_LUT_LO_SLOPE_SCALE_0,
+ LUT_LO_SLOPE_OFLOW_SCALE))&
+ MASK(CDP_S_LUT_LO_SLOPE_SCALE_0,
+ LUT_LO_SLOPE_OFLOW_SCALE));
+ dla_reg_write(engine->driver_context,
+ reg_base + lo_slope_scale_offset[is_sdp], reg);
+ reg = ((((uint32_t)uslope->data_i.shifter) <<
+ SHIFT(CDP_S_LUT_LO_SLOPE_SHIFT_0,
+ LUT_LO_SLOPE_UFLOW_SHIFT))&
+ MASK(CDP_S_LUT_LO_SLOPE_SHIFT_0,
+ LUT_LO_SLOPE_UFLOW_SHIFT)) |
+ ((((uint32_t)oslope->data_i.shifter) <<
+ SHIFT(CDP_S_LUT_LO_SLOPE_SHIFT_0,
+ LUT_LO_SLOPE_OFLOW_SHIFT))&
+ MASK(CDP_S_LUT_LO_SLOPE_SHIFT_0,
+ LUT_LO_SLOPE_OFLOW_SHIFT));
+ dla_reg_write(engine->driver_context,
+ reg_base + lo_slope_shift_offset[is_sdp], reg);
+ }
+}
+
+int
+validate_data_cube(struct dla_data_cube src_data_cube,
+ struct dla_data_cube dst_data_cube,
+ uint8_t mem_type)
+{
+ int32_t ret = 0;
+
+ dla_trace("Enter: %s", __func__);
+
+ if ((src_data_cube.width > DCUBE_MAX_WIDTH) ||
+ (src_data_cube.height > DCUBE_MAX_HEIGHT) ||
+ (src_data_cube.channel > DCUBE_MAX_CHANNEL)) {
+ dla_error("Invalid SrcInput Cude[W: %u, H: %u, C: %u]",
+ src_data_cube.width, src_data_cube.height,
+ src_data_cube.channel);
+ ret = ERR(INVALID_INPUT);
+ goto exit;
+ }
+
+ if ((dst_data_cube.width > DCUBE_MAX_WIDTH) ||
+ (dst_data_cube.height > DCUBE_MAX_HEIGHT) ||
+ (dst_data_cube.channel > DCUBE_MAX_CHANNEL)) {
+ dla_error("Invalid DstInput Cude[W: %u, H: %u, C: %u]",
+ dst_data_cube.width, dst_data_cube.height,
+ dst_data_cube.channel);
+ ret = ERR(INVALID_INPUT);
+ goto exit;
+ }
+
+ if (src_data_cube.type > mem_type) {
+ dla_error("Invalid src_data.mem_type: %u\n", src_data_cube.type);
+ ret = ERR(INVALID_INPUT);
+ goto exit;
+ }
+
+ if (dst_data_cube.type > mem_type) {
+ dla_error("Invalid dst_data.mem_type: %u\n", dst_data_cube.type);
+ ret = ERR(INVALID_INPUT);
+ goto exit;
+ }
+
+exit:
+ dla_trace("Exit: %s", __func__);
+ RETURN(ret);
+}
+
+int
+validate_precision(uint8_t precision, uint8_t map_precision)
+{
+ int32_t ret = 0;
+
+ if (precision >= map_precision) {
+ dla_error("Invalid precision: %u\n", precision);
+ ret = ERR(INVALID_INPUT);
+ }
+
+ RETURN(ret);
+}