/* Source: drivers/nvdla/dla_engine_internal.h (blob d29c7b4c56c36c5220ff22cf8643098a2b1fe9a9) */
/*
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FIRMWARE_DLA_ENGINE_INTERNAL_H_
#define __FIRMWARE_DLA_ENGINE_INTERNAL_H_

#include <opendla.h>
#include <dla_engine.h>
#include <dla_interface.h>
#include <dla_debug.h>

#include "nvdla_interface.h"

/*
 * BITS(num, range) - extract a bit field from a 32-bit quantity.
 *
 * @range must be written literally as "msb:lsb" (e.g. BITS(reg, 15:8)) with
 * 31 >= msb >= lsb >= 0.  The macro places it after "1 ?" and "0 ?" so that
 * the conditional operator picks the msb and the lsb respectively; because of
 * this trick @range itself cannot be wrapped in parentheses.
 * @num is parenthesized, so compound expressions such as BITS(a | b, 7:0)
 * mask the whole expression rather than only its first operand.
 */
#define BITS(num, range) ((((0xFFFFFFFF >> (31 - (1 ? range))) & \
			(0xFFFFFFFF << (0 ? range))) & (num)) >> \
			(0 ? range))

/*
 * Split a 64-bit value into its upper / lower 32-bit halves.
 * HIGH32BITS widens its argument to uint64_t before shifting, so the shift
 * is always a well-defined logical shift (no undefined shift-by-width when a
 * 32-bit expression is passed, no arithmetic shift for signed operands).
 */
#define HIGH32BITS(val64bit) ((uint32_t)((uint64_t)(val64bit) >> 32))
#define LOW32BITS(val64bit) ((uint32_t)(val64bit))

/*
 * Local two-argument MIN/MAX.
 * Any definition inherited from an earlier header is discarded first
 * (#undef of a name that is not a macro is a no-op), so the versions below
 * are the ones in effect after this point.
 * Both are plain macros: an argument may be evaluated twice, so do not pass
 * expressions with side effects.
 */
#undef MIN
#undef MAX

#define MAX(a, b) ((b) < (a) ? (a) : (b))
#define MIN(a, b) ((b) < (a) ? (b) : (a))

/*********************************************************/
/******************** Utilities **************************/
/*********************************************************/
/*
 * CHECK_ALIGN(val, align) - debug-only alignment assertion.
 *
 * With DEBUG defined it asserts ((val) & ((align) - 1)) == 0; that mask test
 * is a valid alignment check only when @align is a power of two.
 * Both arguments are parenthesized so compound expressions (e.g.
 * CHECK_ALIGN(base | off, n)) are tested as a whole.
 * Without DEBUG the macro expands to nothing and neither argument is
 * evaluated, so do not rely on side effects in the arguments.
 */
#ifdef DEBUG
#define CHECK_ALIGN(val, align)		 assert(((val) & ((align) - 1)) == 0)
#else
#define CHECK_ALIGN(val, align)
#endif /* DEBUG */

/*
 * Identifier builders for register-field constants defined elsewhere.
 * These are pure token pasting; the arguments are pasted verbatim (they are
 * not macro-expanded first):
 *   MASK(R, F)          -> (R_F_FIELD)
 *   SHIFT(R, F)         -> (R_F_SHIFT)
 *   FIELD_ENUM(R, F, E) -> (R_F_E)
 */
#define MASK(r, f)			(r##_##f##_FIELD)
#define SHIFT(r, f)			(r##_##f##_SHIFT)
#define FIELD_ENUM(reg, field, e)	(reg##_##field##_##e)

/*
 * Per-unit register name builders: <UNIT>_REG(name) pastes to the identifier
 * <UNIT>_<name>_0, which is expected to be defined outside this header.
 * The argument is pasted verbatim by these macros.
 * Note the Rubik unit uses the RBK_ prefix.
 */
#define GLB_REG(name)                GLB_##name##_0
#define MCIF_REG(name)               MCIF_##name##_0
#define CVIF_REG(name)               CVIF_##name##_0
#define BDMA_REG(name)               BDMA_##name##_0
#define CDMA_REG(name)               CDMA_##name##_0
#define CSC_REG(name)                CSC_##name##_0
#define CMAC_A_REG(name)             CMAC_A_##name##_0
#define CMAC_B_REG(name)             CMAC_B_##name##_0
#define CACC_REG(name)               CACC_##name##_0
#define SDP_RDMA_REG(name)           SDP_RDMA_##name##_0
#define SDP_REG(name)                SDP_##name##_0
#define PDP_RDMA_REG(name)           PDP_RDMA_##name##_0
#define PDP_REG(name)                PDP_##name##_0
#define CDP_RDMA_REG(name)           CDP_RDMA_##name##_0
#define CDP_REG(name)                CDP_##name##_0
#define RBK_REG(name)                RBK_##name##_0

/*
 * Register-read aliases, one per sub-module: <unit>_reg_read(reg) expands to
 * reg_read(<UNIT>_REG(reg)), i.e. a read of the identifier <UNIT>_<reg>_0.
 * Because "reg" is handed on to <UNIT>_REG() as an ordinary macro argument,
 * it is macro-expanded before the paste, so it must not itself be the name
 * of a macro.
 * No alias is provided here for MCIF or CVIF; the Rubik alias is spelled
 * rubik_* but maps to RBK_REG().
 */
#define glb_reg_read(reg)           reg_read(GLB_REG(reg))
#define bdma_reg_read(reg)          reg_read(BDMA_REG(reg))
#define cdma_reg_read(reg)          reg_read(CDMA_REG(reg))
#define csc_reg_read(reg)           reg_read(CSC_REG(reg))
#define cmac_a_reg_read(reg)        reg_read(CMAC_A_REG(reg))
#define cmac_b_reg_read(reg)        reg_read(CMAC_B_REG(reg))
#define cacc_reg_read(reg)          reg_read(CACC_REG(reg))
#define sdp_rdma_reg_read(reg)      reg_read(SDP_RDMA_REG(reg))
#define sdp_reg_read(reg)           reg_read(SDP_REG(reg))
#define pdp_rdma_reg_read(reg)      reg_read(PDP_RDMA_REG(reg))
#define pdp_reg_read(reg)           reg_read(PDP_REG(reg))
#define cdp_rdma_reg_read(reg)      reg_read(CDP_RDMA_REG(reg))
#define cdp_reg_read(reg)           reg_read(CDP_REG(reg))
#define rubik_reg_read(reg)         reg_read(RBK_REG(reg))

/*
 * Register-write aliases, one per sub-module: <unit>_reg_write(reg, val)
 * expands to reg_write(<UNIT>_REG(reg), val), i.e. the identifier
 * <UNIT>_<reg>_0 is passed as the first argument and "val" is forwarded
 * unchanged as the second.
 * No alias is provided here for MCIF or CVIF; the Rubik alias is spelled
 * rubik_* but maps to RBK_REG().
 */
#define glb_reg_write(reg, val)      reg_write(GLB_REG(reg), val)
#define bdma_reg_write(reg, val)     reg_write(BDMA_REG(reg), val)
#define cdma_reg_write(reg, val)     reg_write(CDMA_REG(reg), val)
#define csc_reg_write(reg, val)      reg_write(CSC_REG(reg), val)
#define cmac_a_reg_write(reg, val)   reg_write(CMAC_A_REG(reg), val)
#define cmac_b_reg_write(reg, val)   reg_write(CMAC_B_REG(reg), val)
#define cacc_reg_write(reg, val)     reg_write(CACC_REG(reg), val)
#define sdp_rdma_reg_write(reg, val) reg_write(SDP_RDMA_REG(reg), val)
#define sdp_reg_write(reg, val)      reg_write(SDP_REG(reg), val)
#define pdp_rdma_reg_write(reg, val) reg_write(PDP_RDMA_REG(reg), val)
#define pdp_reg_write(reg, val)      reg_write(PDP_REG(reg), val)
#define cdp_rdma_reg_write(reg, val) reg_write(CDP_RDMA_REG(reg), val)
#define cdp_reg_write(reg, val)      reg_write(CDP_REG(reg), val)
#define rubik_reg_write(reg, val)    reg_write(RBK_REG(reg), val)

/*
 * Raw 32-bit register accessors that the <unit>_reg_read/_reg_write alias
 * macros in this header expand to.
 *
 * reg_write(): @addr identifies the register.  The alias macros pass their
 * "val" argument in the second position, so despite its name @reg appears to
 * carry the value to be written, not a register.
 * reg_read(): takes the register identifier and returns a 32-bit value.
 *
 * NOTE(review): whether @addr is a byte offset or an absolute address is not
 * visible from this header - confirm against the implementation.
 */
void reg_write(uint32_t addr, uint32_t reg);
uint32_t reg_read(uint32_t addr);

/**
 * Operation descriptor cache functions
 *
 * NOTE(review): only the prototypes are visible in this header; the notes
 * below are inferred from the names and signatures and should be confirmed
 * against the implementation.
 *  - dla_get_op_desc():  returns a pointer to a struct dla_common_op_desc
 *    selected by (task, index, op_type, roi_index); presumably may return
 *    NULL on failure - TODO confirm.
 *  - dla_put_op_desc():  presumably releases a descriptor previously
 *    obtained with dla_get_op_desc().
 *  - dla_get_refcount(): returns void, so despite the "get" in its name it
 *    cannot report a count; presumably it takes an extra reference on
 *    op_desc - TODO confirm.
 *  - dla_dump_op_desc(): presumably a debug dump of one descriptor.
 *  - dla_init_op_cache(): presumably one-time cache setup for an engine.
 */
void
dla_put_op_desc(struct dla_common_op_desc *op_desc);
struct dla_common_op_desc
*dla_get_op_desc(struct dla_task *task,
			   int16_t index,
			   uint8_t op_type,
			   uint8_t roi_index);
void
dla_dump_op_desc(struct dla_common_op_desc *desc);
void
dla_get_refcount(struct dla_common_op_desc *op_desc);
void
dla_init_op_cache(struct dla_engine *engine);

/**
 * Operation completion handler
 *
 * Takes the processor and the processor group concerned and returns an int
 * status.
 * NOTE(review): the meaning of the return value (presumably 0 on success,
 * non-zero on error) and the expected calling context are not visible from
 * this header - confirm against the implementation.
 */
int
dla_op_completion(struct dla_processor *processor,
		      struct dla_processor_group *group);

/*
 * Miscellaneous engine helpers.
 *
 * NOTE(review): only the prototypes are visible in this header; the notes
 * below are inferred from names and signatures - confirm against the
 * implementation.  Return values are presumably 0 on success - TODO confirm.
 *  - dla_read_lut(): presumably copies the look-up table selected by @index
 *    into the caller-provided buffer @dst; the required size of @dst is not
 *    visible here.
 *  - dla_enable_intr() / dla_disable_intr(): take a 32-bit @mask, presumably
 *    a bit mask of interrupt sources.
 *  - utils_get_free_group(): writes its results through the @group_id and
 *    @rdma_id out-pointers.
 *  - dla_get_dma_cube_address(): takes opaque @driver_context / @task_data
 *    pointers plus an @index and @offset and writes through @dst_ptr; the
 *    meaning of @destination is not visible here.
 *  - dla_read_input_address(): writes a 64-bit address through @address for
 *    the given data cube.
 */
int32_t
dla_read_lut(struct dla_engine *engine, int16_t index, void *dst);
int
dla_enable_intr(uint32_t mask);
int
dla_disable_intr(uint32_t mask);
int
utils_get_free_group(struct dla_processor *processor,
			uint8_t *group_id,
			uint8_t *rdma_id);
int32_t
dla_get_dma_cube_address(void *driver_context,
						void *task_data,
						int16_t index,
						uint32_t offset,
						void *dst_ptr,
						uint32_t destination);
int
dla_read_input_address(struct dla_data_cube *data,
		       uint64_t *address,
		       int16_t op_index,
		       uint8_t roi_index,
		       uint8_t bpp);

/**
 * BDMA operations
 *
 * The same six entry points are declared for every engine in this header:
 * set_producer, enable, program, is_ready, dump_config and rdma_check.
 *
 * NOTE(review): only prototypes are visible here; the roles below are
 * inferred from the names - confirm against the implementation.
 *  - set_producer: presumably selects which group / RDMA group is programmed.
 *  - enable / program: presumably start / configure the given group;
 *    int result presumably 0 on success.
 *  - is_ready: presumably reports whether the group can be programmed.
 *  - dump_config: presumably a debug dump of the group's configuration.
 *  - rdma_check: presumably decides whether the group needs its RDMA unit.
 */
void
dla_bdma_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_bdma_enable(struct dla_processor_group *group);
int
dla_bdma_program(struct dla_processor_group *group);
int
dla_bdma_is_ready(struct dla_processor *processor,
			    struct dla_processor_group *group);
void
dla_bdma_dump_config(struct dla_processor_group *group);
void
dla_bdma_rdma_check(struct dla_processor_group *group);

/*
 * BDMA statistics hooks.
 * When STAT_ENABLE is zero or undefined they are empty inline stubs, so call
 * sites need no conditional compilation; otherwise they are ordinary external
 * functions defined outside this header.
 */
#if !STAT_ENABLE
static inline void
dla_bdma_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group)
{
	(void)processor;
	(void)group;
}

static inline void
dla_bdma_dump_stat(struct dla_processor *processor)
{
	(void)processor;
}
#else /* STAT_ENABLE */
void
dla_bdma_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group);
void
dla_bdma_dump_stat(struct dla_processor *processor);
#endif /* STAT_ENABLE */

/**
 * Convolution operations
 *
 * The same six entry points are declared for every engine in this header:
 * set_producer, enable, program, is_ready, dump_config and rdma_check.
 *
 * NOTE(review): only prototypes are visible here; the roles below are
 * inferred from the names - confirm against the implementation.
 *  - set_producer: presumably selects which group / RDMA group is programmed.
 *  - enable / program: presumably start / configure the given group;
 *    int result presumably 0 on success.
 *  - is_ready: presumably reports whether the group can be programmed.
 *  - dump_config: presumably a debug dump of the group's configuration.
 *  - rdma_check: presumably decides whether the group needs its RDMA unit.
 */
void
dla_conv_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_conv_enable(struct dla_processor_group *group);
int
dla_conv_program(struct dla_processor_group *group);
int
dla_conv_is_ready(struct dla_processor *processor,
			    struct dla_processor_group *group);
void
dla_conv_dump_config(struct dla_processor_group *group);
void
dla_conv_rdma_check(struct dla_processor_group *group);

/*
 * Convolution statistics hooks.
 * When STAT_ENABLE is zero or undefined they are empty inline stubs, so call
 * sites need no conditional compilation; otherwise they are ordinary external
 * functions defined outside this header.
 */
#if !STAT_ENABLE
static inline void
dla_conv_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group)
{
	(void)processor;
	(void)group;
}

static inline void
dla_conv_dump_stat(struct dla_processor *processor)
{
	(void)processor;
}
#else /* STAT_ENABLE */
void
dla_conv_stat_data(struct dla_processor *processor,
		   struct dla_processor_group *group);
void
dla_conv_dump_stat(struct dla_processor *processor);
#endif /* STAT_ENABLE */

/**
 * SDP operations
 *
 * The same six entry points are declared for every engine in this header:
 * set_producer, enable, program, is_ready, dump_config and rdma_check.
 *
 * NOTE(review): only prototypes are visible here; the roles below are
 * inferred from the names - confirm against the implementation.
 *  - set_producer: presumably selects which group / RDMA group is programmed.
 *  - enable / program: presumably start / configure the given group;
 *    int result presumably 0 on success.
 *  - is_ready: presumably reports whether the group can be programmed.
 *  - dump_config: presumably a debug dump of the group's configuration.
 *  - rdma_check: presumably decides whether the group needs its RDMA unit.
 */
void
dla_sdp_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_sdp_enable(struct dla_processor_group *group);
int
dla_sdp_program(struct dla_processor_group *group);
int
dla_sdp_is_ready(struct dla_processor *processor,
			   struct dla_processor_group *group);
void
dla_sdp_dump_config(struct dla_processor_group *group);
void
dla_sdp_rdma_check(struct dla_processor_group *group);

/*
 * SDP statistics hooks.
 * When STAT_ENABLE is zero or undefined they are empty inline stubs, so call
 * sites need no conditional compilation; otherwise they are ordinary external
 * functions defined outside this header.
 */
#if !STAT_ENABLE
static inline void
dla_sdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group)
{
	(void)processor;
	(void)group;
}

static inline void
dla_sdp_dump_stat(struct dla_processor *processor)
{
	(void)processor;
}
#else /* STAT_ENABLE */
void
dla_sdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_sdp_dump_stat(struct dla_processor *processor);
#endif /* STAT_ENABLE */

/**
 * PDP operations
 *
 * The same six entry points are declared for every engine in this header:
 * set_producer, enable, program, is_ready, dump_config and rdma_check.
 *
 * NOTE(review): only prototypes are visible here; the roles below are
 * inferred from the names - confirm against the implementation.
 *  - set_producer: presumably selects which group / RDMA group is programmed.
 *  - enable / program: presumably start / configure the given group;
 *    int result presumably 0 on success.
 *  - is_ready: presumably reports whether the group can be programmed.
 *  - dump_config: presumably a debug dump of the group's configuration.
 *  - rdma_check: presumably decides whether the group needs its RDMA unit.
 */
void
dla_pdp_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_pdp_enable(struct dla_processor_group *group);
int
dla_pdp_program(struct dla_processor_group *group);
int
dla_pdp_is_ready(struct dla_processor *processor,
			   struct dla_processor_group *group);
void
dla_pdp_dump_config(struct dla_processor_group *group);
void
dla_pdp_rdma_check(struct dla_processor_group *group);

/*
 * PDP statistics hooks.
 * When STAT_ENABLE is zero or undefined they are empty inline stubs, so call
 * sites need no conditional compilation; otherwise they are ordinary external
 * functions defined outside this header.
 */
#if !STAT_ENABLE
static inline void
dla_pdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group)
{
	(void)processor;
	(void)group;
}

static inline void
dla_pdp_dump_stat(struct dla_processor *processor)
{
	(void)processor;
}
#else /* STAT_ENABLE */
void
dla_pdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_pdp_dump_stat(struct dla_processor *processor);
#endif /* STAT_ENABLE */

/**
 * CDP operations
 *
 * The same six entry points are declared for every engine in this header:
 * set_producer, enable, program, is_ready, dump_config and rdma_check.
 *
 * NOTE(review): only prototypes are visible here; the roles below are
 * inferred from the names - confirm against the implementation.
 *  - set_producer: presumably selects which group / RDMA group is programmed.
 *  - enable / program: presumably start / configure the given group;
 *    int result presumably 0 on success.
 *  - is_ready: presumably reports whether the group can be programmed.
 *  - dump_config: presumably a debug dump of the group's configuration.
 *  - rdma_check: presumably decides whether the group needs its RDMA unit.
 */
void
dla_cdp_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_cdp_enable(struct dla_processor_group *group);
int
dla_cdp_program(struct dla_processor_group *group);
int
dla_cdp_is_ready(struct dla_processor *processor,
			   struct dla_processor_group *group);
void
dla_cdp_dump_config(struct dla_processor_group *group);
void
dla_cdp_rdma_check(struct dla_processor_group *group);

/*
 * CDP statistics hooks.
 * When STAT_ENABLE is zero or undefined they are empty inline stubs, so call
 * sites need no conditional compilation; otherwise they are ordinary external
 * functions defined outside this header.
 */
#if !STAT_ENABLE
static inline void
dla_cdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group)
{
	(void)processor;
	(void)group;
}

static inline void
dla_cdp_dump_stat(struct dla_processor *processor)
{
	(void)processor;
}
#else /* STAT_ENABLE */
void
dla_cdp_stat_data(struct dla_processor *processor,
		  struct dla_processor_group *group);
void
dla_cdp_dump_stat(struct dla_processor *processor);
#endif /* STAT_ENABLE */

/**
 * RUBIK operations
 *
 * The same six entry points are declared for every engine in this header:
 * set_producer, enable, program, is_ready, dump_config and rdma_check.
 *
 * NOTE(review): only prototypes are visible here; the roles below are
 * inferred from the names - confirm against the implementation.
 *  - set_producer: presumably selects which group / RDMA group is programmed.
 *  - enable / program: presumably start / configure the given group;
 *    int result presumably 0 on success.
 *  - is_ready: presumably reports whether the group can be programmed.
 *  - dump_config: presumably a debug dump of the group's configuration.
 *  - rdma_check: presumably decides whether the group needs its RDMA unit.
 */
void
dla_rubik_set_producer(int32_t group_id, int32_t rdma_group_id);
int
dla_rubik_enable(struct dla_processor_group *group);
int
dla_rubik_program(struct dla_processor_group *group);
int
dla_rubik_is_ready(struct dla_processor *processor,
			     struct dla_processor_group *group);
void
dla_rubik_dump_config(struct dla_processor_group *group);
void
dla_rubik_rdma_check(struct dla_processor_group *group);

/*
 * RUBIK statistics hooks.
 * When STAT_ENABLE is zero or undefined they are empty inline stubs, so call
 * sites need no conditional compilation; otherwise they are ordinary external
 * functions defined outside this header.
 */
#if !STAT_ENABLE
static inline void
dla_rubik_stat_data(struct dla_processor *processor,
		    struct dla_processor_group *group)
{
	(void)processor;
	(void)group;
}

static inline void
dla_rubik_dump_stat(struct dla_processor *processor)
{
	(void)processor;
}
#else /* STAT_ENABLE */
void
dla_rubik_stat_data(struct dla_processor *processor,
		    struct dla_processor_group *group);
void
dla_rubik_dump_stat(struct dla_processor *processor);
#endif /* STAT_ENABLE */

#endif