1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
|
/* SPDX-License-Identifier: GPL-2.0-only
* Copyright (C) 2020 Marvell.
*/
#ifndef __OTX2_CPTLF_H
#define __OTX2_CPTLF_H
#include <linux/soc/marvell/octeontx2/asm.h>
#include <mbox.h>
#include <rvu.h>
#include "otx2_cpt_common.h"
#include "otx2_cpt_reqmgr.h"
/*
 * Length, in CPT_INST_S messages, that the user requests for both the CPT
 * instruction queue and the pending queue.
 */
#define OTX2_CPT_USER_REQUESTED_QLEN_MSGS 8200
/*
 * CPT instruction queue size passed to HW is in units of 40*CPT_INST_S
 * messages. Integer division: 8200 / 40 = 205.
 */
#define OTX2_CPT_SIZE_DIV40 (OTX2_CPT_USER_REQUESTED_QLEN_MSGS/40)
/*
 * CPT instruction and pending queues length in CPT_INST_S messages.
 * This is one 40-message unit less than OTX2_CPT_SIZE_DIV40 * 40:
 * (205 - 1) * 40 = 8160.
 */
#define OTX2_CPT_INST_QLEN_MSGS ((OTX2_CPT_SIZE_DIV40 - 1) * 40)
/*
 * CPT instruction queue length in bytes: the full OTX2_CPT_SIZE_DIV40 * 40
 * messages, each OTX2_CPT_INST_SIZE bytes (defined outside this header).
 */
#define OTX2_CPT_INST_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 40 * \
OTX2_CPT_INST_SIZE)
/* CPT instruction group queue length in bytes (16 per 40-message unit) */
#define OTX2_CPT_INST_GRP_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 16)
/* CPT FC length in bytes; added to each instruction queue allocation */
#define OTX2_CPT_Q_FC_LEN 128
/* CPT instruction queue alignment, in bytes */
#define OTX2_CPT_INST_Q_ALIGNMENT 128
/* Mask which selects all engine groups */
#define OTX2_CPT_ALL_ENG_GRPS_MASK 0xFF
/*
 * Maximum LFs supported in OcteonTX2 for CPT; also the size of the lf[]
 * array in struct otx2_cptlfs_info.
 */
#define OTX2_CPT_MAX_LFS_NUM 64
/* Queue priority values */
#define OTX2_CPT_QUEUE_HI_PRIO 0x1
#define OTX2_CPT_QUEUE_LOW_PRIO 0x0
/*
 * LF state values stored in otx2_cptlfs_info.state (an atomic_t) and
 * compared against by otx2_cptlf_started().
 */
enum otx2_cptlf_state {
OTX2_CPTLF_IN_RESET,
OTX2_CPTLF_STARTED,
};
/*
 * Bookkeeping for one LF instruction queue buffer.
 *
 * The "real" fields hold exactly what dma_alloc_coherent() returned and are
 * what must be handed back to dma_free_coherent(); the other pair is the
 * offset and aligned view derived from them in
 * otx2_cpt_alloc_instruction_queues().
 */
struct otx2_cpt_inst_queue {
u8 *vaddr; /* Aligned CPU address derived from real_vaddr */
u8 *real_vaddr; /* CPU address returned by the allocator */
dma_addr_t dma_addr; /* Aligned DMA address derived from real_dma_addr */
dma_addr_t real_dma_addr; /* DMA address returned by the allocator */
u32 size; /* Allocation size in bytes */
};
/* Forward declaration; the full definition appears further down. */
struct otx2_cptlfs_info;
/*
 * Tasklet work info referenced from struct otx2_cptlf_info (wqe member).
 */
struct otx2_cptlf_wqe {
struct tasklet_struct work; /* Tasklet itself */
struct otx2_cptlfs_info *lfs; /* LFs container this work belongs to */
u8 lf_num; /* NOTE(review): presumably index into lfs->lf[] - confirm */
};
/*
 * State kept for a single CPT LF. Instances live in the lf[] array of
 * struct otx2_cptlfs_info and point back to it through the lfs member.
 */
struct otx2_cptlf_info {
struct otx2_cptlfs_info *lfs; /* Ptr to cptlfs_info struct */
void __iomem *lmtline; /* Address of LMTLINE */
void __iomem *ioreg; /* LMTLINE send register */
int msix_offset; /* MSI-X interrupts offset */
cpumask_var_t affinity_mask; /* IRQs affinity mask */
u8 irq_name[OTX2_CPT_LF_MSIX_VECTORS][32];/* Interrupts name */
u8 is_irq_reg[OTX2_CPT_LF_MSIX_VECTORS]; /* Is interrupt registered */
u8 slot; /* Slot number of this LF */
struct otx2_cpt_inst_queue iqueue;/* Instruction queue */
struct otx2_cpt_pending_queue pqueue; /* Pending queue */
struct otx2_cptlf_wqe *wqe; /* Tasklet work info */
};
/*
 * Table of hardware-specific callbacks, reached through the ops pointer of
 * struct otx2_cptlfs_info.
 */
struct cpt_hw_ops {
/* Same signature as otx2_cpt_send_cmd() defined later in this header */
void (*send_cmd)(union otx2_cpt_inst_s *cptinst, u32 insts_num,
struct otx2_cptlf_info *lf);
/* NOTE(review): presumably extracts the HW completion code - confirm */
u8 (*cpt_get_compcode)(union otx2_cpt_res_s *result);
/* NOTE(review): presumably extracts the microcode completion code - confirm */
u8 (*cpt_get_uc_compcode)(union otx2_cpt_res_s *result);
};
/*
 * Container for all CPT LFs attached to one device. Only the first lfs_num
 * entries of lf[] are walked by the helpers in this header.
 */
struct otx2_cptlfs_info {
/* Registers start address of VF/PF LFs are attached to */
void __iomem *reg_base;
/* NOTE(review): presumably the size in bytes of one LMTLINE - confirm */
#define LMTLINE_SIZE 128
void __iomem *lmt_base;
struct pci_dev *pdev; /* Device LFs are attached to */
struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM];
struct otx2_mbox *mbox;
struct cpt_hw_ops *ops;
u8 are_lfs_attached; /* Whether CPT LFs are attached */
u8 lfs_num; /* Number of CPT LFs */
u8 kcrypto_eng_grp_num; /* Kernel crypto engine group number */
u8 kvf_limits; /* Kernel crypto limits */
atomic_t state; /* LF's state. started/reset */
/*
 * NOTE(review): the register helpers in this header pass BLKADDR_CPT0
 * directly and do not read this field - confirm that is intended.
 */
int blkaddr; /* CPT blkaddr: BLKADDR_CPT0/BLKADDR_CPT1 */
};
/*
 * Release the DMA-coherent buffers behind the instruction queues of the
 * first lfs->lfs_num LFs.
 *
 * A queue is handed to dma_free_coherent() only when its real_vaddr is set.
 * Both CPU pointers of every visited queue are cleared afterwards, so a
 * repeated call skips queues that were already released.
 */
static inline void otx2_cpt_free_instruction_queues(
					struct otx2_cptlfs_info *lfs)
{
	int lf_idx = 0;

	while (lf_idx < lfs->lfs_num) {
		struct otx2_cpt_inst_queue *queue = &lfs->lf[lf_idx].iqueue;

		if (queue->real_vaddr != NULL)
			dma_free_coherent(&lfs->pdev->dev, queue->size,
					  queue->real_vaddr,
					  queue->real_dma_addr);

		queue->real_vaddr = NULL;
		queue->vaddr = NULL;
		lf_idx++;
	}
}
/*
 * Allocate one DMA-coherent instruction queue buffer for each of the first
 * lfs->lfs_num LFs.
 *
 * Each buffer holds the instruction group queue, the instruction queue and
 * the FC area, plus OTX2_CPT_INST_Q_ALIGNMENT spare bytes so that the
 * instruction queue start can be rounded up to that alignment. The raw
 * allocation is kept in real_vaddr/real_dma_addr; vaddr/dma_addr point past
 * the group queue area and are aligned.
 *
 * Return: 0 on success, -EINVAL if lfs->lfs_num is zero, -ENOMEM if any
 * allocation fails (queues allocated so far are released before returning).
 */
static inline int otx2_cpt_alloc_instruction_queues(
					struct otx2_cptlfs_info *lfs)
{
	struct otx2_cpt_inst_queue *iq;
	int ret = 0, i;

	if (!lfs->lfs_num)
		return -EINVAL;

	for (i = 0; i < lfs->lfs_num; i++) {
		iq = &lfs->lf[i].iqueue;
		iq->size = OTX2_CPT_INST_QLEN_BYTES +
			   OTX2_CPT_Q_FC_LEN +
			   OTX2_CPT_INST_GRP_QLEN_BYTES +
			   OTX2_CPT_INST_Q_ALIGNMENT;
		iq->real_vaddr = dma_alloc_coherent(&lfs->pdev->dev, iq->size,
					&iq->real_dma_addr, GFP_KERNEL);
		if (!iq->real_vaddr) {
			ret = -ENOMEM;
			goto error;
		}
		/* Skip the instruction group queue area at the buffer start */
		iq->vaddr = iq->real_vaddr + OTX2_CPT_INST_GRP_QLEN_BYTES;
		iq->dma_addr = iq->real_dma_addr + OTX2_CPT_INST_GRP_QLEN_BYTES;

		/*
		 * Align both views. dma_addr_t is an integer type, not a
		 * pointer, so it is rounded up with ALIGN(), which works in
		 * the operand's own type; PTR_ALIGN() would pass the value
		 * through unsigned long and could truncate a wider DMA
		 * address.
		 */
		iq->vaddr = PTR_ALIGN(iq->vaddr, OTX2_CPT_INST_Q_ALIGNMENT);
		iq->dma_addr = ALIGN(iq->dma_addr, OTX2_CPT_INST_Q_ALIGNMENT);
	}
	return 0;

error:
	/* Frees whatever was allocated; untouched queues are skipped */
	otx2_cpt_free_instruction_queues(lfs);
	return ret;
}
static inline void otx2_cptlf_set_iqueues_base_addr(
struct otx2_cptlfs_info *lfs)
{
union otx2_cptx_lf_q_base lf_q_base;
int slot;
for (slot = 0; slot < lfs->lfs_num; slot++) {
lf_q_base.u = lfs->lf[slot].iqueue.dma_addr;
otx2_cpt_write64(lfs->reg_base, BLKADDR_CPT0, slot,
OTX2_CPT_LF_Q_BASE, lf_q_base.u);
}
}
static inline void otx2_cptlf_do_set_iqueue_size(struct otx2_cptlf_info *lf)
{
union otx2_cptx_lf_q_size lf_q_size = { .u = 0x0 };
lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40;
otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
OTX2_CPT_LF_Q_SIZE, lf_q_size.u);
}
/* Program the instruction queue size of each of the lfs->lfs_num LFs. */
static inline void otx2_cptlf_set_iqueues_size(struct otx2_cptlfs_info *lfs)
{
	int idx = 0;

	while (idx < lfs->lfs_num) {
		struct otx2_cptlf_info *lf = &lfs->lf[idx];

		otx2_cptlf_do_set_iqueue_size(lf);
		idx++;
	}
}
static inline void otx2_cptlf_do_disable_iqueue(struct otx2_cptlf_info *lf)
{
union otx2_cptx_lf_ctl lf_ctl = { .u = 0x0 };
union otx2_cptx_lf_inprog lf_inprog;
int timeout = 20;
/* Disable instructions enqueuing */
otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
OTX2_CPT_LF_CTL, lf_ctl.u);
/* Wait for instruction queue to become empty */
do {
lf_inprog.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0,
lf->slot, OTX2_CPT_LF_INPROG);
if (!lf_inprog.s.inflight)
break;
usleep_range(10000, 20000);
if (timeout-- < 0) {
dev_err(&lf->lfs->pdev->dev,
"Error LF %d is still busy.\n", lf->slot);
break;
}
} while (1);
/*
* Disable executions in the LF's queue,
* the queue should be empty at this point
*/
lf_inprog.s.eena = 0x0;
otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
OTX2_CPT_LF_INPROG, lf_inprog.u);
}
/* Quiesce the instruction queue of each of the lfs->lfs_num LFs in turn. */
static inline void otx2_cptlf_disable_iqueues(struct otx2_cptlfs_info *lfs)
{
	int idx = 0;

	while (idx < lfs->lfs_num) {
		struct otx2_cptlf_info *lf = &lfs->lf[idx];

		otx2_cptlf_do_disable_iqueue(lf);
		idx++;
	}
}
/*
 * Turn instruction enqueuing for one LF on or off.
 *
 * Read-modify-write of OTX2_CPT_LF_CTL: only the ena field is changed, the
 * remaining bits are written back as read.
 */
static inline void otx2_cptlf_set_iqueue_enq(struct otx2_cptlf_info *lf,
					     bool enable)
{
	union otx2_cptx_lf_ctl ctl;

	ctl.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
				OTX2_CPT_LF_CTL);

	/* Set iqueue's enqueuing */
	if (enable)
		ctl.s.ena = 0x1;
	else
		ctl.s.ena = 0x0;

	otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
			 OTX2_CPT_LF_CTL, ctl.u);
}
/* Enable instruction enqueuing for one LF (sets ena in OTX2_CPT_LF_CTL). */
static inline void otx2_cptlf_enable_iqueue_enq(struct otx2_cptlf_info *lf)
{
otx2_cptlf_set_iqueue_enq(lf, true);
}
/*
 * Turn instruction execution for one LF on or off.
 *
 * Read-modify-write of OTX2_CPT_LF_INPROG: only the eena field is changed,
 * the remaining bits are written back as read.
 */
static inline void otx2_cptlf_set_iqueue_exec(struct otx2_cptlf_info *lf,
					      bool enable)
{
	union otx2_cptx_lf_inprog inprog;

	inprog.u = otx2_cpt_read64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
				   OTX2_CPT_LF_INPROG);

	/* Set iqueue's execution */
	if (enable)
		inprog.s.eena = 0x1;
	else
		inprog.s.eena = 0x0;

	otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
			 OTX2_CPT_LF_INPROG, inprog.u);
}
/* Enable instruction execution for one LF (sets eena in OTX2_CPT_LF_INPROG). */
static inline void otx2_cptlf_enable_iqueue_exec(struct otx2_cptlf_info *lf)
{
otx2_cptlf_set_iqueue_exec(lf, true);
}
/* Disable instruction execution for one LF (clears eena in OTX2_CPT_LF_INPROG). */
static inline void otx2_cptlf_disable_iqueue_exec(struct otx2_cptlf_info *lf)
{
otx2_cptlf_set_iqueue_exec(lf, false);
}
/*
 * Bring up the instruction queue of each of the lfs->lfs_num LFs.
 * For every LF, execution is enabled first and enqueuing second.
 */
static inline void otx2_cptlf_enable_iqueues(struct otx2_cptlfs_info *lfs)
{
	int idx;

	for (idx = 0; idx < lfs->lfs_num; idx++) {
		struct otx2_cptlf_info *lf = &lfs->lf[idx];

		otx2_cptlf_enable_iqueue_exec(lf);
		otx2_cptlf_enable_iqueue_enq(lf);
	}
}
/*
 * Populate a CPT instruction from an instruction queue command.
 *
 * @cptinst:    instruction to fill in
 * @iq_cmd:     source command; its cmd, dptr, rptr and cptr values are
 *              copied into ei0..ei3 respectively
 * @comp_baddr: value stored in the res_addr field
 *
 * Words 0, 2 and 3 of the instruction are explicitly zeroed; doneint is
 * always set.
 * NOTE(review): the statement order matters if the bitfields share storage
 * with the zeroed words - confirm against union otx2_cpt_inst_s before
 * reordering.
 */
static inline void otx2_cpt_fill_inst(union otx2_cpt_inst_s *cptinst,
struct otx2_cpt_iq_command *iq_cmd,
u64 comp_baddr)
{
/* Clear word 0 before setting individual fields */
cptinst->u[0] = 0x0;
cptinst->s.doneint = true;
cptinst->s.res_addr = comp_baddr;
cptinst->u[2] = 0x0;
cptinst->u[3] = 0x0;
/* Copy the command words from the queue command */
cptinst->s.ei0 = iq_cmd->cmd.u;
cptinst->s.ei1 = iq_cmd->dptr;
cptinst->s.ei2 = iq_cmd->rptr;
cptinst->s.ei3 = iq_cmd->cptr.u;
}
/*
 * Submit CPT instructions to hardware through the LF's LMTLINE.
 *
 * On OcteonTX2 platform the parameter insts_num is used as a count of
 * instructions to be enqueued. The valid values for insts_num are:
 *   1 - 1 CPT instruction will be enqueued during LMTST operation
 *   2 - 2 CPT instructions will be enqueued during LMTST operation
 *
 * The copy-to-LMTLINE and flush pair is repeated until otx2_lmt_flush()
 * reports a non-zero result.
 */
static inline void otx2_cpt_send_cmd(union otx2_cpt_inst_s *cptinst,
				     u32 insts_num, struct otx2_cptlf_info *lf)
{
	void __iomem *lmt_dst = lf->lmtline;
	long flushed;

	/*
	 * Make sure memory areas pointed in CPT_INST_S
	 * are flushed before the instruction is sent to CPT
	 */
	dma_wmb();

	for (;;) {
		/* Copy CPT command to LMTLINE */
		memcpy_toio(lmt_dst, cptinst, insts_num * OTX2_CPT_INST_SIZE);

		/*
		 * LDEOR initiates atomic transfer to I/O device.
		 * The LMTST fails (the LDEOR returns zero) when either:
		 * - no stores have been performed to the LMTLINE since it
		 *   was last invalidated, or
		 * - the bytes stored to the LMTLINE since it was last
		 *   invalidated form a pattern that is non-contiguous, does
		 *   not start at byte 0, or does not end on a 8-byte
		 *   boundary (i.e. comprises a formation of other than
		 *   1-16 8-byte words).
		 *
		 * These rules are designed such that an operating system
		 * context switch or hypervisor guest switch need have no
		 * knowledge of the LMTST operations; the switch code does
		 * not need to store to LMTCANCEL. Also note as LMTLINE data
		 * cannot be read, there is no information leakage between
		 * processes.
		 */
		flushed = otx2_lmt_flush(lf->ioreg);
		if (flushed)
			break;
	}
}
/*
 * Report whether the LFs are in the started state.
 *
 * Return: true when lfs->state currently reads OTX2_CPTLF_STARTED,
 * false otherwise.
 */
static inline bool otx2_cptlf_started(struct otx2_cptlfs_info *lfs)
{
	int cur_state = atomic_read(&lfs->state);

	return cur_state == OTX2_CPTLF_STARTED;
}
/*
 * LF management entry points. Only the declarations live in this header;
 * the definitions are elsewhere.
 * NOTE(review): the descriptions below are inferred from the names and
 * parameter lists - confirm against the implementation.
 */
/* Presumably sets up lfs_num LFs with the given engine group mask and priority */
int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_msk, int pri,
int lfs_num);
/* Presumably the counterpart of otx2_cptlf_init() */
void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs);
/* Interrupt registration and removal for the LFs */
int otx2_cptlf_register_interrupts(struct otx2_cptlfs_info *lfs);
void otx2_cptlf_unregister_interrupts(struct otx2_cptlfs_info *lfs);
/* IRQ affinity management for the LFs */
void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs);
int otx2_cptlf_set_irqs_affinity(struct otx2_cptlfs_info *lfs);
#endif /* __OTX2_CPTLF_H */
|