1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
/*
* Copyright 2012 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef _GXIO_DMA_QUEUE_H_
#define _GXIO_DMA_QUEUE_H_
/*
* DMA queue management APIs shared between TRIO and mPIPE.
*/
#include <gxio/common.h>
/* The credit counter lives in the high 32 bits. */
#define DMA_QUEUE_CREDIT_SHIFT 32
/*
* State object that tracks a DMA queue's head and tail indices, as
* well as the number of commands posted and completed. The
* structure is accessed via a thread-safe, lock-free algorithm.
*/
typedef struct {
/*
* Address of a MPIPE_EDMA_POST_REGION_VAL_t,
* TRIO_PUSH_DMA_REGION_VAL_t, or TRIO_PULL_DMA_REGION_VAL_t
* register. These register have identical encodings and provide
* information about how many commands have been processed.
*/
void *post_region_addr;
/*
* A lazily-updated count of how many edescs the hardware has
* completed.
*/
uint64_t hw_complete_count __attribute__ ((aligned(64)));
/*
* High 32 bits are a count of available egress command credits,
* low 24 bits are the next egress "slot".
*/
int64_t credits_and_next_index;
} __gxio_dma_queue_t;
/* Initialize a dma queue. */
extern void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue,
void *post_region_addr,
unsigned int num_entries);
/*
* Update the "credits_and_next_index" and "hw_complete_count" fields
* based on pending hardware completions. Note that some other thread
* may have already done this and, importantly, may still be in the
* process of updating "credits_and_next_index".
*/
extern void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue);
/* Wait for credits to become available. */
extern int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue,
int64_t modifier);
/* Reserve slots in the queue, optionally waiting for slots to become
* available, and optionally returning a "completion_slot" suitable for
* direct comparison to "hw_complete_count".
*/
static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue,
unsigned int num, bool wait,
bool completion)
{
uint64_t slot;
/*
* Try to reserve 'num' egress command slots. We do this by
* constructing a constant that subtracts N credits and adds N to
* the index, and using fetchaddgez to only apply it if the credits
* count doesn't go negative.
*/
int64_t modifier = (((int64_t)(-num)) << DMA_QUEUE_CREDIT_SHIFT) | num;
int64_t old =
__insn_fetchaddgez(&dma_queue->credits_and_next_index,
modifier);
if (unlikely(old + modifier < 0)) {
/*
* We're out of credits. Try once to get more by checking for
* completed egress commands. If that fails, wait or fail.
*/
__gxio_dma_queue_update_credits(dma_queue);
old = __insn_fetchaddgez(&dma_queue->credits_and_next_index,
modifier);
if (old + modifier < 0) {
if (wait)
old = __gxio_dma_queue_wait_for_credits
(dma_queue, modifier);
else
return GXIO_ERR_DMA_CREDITS;
}
}
/* The bottom 24 bits of old encode the "slot". */
slot = (old & 0xffffff);
if (completion) {
/*
* A "completion_slot" is a "slot" which can be compared to
* "hw_complete_count" at any time in the future. To convert
* "slot" into a "completion_slot", we access "hw_complete_count"
* once (knowing that we have reserved a slot, and thus, it will
* be "basically" accurate), and combine its high 40 bits with
* the 24 bit "slot", and handle "wrapping" by adding "1 << 24"
* if the result is LESS than "hw_complete_count".
*/
uint64_t complete;
complete = READ_ONCE(dma_queue->hw_complete_count);
slot |= (complete & 0xffffffffff000000);
if (slot < complete)
slot += 0x1000000;
}
/*
* If any of our slots mod 256 were equivalent to 0, go ahead and
* collect some egress credits, and update "hw_complete_count", and
* make sure the index doesn't overflow into the credits.
*/
if (unlikely(((old + num) & 0xff) < num)) {
__gxio_dma_queue_update_credits(dma_queue);
/* Make sure the index doesn't overflow into the credits. */
#ifdef __BIG_ENDIAN__
*(((uint8_t *)&dma_queue->credits_and_next_index) + 4) = 0;
#else
*(((uint8_t *)&dma_queue->credits_and_next_index) + 3) = 0;
#endif
}
return slot;
}
/* Non-inlinable "__gxio_dma_queue_reserve(..., true)". */
extern int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue,
unsigned int num, int wait);
/* Check whether a particular "completion slot" has completed.
*
* Note that this function requires a "completion slot", and thus
* cannot be used with the result of any "reserve_fast" function.
*/
extern int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue,
int64_t completion_slot, int update);
#endif /* !_GXIO_DMA_QUEUE_H_ */
|