diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-04-12 10:42:34 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-04-12 10:42:34 +0300 |
commit | ef389b734691cdc8beb009dd402135dcdcb86a56 (patch) | |
tree | 9523a37db93cb7c7874a5f18b4d9a7014898b814 /drivers/misc/echo/echo.c | |
parent | a774635db5c430cbf21fa5d2f2df3d23aaa8e782 (diff) | |
parent | c76fc98260751e71c884dc1a18a07e427ef033b5 (diff) | |
download | linux-ef389b734691cdc8beb009dd402135dcdcb86a56.tar.xz |
Merge branch 'WIP.x86/asm' into x86/urgent, because the topic is ready
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/misc/echo/echo.c')
-rw-r--r-- | drivers/misc/echo/echo.c | 73 |
1 files changed, 0 insertions, 73 deletions
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c index 9597e9523cac..8a5adc0d2e88 100644 --- a/drivers/misc/echo/echo.c +++ b/drivers/misc/echo/echo.c @@ -115,78 +115,6 @@ /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ -#ifdef __bfin__ -static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) -{ - int i; - int offset1; - int offset2; - int factor; - int exp; - int16_t *phist; - int n; - - if (shift > 0) - factor = clean << shift; - else - factor = clean >> -shift; - - /* Update the FIR taps */ - - offset2 = ec->curr_pos; - offset1 = ec->taps - offset2; - phist = &ec->fir_state_bg.history[offset2]; - - /* st: and en: help us locate the assembler in echo.s */ - - /* asm("st:"); */ - n = ec->taps; - for (i = 0; i < n; i++) { - exp = *phist++ * factor; - ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); - } - /* asm("en:"); */ - - /* Note the asm for the inner loop above generated by Blackfin gcc - 4.1.1 is pretty good (note even parallel instructions used): - - R0 = W [P0++] (X); - R0 *= R2; - R0 = R0 + R3 (NS) || - R1 = W [P1] (X) || - nop; - R0 >>>= 15; - R0 = R0 + R1; - W [P1++] = R0; - - A block based update algorithm would be much faster but the - above can't be improved on much. Every instruction saved in - the loop above is 2 MIPs/ch! The for loop above is where the - Blackfin spends most of it's time - about 17 MIPs/ch measured - with speedtest.c with 256 taps (32ms). Write-back and - Write-through cache gave about the same performance. - */ -} - -/* - IDEAS for further optimisation of lms_adapt_bg(): - - 1/ The rounding is quite costly. Could we keep as 32 bit coeffs - then make filter pluck the MS 16-bits of the coeffs when filtering? - However this would lower potential optimisation of filter, as I - think the dual-MAC architecture requires packed 16 bit coeffs. - - 2/ Block based update would be more efficient, as per comments above, - could use dual MAC architecture. - - 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC - packing. - - 4/ Execute the whole e/c in a block of say 20ms rather than sample - by sample. Processing a few samples every ms is inefficient. -*/ - -#else static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) { int i; @@ -215,7 +143,6 @@ static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); } } -#endif static inline int top_bit(unsigned int bits) { |