mirror of
https://github.com/musix-org/musix-oss
synced 2025-06-17 23:16:00 +00:00
Modules
This commit is contained in:
375
node_modules/node-opus/deps/opus/silk/fixed/x86/burg_modified_FIX_sse.c
generated
vendored
Normal file
375
node_modules/node-opus/deps/opus/silk/fixed/x86/burg_modified_FIX_sse.c
generated
vendored
Normal file
@ -0,0 +1,375 @@
|
||||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "SigProc_FIX.h"
|
||||
#include "define.h"
|
||||
#include "tuning_parameters.h"
|
||||
#include "pitch.h"
|
||||
#include "celt/x86/x86cpu.h"
|
||||
|
||||
#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
|
||||
|
||||
#define QA 25
|
||||
#define N_BITS_HEAD_ROOM 2
|
||||
#define MIN_RSHIFTS -16
|
||||
#define MAX_RSHIFTS (32 - QA)
|
||||
|
||||
/* Compute reflection coefficients from input signal */
|
||||
void silk_burg_modified_sse4_1(
|
||||
opus_int32 *res_nrg, /* O Residual energy */
|
||||
opus_int *res_nrg_Q, /* O Residual energy Q value */
|
||||
opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
|
||||
const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */
|
||||
const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */
|
||||
const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */
|
||||
const opus_int nb_subfr, /* I Number of subframes stacked in x */
|
||||
const opus_int D, /* I Order */
|
||||
int arch /* I Run-time architecture */
|
||||
)
|
||||
{
|
||||
opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
|
||||
opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
|
||||
const opus_int16 *x_ptr;
|
||||
opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ];
|
||||
opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ];
|
||||
opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ];
|
||||
opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ];
|
||||
opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ];
|
||||
opus_int32 xcorr[ SILK_MAX_ORDER_LPC ];
|
||||
|
||||
__m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210;
|
||||
__m128i CONST1 = _mm_set1_epi32(1);
|
||||
|
||||
silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
|
||||
|
||||
/* Compute autocorrelations, added over subframes */
|
||||
silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
|
||||
if( rshifts > MAX_RSHIFTS ) {
|
||||
C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
|
||||
silk_assert( C0 > 0 );
|
||||
rshifts = MAX_RSHIFTS;
|
||||
} else {
|
||||
lz = silk_CLZ32( C0 ) - 1;
|
||||
rshifts_extra = N_BITS_HEAD_ROOM - lz;
|
||||
if( rshifts_extra > 0 ) {
|
||||
rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
|
||||
C0 = silk_RSHIFT32( C0, rshifts_extra );
|
||||
} else {
|
||||
rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
|
||||
C0 = silk_LSHIFT32( C0, -rshifts_extra );
|
||||
}
|
||||
rshifts += rshifts_extra;
|
||||
}
|
||||
CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
|
||||
silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
|
||||
if( rshifts > 0 ) {
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
for( n = 1; n < D + 1; n++ ) {
|
||||
C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
|
||||
silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
int i;
|
||||
opus_int32 d;
|
||||
x_ptr = x + s * subfr_length;
|
||||
celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
|
||||
for( n = 1; n < D + 1; n++ ) {
|
||||
for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
|
||||
d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
|
||||
xcorr[ n - 1 ] += d;
|
||||
}
|
||||
for( n = 1; n < D + 1; n++ ) {
|
||||
C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
|
||||
}
|
||||
}
|
||||
}
|
||||
silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
|
||||
|
||||
/* Initialize */
|
||||
CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
|
||||
|
||||
invGain_Q30 = (opus_int32)1 << 30;
|
||||
reached_max_gain = 0;
|
||||
for( n = 0; n < D; n++ ) {
|
||||
/* Update first row of correlation matrix (without first element) */
|
||||
/* Update last row of correlation matrix (without last element, stored in reversed order) */
|
||||
/* Update C * Af */
|
||||
/* Update C * flipud(Af) (stored in reversed order) */
|
||||
if( rshifts > -2 ) {
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */
|
||||
x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */
|
||||
tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */
|
||||
tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */
|
||||
for( k = 0; k < n; k++ ) {
|
||||
C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
|
||||
C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
|
||||
Atmp_QA = Af_QA[ k ];
|
||||
tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */
|
||||
tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */
|
||||
}
|
||||
tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */
|
||||
tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */
|
||||
for( k = 0; k <= n; k++ ) {
|
||||
CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */
|
||||
CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */
|
||||
x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */
|
||||
tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */
|
||||
tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */
|
||||
|
||||
X1_3210 = _mm_set1_epi32( x1 );
|
||||
X2_3210 = _mm_set1_epi32( x2 );
|
||||
TMP1_3210 = _mm_setzero_si128();
|
||||
TMP2_3210 = _mm_setzero_si128();
|
||||
for( k = 0; k < n - 3; k += 4 ) {
|
||||
PTR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] );
|
||||
SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] );
|
||||
FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] );
|
||||
PTR_3210 = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
LAST_3210 = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] );
|
||||
ATMP_3210 = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] );
|
||||
|
||||
T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 );
|
||||
T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 );
|
||||
|
||||
ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 );
|
||||
ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 );
|
||||
ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 );
|
||||
|
||||
FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 );
|
||||
LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 );
|
||||
|
||||
PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 );
|
||||
SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 );
|
||||
|
||||
_mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 );
|
||||
_mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 );
|
||||
|
||||
TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 );
|
||||
TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 );
|
||||
}
|
||||
|
||||
TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) );
|
||||
TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) );
|
||||
TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) );
|
||||
TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) );
|
||||
|
||||
tmp1 += _mm_cvtsi128_si32( TMP1_3210 );
|
||||
tmp2 += _mm_cvtsi128_si32( TMP2_3210 );
|
||||
|
||||
for( ; k < n; k++ ) {
|
||||
C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
|
||||
C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
|
||||
Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */
|
||||
tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */
|
||||
tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */
|
||||
}
|
||||
|
||||
tmp1 = -tmp1; /* Q17 */
|
||||
tmp2 = -tmp2; /* Q17 */
|
||||
|
||||
{
|
||||
__m128i xmm_tmp1, xmm_tmp2;
|
||||
__m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1;
|
||||
__m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1;
|
||||
|
||||
xmm_tmp1 = _mm_set1_epi32( tmp1 );
|
||||
xmm_tmp2 = _mm_set1_epi32( tmp2 );
|
||||
|
||||
for( k = 0; k <= n - 3; k += 4 ) {
|
||||
xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] );
|
||||
xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] );
|
||||
|
||||
xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
|
||||
xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 );
|
||||
xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 );
|
||||
|
||||
/* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/
|
||||
xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
|
||||
xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
|
||||
|
||||
xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 );
|
||||
xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 );
|
||||
xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 );
|
||||
xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 );
|
||||
|
||||
xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 );
|
||||
xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 );
|
||||
xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 );
|
||||
xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 );
|
||||
|
||||
xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC );
|
||||
xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC );
|
||||
|
||||
X1_3210 = _mm_loadu_si128( (__m128i *)&CAf[ k ] );
|
||||
PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] );
|
||||
|
||||
X1_3210 = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 );
|
||||
PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 );
|
||||
|
||||
_mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 );
|
||||
_mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 );
|
||||
}
|
||||
|
||||
for( ; k <= n; k++ ) {
|
||||
CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
|
||||
silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */
|
||||
CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
|
||||
silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
|
||||
tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */
|
||||
tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */
|
||||
num = 0; /* Q( -rshifts ) */
|
||||
nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */
|
||||
for( k = 0; k < n; k++ ) {
|
||||
Atmp_QA = Af_QA[ k ];
|
||||
lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
|
||||
lz = silk_min( 32 - QA, lz );
|
||||
Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */
|
||||
|
||||
tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
|
||||
tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
|
||||
num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
|
||||
nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
|
||||
Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */
|
||||
}
|
||||
CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */
|
||||
CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */
|
||||
num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */
|
||||
num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */
|
||||
|
||||
/* Calculate the next order reflection (parcor) coefficient */
|
||||
if( silk_abs( num ) < nrg ) {
|
||||
rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
|
||||
} else {
|
||||
rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
|
||||
}
|
||||
|
||||
/* Update inverse prediction gain */
|
||||
tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
|
||||
tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
|
||||
if( tmp1 <= minInvGain_Q30 ) {
|
||||
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
|
||||
tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */
|
||||
rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */
|
||||
/* Newton-Raphson iteration */
|
||||
rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */
|
||||
rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */
|
||||
if( num < 0 ) {
|
||||
/* Ensure adjusted reflection coefficients has the original sign */
|
||||
rc_Q31 = -rc_Q31;
|
||||
}
|
||||
invGain_Q30 = minInvGain_Q30;
|
||||
reached_max_gain = 1;
|
||||
} else {
|
||||
invGain_Q30 = tmp1;
|
||||
}
|
||||
|
||||
/* Update the AR coefficients */
|
||||
for( k = 0; k < (n + 1) >> 1; k++ ) {
|
||||
tmp1 = Af_QA[ k ]; /* QA */
|
||||
tmp2 = Af_QA[ n - k - 1 ]; /* QA */
|
||||
Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */
|
||||
Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */
|
||||
}
|
||||
Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */
|
||||
|
||||
if( reached_max_gain ) {
|
||||
/* Reached max prediction gain; set remaining coefficients to zero and exit loop */
|
||||
for( k = n + 1; k < D; k++ ) {
|
||||
Af_QA[ k ] = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Update C * Af and C * Ab */
|
||||
for( k = 0; k <= n + 1; k++ ) {
|
||||
tmp1 = CAf[ k ]; /* Q( -rshifts ) */
|
||||
tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */
|
||||
CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */
|
||||
CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */
|
||||
}
|
||||
}
|
||||
|
||||
if( reached_max_gain ) {
|
||||
for( k = 0; k < D; k++ ) {
|
||||
/* Scale coefficients */
|
||||
A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
|
||||
}
|
||||
/* Subtract energy of preceding samples from C0 */
|
||||
if( rshifts > 0 ) {
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
|
||||
}
|
||||
} else {
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts );
|
||||
}
|
||||
}
|
||||
/* Approximate residual energy */
|
||||
*res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
|
||||
*res_nrg_Q = -rshifts;
|
||||
} else {
|
||||
/* Return residual energy */
|
||||
nrg = CAf[ 0 ]; /* Q( -rshifts ) */
|
||||
tmp1 = (opus_int32)1 << 16; /* Q16 */
|
||||
for( k = 0; k < D; k++ ) {
|
||||
Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */
|
||||
nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */
|
||||
tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */
|
||||
A_Q16[ k ] = -Atmp1;
|
||||
}
|
||||
*res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
|
||||
*res_nrg_Q = -rshifts;
|
||||
}
|
||||
}
|
160
node_modules/node-opus/deps/opus/silk/fixed/x86/prefilter_FIX_sse.c
generated
vendored
Normal file
160
node_modules/node-opus/deps/opus/silk/fixed/x86/prefilter_FIX_sse.c
generated
vendored
Normal file
@ -0,0 +1,160 @@
|
||||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include "main.h"
|
||||
#include "celt/x86/x86cpu.h"
|
||||
|
||||
void silk_warped_LPC_analysis_filter_FIX_sse4_1(
|
||||
opus_int32 state[], /* I/O State [order + 1] */
|
||||
opus_int32 res_Q2[], /* O Residual signal [length] */
|
||||
const opus_int16 coef_Q13[], /* I Coefficients [order] */
|
||||
const opus_int16 input[], /* I Input signal [length] */
|
||||
const opus_int16 lambda_Q16, /* I Warping factor */
|
||||
const opus_int length, /* I Length of input signal */
|
||||
const opus_int order /* I Filter order (even) */
|
||||
)
|
||||
{
|
||||
opus_int n, i;
|
||||
opus_int32 acc_Q11, tmp1, tmp2;
|
||||
|
||||
/* Order must be even */
|
||||
silk_assert( ( order & 1 ) == 0 );
|
||||
|
||||
if (order == 10)
|
||||
{
|
||||
if (0 == lambda_Q16)
|
||||
{
|
||||
__m128i coef_Q13_3210, coef_Q13_7654;
|
||||
__m128i coef_Q13_0123, coef_Q13_4567;
|
||||
__m128i state_0123, state_4567;
|
||||
__m128i xmm_product1, xmm_product2;
|
||||
__m128i xmm_tempa, xmm_tempb;
|
||||
|
||||
register opus_int32 sum;
|
||||
register opus_int32 state_8, state_9, state_a;
|
||||
register opus_int64 coef_Q13_8, coef_Q13_9;
|
||||
|
||||
silk_assert( length > 0 );
|
||||
|
||||
coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] );
|
||||
coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] );
|
||||
|
||||
coef_Q13_0123 = _mm_shuffle_epi32( coef_Q13_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
coef_Q13_4567 = _mm_shuffle_epi32( coef_Q13_7654, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
|
||||
coef_Q13_8 = (opus_int64) coef_Q13[ 8 ];
|
||||
coef_Q13_9 = (opus_int64) coef_Q13[ 9 ];
|
||||
|
||||
state_0123 = _mm_loadu_si128( (__m128i *)(&state[ 0 ] ) );
|
||||
state_4567 = _mm_loadu_si128( (__m128i *)(&state[ 4 ] ) );
|
||||
|
||||
state_0123 = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
state_4567 = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
|
||||
state_8 = state[ 8 ];
|
||||
state_9 = state[ 9 ];
|
||||
state_a = 0;
|
||||
|
||||
for( n = 0; n < length; n++ )
|
||||
{
|
||||
xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */
|
||||
xmm_product2 = _mm_mul_epi32( coef_Q13_4567, state_4567 );
|
||||
|
||||
xmm_tempa = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
xmm_tempb = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) );
|
||||
|
||||
xmm_product1 = _mm_srli_epi64( xmm_product1, 16 ); /* >> 16, zero extending works */
|
||||
xmm_product2 = _mm_srli_epi64( xmm_product2, 16 );
|
||||
|
||||
xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa );
|
||||
xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb );
|
||||
|
||||
xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 );
|
||||
xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 );
|
||||
|
||||
xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_product1 );
|
||||
xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 );
|
||||
xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb );
|
||||
|
||||
sum = (coef_Q13_8 * state_8) >> 16;
|
||||
sum += (coef_Q13_9 * state_9) >> 16;
|
||||
|
||||
xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );
|
||||
sum += _mm_cvtsi128_si32( xmm_tempa);
|
||||
res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( ( 5 + sum ), 9);
|
||||
|
||||
/* move right */
|
||||
state_a = state_9;
|
||||
state_9 = state_8;
|
||||
state_8 = _mm_cvtsi128_si32( state_4567 );
|
||||
state_4567 = _mm_alignr_epi8( state_0123, state_4567, 4 );
|
||||
|
||||
state_0123 = _mm_alignr_epi8( _mm_cvtsi32_si128( silk_LSHIFT( input[ n ], 14 ) ), state_0123, 4 );
|
||||
}
|
||||
|
||||
_mm_storeu_si128( (__m128i *)( &state[ 0 ] ), _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ) );
|
||||
_mm_storeu_si128( (__m128i *)( &state[ 4 ] ), _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ) );
|
||||
state[ 8 ] = state_8;
|
||||
state[ 9 ] = state_9;
|
||||
state[ 10 ] = state_a;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for( n = 0; n < length; n++ ) {
|
||||
/* Output of lowpass section */
|
||||
tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
|
||||
state[ 0 ] = silk_LSHIFT( input[ n ], 14 );
|
||||
/* Output of allpass section */
|
||||
tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
|
||||
state[ 1 ] = tmp2;
|
||||
acc_Q11 = silk_RSHIFT( order, 1 );
|
||||
acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
|
||||
/* Loop over allpass sections */
|
||||
for( i = 2; i < order; i += 2 ) {
|
||||
/* Output of allpass section */
|
||||
tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
|
||||
state[ i ] = tmp1;
|
||||
acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
|
||||
/* Output of allpass section */
|
||||
tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
|
||||
state[ i + 1 ] = tmp2;
|
||||
acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
|
||||
}
|
||||
state[ order ] = tmp1;
|
||||
acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
|
||||
res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
|
||||
}
|
||||
}
|
88
node_modules/node-opus/deps/opus/silk/fixed/x86/vector_ops_FIX_sse.c
generated
vendored
Normal file
88
node_modules/node-opus/deps/opus/silk/fixed/x86/vector_ops_FIX_sse.c
generated
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include "main.h"
|
||||
|
||||
#include "SigProc_FIX.h"
|
||||
#include "pitch.h"
|
||||
|
||||
opus_int64 silk_inner_prod16_aligned_64_sse4_1(
|
||||
const opus_int16 *inVec1, /* I input vector 1 */
|
||||
const opus_int16 *inVec2, /* I input vector 2 */
|
||||
const opus_int len /* I vector lengths */
|
||||
)
|
||||
{
|
||||
opus_int i, dataSize8;
|
||||
opus_int64 sum;
|
||||
|
||||
__m128i xmm_tempa;
|
||||
__m128i inVec1_76543210, acc1;
|
||||
__m128i inVec2_76543210, acc2;
|
||||
|
||||
sum = 0;
|
||||
dataSize8 = len & ~7;
|
||||
|
||||
acc1 = _mm_setzero_si128();
|
||||
acc2 = _mm_setzero_si128();
|
||||
|
||||
for( i = 0; i < dataSize8; i += 8 ) {
|
||||
inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) );
|
||||
inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) );
|
||||
|
||||
/* only when all 4 operands are -32768 (0x8000), this results in wrap around */
|
||||
inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 );
|
||||
|
||||
xmm_tempa = _mm_cvtepi32_epi64( inVec1_76543210 );
|
||||
/* equal shift right 8 bytes */
|
||||
inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) );
|
||||
inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 );
|
||||
|
||||
acc1 = _mm_add_epi64( acc1, xmm_tempa );
|
||||
acc2 = _mm_add_epi64( acc2, inVec1_76543210 );
|
||||
}
|
||||
|
||||
acc1 = _mm_add_epi64( acc1, acc2 );
|
||||
|
||||
/* equal shift right 8 bytes */
|
||||
acc2 = _mm_shuffle_epi32( acc1, _MM_SHUFFLE( 0, 0, 3, 2 ) );
|
||||
acc1 = _mm_add_epi64( acc1, acc2 );
|
||||
|
||||
_mm_storel_epi64( (__m128i *)&sum, acc1 );
|
||||
|
||||
for( ; i < len; i++ ) {
|
||||
sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
Reference in New Issue
Block a user