Replace 64-bit multiplies with 32-bit ones to further optimize the code

Johan Hedberg 2008-02-22 13:41:02 +00:00
parent e814491d3e
commit 4170955ad1
2 changed files with 29 additions and 35 deletions
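In short: every multiply-accumulate in the hot paths used to widen its operands to a 64-bit sbc_extended_t; after this change the whole pipeline runs in native 32-bit sbc_fixed_t arithmetic, with the Q-format scaling constants rebalanced so intermediates still fit. A minimal standalone sketch of the before/after on the generic (non-ARM) path -- the macro bodies mirror the diff below, while the driver code around them is invented for illustration:

#include <stdint.h>

typedef int32_t sbc_fixed_t;
typedef long long sbc_extended_t;

/* Before: every product is widened to 64 bits. */
#define MULA_OLD(a, b, res) ((sbc_extended_t)(a) * (b) + (res))

/* After: one 32x32->32 multiply, no 64-bit temporaries. */
#define MULA_NEW(a, b, res) ((a) * (b) + (res))

int main(void)
{
	sbc_fixed_t coef = 123, sample = -456, acc = 0;

	/* Same result as long as the (rebalanced) scaling keeps
	 * the running sum within 32 bits. */
	acc = MULA_NEW(coef, sample, acc);
	return acc == MULA_OLD(coef, sample, 0) ? 0 : 1;
}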


@@ -551,7 +551,7 @@ static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
 				struct sbc_frame *frame, int ch, int blk)
 {
 	int i, j, k, idx;
-	sbc_extended_t res;
+	sbc_fixed_t res;
 
 	for (i = 0; i < 8; i++) {
 		/* Shifting */
@@ -592,7 +592,7 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
 				struct sbc_frame *frame, int ch, int blk)
 {
 	int i, j, k, idx;
-	sbc_extended_t res;
+	sbc_fixed_t res;
 
 	for (i = 0; i < 16; i++) {
 		/* Shifting */
@@ -667,8 +667,7 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,
 
 static inline void _sbc_analyze_four(const int32_t *in, int32_t *out)
 {
-	sbc_fixed_t t[8];
-	sbc_extended_t s[5];
+	sbc_fixed_t t[8], s[5];
 
 	t[0] = SCALE4_STAGE1( /* Q8 */
 		MULA(_sbc_proto_4[0], in[8] - in[32], /* Q18 */
@@ -752,8 +751,7 @@ static inline void sbc_analyze_four(struct sbc_encoder_state *state,
 
 static inline void _sbc_analyze_eight(const int32_t *in, int32_t *out)
 {
-	sbc_fixed_t t[8];
-	sbc_extended_t s[8];
+	sbc_fixed_t t[8], s[8];
 
 	t[0] = SCALE8_STAGE1( /* Q10 */
 		MULA(_sbc_proto_8[0], (in[16] - in[64]), /* Q18 = Q18 * Q0 */
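In both analysis functions the scratch arrays t[] and s[] now share the single 32-bit sbc_fixed_t type instead of splitting into 32-bit and 64-bit halves. The per-element pattern is a chain of multiply-accumulates followed by one scale-down, sketched below with invented coefficients and a placeholder shift count (the real proto tables and shift amounts come from the header changed below):

#include <stdint.h>

typedef int32_t sbc_fixed_t;

#define ASR(val, bits) ((-2 >> 1 == -1) ? \
	((int32_t)(val)) >> (bits) : ((int32_t)(val)) / (1 << (bits)))
#define MULA(a, b, res) ((a) * (b) + (res))
#define SCALE_STAGE1(src) ASR(src, 15)	/* placeholder shift amount */

/* Shape of one t[] element in _sbc_analyze_four(): accumulate in
 * 32 bits, then apply a single arithmetic-shift scale-down. */
static sbc_fixed_t stage1_term(const int32_t *in, const int32_t *proto)
{
	sbc_fixed_t res = 0;

	res = MULA(proto[0], in[8] - in[32], res);
	res = MULA(proto[1], in[0] - in[40], res);
	return SCALE_STAGE1(res);
}

int main(void)
{
	int32_t in[48] = { 0 };
	int32_t proto[2] = { 1 << 14, -(1 << 13) };	/* invented taps */

	in[8] = 100;
	in[32] = -100;	/* in[8] - in[32] == 200 */
	return stage1_term(in, proto) == ASR((1 << 14) * 200, 15) ? 0 : 1;
}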


@@ -28,49 +28,45 @@
 	always be correct and every compiler *should* generate optimal code */
 #define ASR(val, bits) ((-2 >> 1 == -1) ? \
 	((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits)))
-#define ASR_64(val, bits) ((-2 >> 1 == -1) ? \
-	((long long)(val)) >> (bits) : ((long long) (val)) / (1 << (bits)))
 
 #define SCALE_PROTO4_TBL	15
 #define SCALE_ANA4_TBL		16
-#define SCALE_PROTO8_TBL	15
-#define SCALE_ANA8_TBL		16
-#define SCALE_SPROTO4_TBL	16
-#define SCALE_SPROTO8_TBL	16
-#define SCALE_NPROTO4_TBL	10
-#define SCALE_NPROTO8_TBL	12
-#define SCALE_SAMPLES		14
+#define SCALE_PROTO8_TBL	16
+#define SCALE_ANA8_TBL		17
+#define SCALE_SPROTO4_TBL	15
+#define SCALE_SPROTO8_TBL	14
+#define SCALE_NPROTO4_TBL	13
+#define SCALE_NPROTO8_TBL	11
 #define SCALE4_STAGE1_BITS	16
-#define SCALE4_STAGE2_BITS	16
-#define SCALE4_STAGED1_BITS	14
-#define SCALE4_STAGED2_BITS	14
-#define SCALE8_STAGE1_BITS	16
-#define SCALE8_STAGE2_BITS	16
-#define SCALE8_STAGED1_BITS	14
-#define SCALE8_STAGED2_BITS	14
+#define SCALE4_STAGE2_BITS	15
+#define SCALE4_STAGED1_BITS	12
+#define SCALE4_STAGED2_BITS	16
+#define SCALE8_STAGE1_BITS	15
+#define SCALE8_STAGE2_BITS	15
+#define SCALE8_STAGED1_BITS	15
+#define SCALE8_STAGED2_BITS	16
 
 typedef int32_t sbc_fixed_t;
-typedef long long sbc_extended_t;
 
-#define SCALE4_STAGE1(src)  ASR_64(src, SCALE4_STAGE1_BITS)
-#define SCALE4_STAGE2(src)  ASR_64(src, SCALE4_STAGE2_BITS)
-#define SCALE4_STAGED1(src) ASR_64(src, SCALE4_STAGED1_BITS)
-#define SCALE4_STAGED2(src) ASR_64(src, SCALE4_STAGED2_BITS)
-#define SCALE8_STAGE1(src)  ASR_64(src, SCALE8_STAGE1_BITS)
-#define SCALE8_STAGE2(src)  ASR_64(src, SCALE8_STAGE2_BITS)
-#define SCALE8_STAGED1(src) ASR_64(src, SCALE8_STAGED1_BITS)
-#define SCALE8_STAGED2(src) ASR_64(src, SCALE8_STAGED2_BITS)
+#define SCALE4_STAGE1(src)  ASR(src, SCALE4_STAGE1_BITS)
+#define SCALE4_STAGE2(src)  ASR(src, SCALE4_STAGE2_BITS)
+#define SCALE4_STAGED1(src) ASR(src, SCALE4_STAGED1_BITS)
+#define SCALE4_STAGED2(src) ASR(src, SCALE4_STAGED2_BITS)
+#define SCALE8_STAGE1(src)  ASR(src, SCALE8_STAGE1_BITS)
+#define SCALE8_STAGE2(src)  ASR(src, SCALE8_STAGE2_BITS)
+#define SCALE8_STAGED1(src) ASR(src, SCALE8_STAGED1_BITS)
+#define SCALE8_STAGED2(src) ASR(src, SCALE8_STAGED2_BITS)
 
 #define SBC_FIXED_0(val) { val = 0; }
-#define MUL(a, b)  ((sbc_extended_t)(a) * (b))
+#define MUL(a, b)  ((a) * (b))
 #ifdef __arm__
 #define MULA(a, b, res) ({			\
-		long long tmp = res;		\
+		int tmp = res;			\
 		__asm__(			\
-			"smlal %Q0, %R0, %2, %3" \
+			"mla %0, %2, %3, %0"	\
 			: "=&r" (tmp)		\
 			: "0" (tmp), "r" (a), "r" (b)); \
 		tmp; })
 #else
-#define MULA(a, b, res) ((sbc_extended_t)(a) * (b) + (res))
+#define MULA(a, b, res) ((a) * (b) + (res))
 #endif
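Two details in this header are worth noting. First, ASR() guards against compilers where right-shifting a negative signed value is not an arithmetic shift: the `(-2 >> 1 == -1)` test is a compile-time constant, so either branch folds away at no runtime cost. Second, on ARM the accumulator no longer spans a register pair: smlal (a 32x32->64 multiply-accumulate writing a low/high register pair, hence the %Q0/%R0 operands) is replaced by mla (32x32->32 into a single register), which uses fewer registers and is typically cheaper. A hedged usage sketch -- the macros match the new header, the surrounding main() is invented:

#include <stdint.h>

typedef int32_t sbc_fixed_t;

#define ASR(val, bits) ((-2 >> 1 == -1) ? \
	((int32_t)(val)) >> (bits) : ((int32_t)(val)) / (1 << (bits)))
#define MULA(a, b, res) ((a) * (b) + (res))	/* generic (non-ARM) path */

int main(void)
{
	sbc_fixed_t acc = 0;

	acc = MULA(3, -4, acc);		/* acc = -12 */
	acc = MULA(2, 5, acc);		/* acc = -2  */

	/* ASR(-2, 1) is -1 with an arithmetic shift, and the division
	 * fallback (-2 / 2) yields the same result here. */
	return ASR(acc, 1) == -1 ? 0 : 1;
}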