re PR target/54222 ([avr] Implement fixed-point support)

gcc/
	PR target/54222
	* config/avr/avr-dimode.md (umulsidi3, mulsidi3): New expanders.
	(umulsidi3_insn, mulsidi3_insn): New insns.

libgcc/
	PR target/54222
	* config/avr/t-avr (LIB2FUNCS_EXCLUDE): Add: _usmulUHA, _usmulUSA,
	_ssmulHA, _ssmulSA.
	(LIB1ASMFUNCS): Add: _muldi3_6, _mulsidi3, _umulsidi3, _usmuluha3,
	_ssmulha3, _usmulusa3, _ssmulsa3.
	* config/avr/lib1funcs.S (__muldi3_6): Break out of __muldi3.
	(__muldi3): XCALL __muldi3_6 instead of rcall.
	(__umulsidi3, __mulsidi3): New functions.
	(do_prologue_saves, do_epilogue_restores): New .macros.
	(__divdi3_moddi3): Use them.
	* config/avr/lib1funcs-fixed.S (__usmuluha3, __ssmulha3)
	(__usmulusa3, __ssmulsa3): New functions.

From-SVN: r195978
This commit is contained in:
Georg-Johann Lay 2013-02-12 14:55:16 +00:00 committed by Georg-Johann Lay
parent 2597da229a
commit e68a4ef6ea
6 changed files with 563 additions and 23 deletions

View File

@ -1,3 +1,9 @@
2013-02-12 Georg-Johann Lay <avr@gjlay.de>
PR target/54222
* config/avr/avr-dimode.md (umulsidi3, mulsidi3): New expanders.
(umulsidi3_insn, mulsidi3_insn): New insns.
2013-02-12 Christophe Lyon <christophe.lyon@linaro.org>
* config/arm/arm-protos.h (struct cpu_vec_costs): New struct type.

View File

@ -446,3 +446,34 @@
"%~call __<code_stdname>di3"
[(set_attr "adjust_len" "call")
(set_attr "cc" "clobber")])
;; "umulsidi3"
;; "mulsidi3"
;; Expander for the widening multiplication DI = SI * SI.
;; Nothing is multiplied inline: operand 1 is copied into the SImode hard
;; register starting at R22, operand 2 into the SImode hard register
;; starting at R18, the matching "..._insn" pattern is emitted, and the
;; DImode hard register ACC_A is finally copied into operand 0.
;; Only enabled when avr_have_dimode holds.
(define_expand "<extend_u>mulsidi3"
[(parallel [(match_operand:DI 0 "register_operand" "")
(match_operand:SI 1 "general_operand" "")
(match_operand:SI 2 "general_operand" "")
;; Just to mention the iterator
(clobber (any_extend:SI (match_dup 1)))])]
"avr_have_dimode"
{
// Load the two factors into the hard registers the insn pattern reads
emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]);
emit_insn (gen_<extend_u>mulsidi3_insn());
// Use emit_move_insn and not open-coded expand because of missing movdi
emit_move_insn (operands[0], gen_rtx_REG (DImode, ACC_A));
DONE;
})
;; "umulsidi3_insn"
;; "mulsidi3_insn"
;; The multiplication itself, expressed purely on hard registers:
;; ACC_A (DImode) = extend (SI reg 18) * extend (SI reg 22).
;; It is output as a call to __<extend_u>mulsidi3; besides the result the
;; pattern declares X and Z as clobbered and the condition code as
;; clobbered.  The length is adjusted like for any other call.
(define_insn "<extend_u>mulsidi3_insn"
[(set (reg:DI ACC_A)
(mult:DI (any_extend:DI (reg:SI 18))
(any_extend:DI (reg:SI 22))))
(clobber (reg:HI REG_X))
(clobber (reg:HI REG_Z))]
"avr_have_dimode"
"%~call __<extend_u>mulsidi3"
[(set_attr "adjust_len" "call")
(set_attr "cc" "clobber")])

View File

@ -1,3 +1,18 @@
2013-02-12 Georg-Johann Lay <avr@gjlay.de>
PR target/54222
* config/avr/t-avr (LIB2FUNCS_EXCLUDE): Add: _usmulUHA, _usmulUSA,
_ssmulHA, _ssmulSA.
(LIB1ASMFUNCS): Add: _muldi3_6, _mulsidi3, _umulsidi3, _usmuluha3,
_ssmulha3, _usmulusa3, _ssmulsa3.
* config/avr/lib1funcs.S (__muldi3_6): Break out of __muldi3.
(__muldi3): XCALL __muldi3_6 instead of rcall.
(__umulsidi3, __mulsidi3): New functions.
(do_prologue_saves, do_epilogue_restores): New .macros.
(__divdi3_moddi3): Use them.
* config/avr/lib1funcs-fixed.S (__usmuluha3, __ssmulha3)
(__usmulusa3, __ssmulsa3): New functions.
2013-02-11 Iain Sandoe <iain@codesourcery.com>
Jack Howarth <howarth@bromo.med.uc.edu>
Patrick Marlier <patrick.marlier@gmail.com>

View File

@ -669,6 +669,210 @@ ENDF __mulusa3_round
#undef GUARD
/***********************************************************
Fixed unsigned saturated Multiplication 8.8 x 8.8
***********************************************************/
#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define SS __tmp_reg__
#if defined (L_usmuluha3)
;; Unsigned saturating multiplication of two 8.8 fixed-point values.
;; The factors arrive in R22 and R24 (word registers); the result is
;; left in R25:R24.  Anything that does not fit is clamped to 0xffff.
;; NOTE(review): the code assumes the widening multiply returns the 32-bit
;; product in R22..R25 -- confirm against the callee.
DEFUN __usmuluha3
;; Widening multiply
#ifdef __AVR_HAVE_MUL__
;; Adjust interface
;; (with MUL the factors are handed over in R26 and R18 instead)
movw R26, R22
movw R18, R24
#endif /* HAVE MUL */
XCALL __umulhisi3
;; A non-zero top byte means the integral part needs more than 8 bits
tst C3
brne .Lmax
;; Round, target is in C1..C2
;; lsl moves the highest discarded bit into carry, adc propagates it
lsl C0
adc C1, __zero_reg__
adc C2, __zero_reg__
;; Carry out of the target bytes: rounding overflowed
brcs .Lmax
;; Move result into place
mov C3, C2
mov C2, C1
ret
.Lmax:
;; Saturate
ldi C2, 0xff
ldi C3, 0xff
ret
ENDF __usmuluha3
#endif /* L_usmuluha3 */
/***********************************************************
Fixed signed saturated Multiplication s8.7 x s8.7
***********************************************************/
#if defined (L_ssmulha3)
;; Signed saturating multiplication of two s8.7 fixed-point values.
;; The factors arrive in R22 and R24 (word registers); the result is
;; left in R25:R24.  Out-of-range results are clamped to 0x7fff or 0x8000.
;; NOTE(review): the code assumes the widening multiply returns the 32-bit
;; product in R22..R25 -- confirm against the callee.
DEFUN __ssmulha3
;; Widening multiply
#ifdef __AVR_HAVE_MUL__
;; Adjust interface
;; (with MUL the factors are handed over in R26 and R18 instead)
movw R26, R22
movw R18, R24
#endif /* HAVE MUL */
XCALL __mulhisi3
;; Adjust decimal point
;; Shift the low three bytes left by one bit
lsl C0
rol C1
rol C2
;; Signed overflow in that shift: saturate by the sign still held in C3
brvs .LsatC3.3
;; The 9 MSBs must be the same
;; rol shifts the top bit of C3 into carry, sbc turns carry into 0x00 / 0xff
rol C3
sbc SS, SS
cp C3, SS
brne .LsatSS
;; Round
;; lsl moves the highest discarded bit into carry, adc propagates it
lsl C0
adc C1, __zero_reg__
adc C2, __zero_reg__
;; Signed overflow while rounding up: clamp to the largest value
brvs .Lmax
;; Move result into place
mov C3, C2
mov C2, C1
ret
.Lmax:
;; Load 0x7fff
;; (clearing C3 makes the code below pick the positive limit)
clr C3
.LsatC3.3:
;; C3 < 0 --> 0x8000
;; C3 >= 0 --> 0x7fff
mov SS, C3
.LsatSS:
;; Load min / max value:
;; SS = -1 --> 0x8000
;; SS = 0 --> 0x7fff
ldi C3, 0x7f
ldi C2, 0xff
;; Only bit 7 of SS is inspected; when set, 0x7fff + 1 gives 0x8000
sbrc SS, 7
adiw C2, 1
ret
ENDF __ssmulha3
#endif /* L_ssmulha3 */
#undef C0
#undef C1
#undef C2
#undef C3
#undef SS
/***********************************************************
Fixed unsigned saturated Multiplication 16.16 x 16.16
***********************************************************/
#define C0 18
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define C4 C0+4
#define C5 C0+5
#define C6 C0+6
#define C7 C0+7
#define SS __tmp_reg__
#if defined (L_usmulusa3)
;; R22[4] = R22[4] *{usat} R18[4]
;; Ordinary ABI function
;; Unsigned saturating multiplication of two 16.16 fixed-point values.
;; The 64-bit product is taken from R18..R25 after the widening multiply;
;; anything that does not fit is clamped to 0xffffffff.
DEFUN __usmulusa3
;; Widening multiply
XCALL __umulsidi3
;; Any bit set in the two top bytes means the integral part overflowed
or C7, C6
brne .Lmax
;; Round, target is in C2..C5
;; lsl moves the highest discarded bit into carry, adc propagates it
lsl C1
adc C2, __zero_reg__
adc C3, __zero_reg__
adc C4, __zero_reg__
adc C5, __zero_reg__
;; Carry out of the target bytes: rounding overflowed
brcs .Lmax
;; Move result into place
;; (upper word first, because the lower move overwrites C4..C5)
wmov C6, C4
wmov C4, C2
ret
.Lmax:
;; Saturate
ldi C7, 0xff
ldi C6, 0xff
wmov C4, C6
ret
ENDF __usmulusa3
#endif /* L_usmulusa3 */
/***********************************************************
Fixed signed saturated Multiplication s16.15 x s16.15
***********************************************************/
#if defined (L_ssmulsa3)
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
;; Signed saturating multiplication of two s16.15 fixed-point values.
;; The 64-bit product is taken from R18..R25 after the widening multiply;
;; out-of-range results are clamped to 0x7fffffff or 0x80000000.
DEFUN __ssmulsa3
;; Widening multiply
XCALL __mulsidi3
;; Adjust decimal point
;; Shift bytes 1..5 left by one bit; byte 0 is not needed any more
lsl C1
rol C2
rol C3
rol C4
rol C5
;; Signed overflow in that shift: saturate by the sign still held in C7
brvs .LsatC7.7
;; The 17 MSBs must be the same
;; Continue the shift through C6 and C7; sbc turns the bit shifted out
;; of C7 into 0x00 / 0xff
rol C6
rol C7
sbc SS, SS
cp C6, SS
cpc C7, SS
brne .LsatSS
;; Round
;; lsl moves the highest discarded bit into carry, adc propagates it
lsl C1
adc C2, __zero_reg__
adc C3, __zero_reg__
adc C4, __zero_reg__
adc C5, __zero_reg__
;; Signed overflow while rounding up: clamp to the largest value
brvs .Lmax
;; Move result into place
;; (upper word first, because the lower move overwrites C4..C5)
wmov C6, C4
wmov C4, C2
ret
.Lmax:
;; Load 0x7fffffff
;; (clearing C7 makes the code below pick the positive limit)
clr C7
.LsatC7.7:
;; C7 < 0 --> 0x80000000
;; C7 >= 0 --> 0x7fffffff
lsl C7
sbc SS, SS
.LsatSS:
;; Load min / max value:
;; SS = -1 --> 0x80000000
;; SS = 0 --> 0x7fffffff
;; Fill all four result bytes with the complement of SS, then subtract
;; 0x80 from the top byte: 0x00 becomes 0x80 and 0xff becomes 0x7f
com SS
mov C4, SS
mov C5, C4
wmov C6, C4
subi C7, 0x80
ret
ENDF __ssmulsa3
#endif /* L_ssmulsa3 */
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#undef SS
/*******************************************************
Fractional Division 8 / 8
*******************************************************/

View File

@ -79,6 +79,41 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define XJMP rjmp
#endif
;; Prologue stuff
;; Usage: do_prologue_saves n_pushed [n_frame]   (n_frame defaults to 0)
;; Loads X (r27:r26) with n_frame and Z (r31:r30) with the gs() address of
;; a local label placed right behind the jump, then jumps into
;; __prologue_saves__ at an offset of (18 - n_pushed) * 2.
;; The label name carries the macro invocation counter, so each expansion
;; gets its own label.
;; NOTE(review): __prologue_saves__ is not part of this excerpt; presumably
;; it pushes n_pushed registers, sets up the frame and continues at Z
;; -- confirm.
.macro do_prologue_saves n_pushed n_frame=0
ldi r26, lo8(\n_frame)
ldi r27, hi8(\n_frame)
ldi r30, lo8(gs(.L_prologue_saves.\@))
ldi r31, hi8(gs(.L_prologue_saves.\@))
XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
.L_prologue_saves.\@:
.endm
;; Epilogue stuff
;; Usage: do_epilogue_restores n_pushed [n_frame]   (n_frame defaults to 0)
;; Loads Y (r29:r28) from the stack pointer, adds n_frame to it, loads r30
;; with n_pushed and jumps into __epilogue_restores__ at an offset of
;; (18 - n_pushed) * 2.  Control does not come back to the macro call site.
;; With a 16-bit stack pointer the frame size is added with adiw when it
;; is in 1..63 and with a subi / sbci pair on the negated value otherwise.
;; Without SP_H only the low byte is adjusted and r29 is cleared.
;; NOTE(review): __epilogue_restores__ is not part of this excerpt;
;; presumably it pops the saved registers and returns -- confirm.
.macro do_epilogue_restores n_pushed n_frame=0
in r28, __SP_L__
#ifdef __AVR_HAVE_SPH__
in r29, __SP_H__
.if \n_frame > 63
subi r28, lo8(-\n_frame)
sbci r29, hi8(-\n_frame)
.elseif \n_frame > 0
adiw r28, \n_frame
.endif
#else
clr r29
.if \n_frame > 0
subi r28, lo8(-\n_frame)
.endif
#endif /* HAVE SPH */
ldi r30, \n_pushed
XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
.endm
;; Support function entry and exit for convenience
.macro DEFUN name
.global \name
.func \name
@ -98,6 +133,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.endfunc
.endm
;; Skip next instruction, typically a jump target
;; Implemented as a compare-and-skip-if-equal of register 0 with itself,
;; which always compares equal.
#define skip cpse 0,0
;; Negate a 2-byte value held in consecutive registers
.macro NEG2 reg
com \reg+1
@ -736,8 +774,6 @@ ENDF __mulsqipsi3
Multiplication 64 x 64
*******************************************************/
#if defined (L_muldi3)
;; A[] = A[] * B[]
;; A[0..7]: In: Multiplicand
@ -774,6 +810,8 @@ ENDF __mulsqipsi3
#define C6 C4+2
#define C7 C4+3
#if defined (L_muldi3)
;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function
@ -818,7 +856,7 @@ DEFUN __muldi3
wmov 26, B2
;; 0 * 1
rcall __muldi3_6
XCALL __muldi3_6
pop A0
pop A1
@ -829,7 +867,7 @@ DEFUN __muldi3
pop r26
pop r27
;; 1 * 0
rcall __muldi3_6
XCALL __muldi3_6
pop A0
pop A1
@ -852,7 +890,12 @@ DEFUN __muldi3
pop r28
pop r29
ret
ENDF __muldi3
#endif /* L_muldi3 */
#if defined (L_muldi3_6)
;; A helper for some 64-bit multiplications with MUL available
DEFUN __muldi3_6
__muldi3_6:
XCALL __umulhisi3
add C2, 22
@ -862,7 +905,8 @@ __muldi3_6:
brcc 0f
adiw C6, 1
0: ret
ENDF __muldi3
ENDF __muldi3_6
#endif /* L_muldi3_6 */
#undef C7
#undef C6
@ -875,6 +919,8 @@ ENDF __muldi3
#else /* !HAVE_MUL */
#if defined (L_muldi3)
#define C0 26
#define C1 C0+1
#define C2 C0+2
@ -952,6 +998,7 @@ ENDF __muldi3
#undef C1
#undef C0
#endif /* L_muldi3 */
#endif /* HAVE_MUL */
#undef B7
@ -972,7 +1019,240 @@ ENDF __muldi3
#undef A1
#undef A0
#endif /* L_muldi3 */
/*******************************************************
Widening Multiplication 64 = 32 x 32 with MUL
*******************************************************/
#if defined (__AVR_HAVE_MUL__)
#define A0 r22
#define A1 r23
#define A2 r24
#define A3 r25
#define B0 r18
#define B1 r19
#define B2 r20
#define B3 r21
#define C0 18
#define C1 C0+1
#define C2 20
#define C3 C2+1
#define C4 28
#define C5 C4+1
#define C6 C4+2
#define C7 C4+3
#if defined (L_umulsidi3)
;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
;;
;; The entry point only clears T and falls through into the helper below.
;; The helper builds the product from four 16 x 16 word multiplications.
;; NOTE(review): the code assumes __umulhisi3 multiplies X by R19:R18,
;; returns the product in R22..R25 and leaves X intact -- confirm.
DEFUN __umulsidi3
;; T = 0: no sign correction for A
clt
;; FALLTHRU
ENDF __umulsidi3
;; T = sign (A)
;; With T set, B is subtracted from the high 32 bits of the product, which
;; corrects an unsigned product for a negative A.
DEFUN __umulsidi3_helper
push 29 $ push 28 ; Y
;; Park the high word of A in Z while X is used for the low word
wmov 30, A2
;; Counting in Words, we have to perform 4 Multiplications
;; 0 * 0
wmov 26, A0
XCALL __umulhisi3
;; Low word of that product is final already: keep it on the stack
push 23 $ push 22 ; C0
;; Y = low word of B, R19:R18 = high word of B
wmov 28, B0
wmov 18, B2
;; High word of the 0 * 0 product becomes word 1 of the result
wmov C2, 24
;; Saved for the last partial product; popped in swapped roles below
push 27 $ push 26 ; A0
push 19 $ push 18 ; B2
;;
;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
;; B2 C2 -- -- -- B0 A2
;; 1 * 1
wmov 26, 30 ; A2
XCALL __umulhisi3
;; Sign-extend A. T holds the sign of A
brtc 0f
;; Subtract B from the high part of the result
;; (Y holds the low word of B, R19:R18 the high word)
sub 22, 28
sbc 23, 29
sbc 24, 18
sbc 25, 19
0: wmov 18, 28 ;; B0
;; The 1 * 1 product forms words 2 and 3 of the result, kept in Y and Z
wmov C4, 22
wmov C6, 24
;;
;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
;; B0 C2 -- -- A2 C4 C6
;;
;; 1 * 0
;; NOTE(review): __muldi3_6 presumably adds X * R19:R18 into result
;; words 1..3 -- only part of it is visible here, confirm.
XCALL __muldi3_6
;; 0 * 1
;; The pops deliberately land in the other register pair than the pushes
pop 26 $ pop 27 ;; B2
pop 18 $ pop 19 ;; A0
XCALL __muldi3_6
;; Move result C into place and save A0 in Z
wmov 22, C4
wmov 24, C6
wmov 30, 18 ; A0
;; Restore the low result word saved after the first multiplication
pop C0 $ pop C1
;; Epilogue
pop 28 $ pop 29 ;; Y
ret
ENDF __umulsidi3_helper
#endif /* L_umulsidi3 */
#if defined (L_mulsidi3)
;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
;;
;; The sign of A is passed to __umulsidi3_helper in T.  A negative B is
;; handled here by subtracting A from the high 32 bits of the result.
DEFUN __mulsidi3
;; T = sign bit of A
bst A3, 7
;; B is non-negative: tail-jump, the helper returns to our caller
sbrs B3, 7 ; Enhanced core has no skip bug
XJMP __umulsidi3_helper
;; B needs sign-extension
;; Keep the high word of A across the call
push A3
push A2
XCALL __umulsidi3_helper
;; A0 survived in Z
sub r22, r30
sbc r23, r31
;; Fetch the high word of A; pop leaves the carry of the chain untouched
pop r26
pop r27
sbc r24, r26
sbc r25, r27
ret
ENDF __mulsidi3
#endif /* L_mulsidi3 */
#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#endif /* HAVE_MUL */
/**********************************************************
Widening Multiplication 64 = 32 x 32 without MUL
**********************************************************/
#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7
#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7
#define AA0 22
#define AA1 AA0+1
#define AA2 AA0+2
#define AA3 AA0+3
#define BB0 18
#define BB1 BB0+1
#define BB2 BB0+2
#define BB3 BB0+3
#define Mask r30
;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
;;
;; Both entry points share one body and differ only in T: set for signed,
;; clear for unsigned.  Each 32-bit factor is widened to 64 bits and the
;; work is handed to __muldi3.
;; NOTE(review): __muldi3 is assumed to compute R18[8] *= R10[8] -- its
;; body is not part of this excerpt, confirm.
DEFUN __mulsidi3
;; T = 1 selects sign-extension; skip jumps over the clt below
set
skip
;; FALLTHRU
ENDF __mulsidi3
DEFUN __umulsidi3
clt ; skipped
;; Save 10 Registers: R10..R17, R28, R29
do_prologue_saves 10
;; Mask = 0xff when T is set, 0x7f when T is clear
ldi Mask, 0xff
bld Mask, 7
;; Move B into place...
wmov B0, BB0
wmov B2, BB2
;; ...and extend it
;; The sign bit survives the mask only in the signed case; lsl moves it
;; into carry and sbc turns carry into 0x00 / 0xff for the upper bytes.
;; BB3 is clobbered here, but B was copied to B0..B3 beforehand.
and BB3, Mask
lsl BB3
sbc B4, B4
mov B5, B4
wmov B6, B4
;; Move A into place...
;; (the low half of the A[] operand shares R18..R21 with the incoming B)
wmov A0, AA0
wmov A2, AA2
;; ...and extend it
;; Same trick as for B; the fill bytes overwrite the incoming A registers
and AA3, Mask
lsl AA3
sbc A4, A4
mov A5, A4
wmov A6, A4
XCALL __muldi3
;; Restore the 10 saved registers; control leaves through the epilogue
do_epilogue_restores 10
ENDF __umulsidi3
#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef BB0
#undef BB1
#undef BB2
#undef BB3
#undef Mask
#endif /* L_mulsidi3 && !HAVE_MUL */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1625,14 +1905,10 @@ DEFUN __divdi3_moddi3
0: ;; The Prologue
;; Save 12 Registers: Y, 17...8
;; No Frame needed (X = 0)
clr r26
clr r27
ldi r30, lo8(gs(1f))
ldi r31, hi8(gs(1f))
XJMP __prologue_saves__ + ((18 - 12) * 2)
;; No Frame needed
do_prologue_saves 12
1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
;; SS.6 will contain the Sign of the Remainder (A.sign)
mov SS, A7
asr SS
@ -1672,15 +1948,8 @@ DEFUN __divdi3_moddi3
#endif /* __AVR_HAVE_JMP_CALL__ */
XCALL __negdi2
4: ;; Epilogue: Restore the Z = 12 Registers and return
in r28, __SP_L__
#if defined (__AVR_HAVE_SPH__)
in r29, __SP_H__
#else
clr r29
#endif /* #SP = 8/16 */
ldi r30, 12
XJMP __epilogue_restores__ + ((18 - 12) * 2)
4: ;; Epilogue: Restore 12 Registers and return
do_epilogue_restores 12
ENDF __divdi3_moddi3

View File

@ -18,7 +18,8 @@ LIB1ASMFUNCS = \
_udivmodsi4 \
_divmodsi4 \
_divdi3 _udivdi3 \
_muldi3 \
_muldi3 _muldi3_6 \
_mulsidi3 _umulsidi3 \
_udivmod64 \
_negsi2 _negdi2 \
_prologue \
@ -69,6 +70,8 @@ LIB1ASMFUNCS += \
_mulhq3 _muluhq3 \
_mulha3 _muluha3 _muluha3_round \
_mulsa3 _mulusa3 \
_usmuluha3 _ssmulha3 \
_usmulusa3 _ssmulsa3 \
_divqq3 _udivuqq3 _divqq_helper \
_divhq3 _udivuhq3 \
_divha3 _udivuha3 \
@ -260,3 +263,15 @@ LIB2FUNCS_EXCLUDE += \
LIB2FUNCS_EXCLUDE += \
$(foreach func,_div,\
$(foreach mode,$(sdiv_modes) $(udiv_modes),$(func_X)))
# Fixed-point modes that get a saturating multiplication:
# signed HA and SA, unsigned UHA and USA.
ssmul_modes = HA SA
usmul_modes = UHA USA
# Add the _usmul entry of every unsigned mode to LIB2FUNCS_EXCLUDE.
# NOTE(review): func_X is defined outside this excerpt; presumably it
# expands to the function prefix followed by the mode -- confirm.
LIB2FUNCS_EXCLUDE += \
$(foreach func,_usmul,\
$(foreach mode,$(usmul_modes),$(func_X)))
# Same for the _ssmul entry of every signed mode.
LIB2FUNCS_EXCLUDE += \
$(foreach func,_ssmul,\
$(foreach mode,$(ssmul_modes),$(func_X)))