aarch64: Add the SME2 FP<->FP conversion instructions

This patch adds the BFCVT{,N} and FCVT{,N} instructions,
which narrow a pair of .S registers to a single .H register.
This commit is contained in:
Richard Sandiford 2023-03-30 11:09:15 +01:00
parent 28ef4f20c0
commit c04965ec7d
9 changed files with 1164 additions and 1010 deletions

View File

@ -0,0 +1,3 @@
#as: -march=armv8-a
#source: sme2-24-invalid.s
#error_output: sme2-24-invalid.l

View File

@ -0,0 +1,22 @@
[^ :]+: Assembler messages:
[^ :]+:[0-9]+: Error: expected a register at operand 1 -- `bfcvt 0,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: expected a register or register list at operand 2 -- `bfcvt z0\.h,0'
[^ :]+:[0-9]+: Error: start register out of range at operand 2 -- `bfcvt z0\.h,{z1\.s-z2\.s}'
[^ :]+:[0-9]+: Error: expected a list of 2 registers at operand 2 -- `bfcvtn z0\.h,{z0\.s-z2\.s}'
[^ :]+:[0-9]+: Error: expected a list of 2 registers at operand 2 -- `bfcvt z0\.h,{z0\.s-z3\.s}'
[^ :]+:[0-9]+: Error: operand mismatch -- `bfcvtn z0\.s,{z0\.s-z3\.s}'
[^ :]+:[0-9]+: Info: did you mean this\?
[^ :]+:[0-9]+: Info: bfcvtn z0\.h, {z0\.s-z3\.s}
[^ :]+:[0-9]+: Error: operand mismatch -- `bfcvt z0\.s,{z0\.h-z3\.h}'
[^ :]+:[0-9]+: Info: did you mean this\?
[^ :]+:[0-9]+: Info: bfcvt z0\.h, {z0\.s-z3\.s}
[^ :]+:[0-9]+: Error: operand mismatch -- `fcvt z0\.s,{z0\.h-z1\.h}'
[^ :]+:[0-9]+: Info: did you mean this\?
[^ :]+:[0-9]+: Info: fcvt z0\.h, {z0\.s-z1\.s}
[^ :]+:[0-9]+: Error: operand mismatch -- `fcvt z0\.s,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Info: did you mean this\?
[^ :]+:[0-9]+: Info: fcvt z0\.h, {z0\.s-z1\.s}
[^ :]+:[0-9]+: Error: operand mismatch -- `fcvt z0\.d,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Info: did you mean this\?
[^ :]+:[0-9]+: Info: fcvt z0\.h, {z0\.s-z1\.s}
[^ :]+:[0-9]+: Error: start register out of range at operand 2 -- `fcvt z0\.h,{z1\.s-z2\.s}'

View File

@ -0,0 +1,13 @@
bfcvt 0, { z0.s - z1.s }
bfcvt z0.h, 0
bfcvt z0.h, { z1.s - z2.s }
bfcvtn z0.h, { z0.s - z2.s }
bfcvt z0.h, { z0.s - z3.s }
bfcvtn z0.s, { z0.s - z3.s }
bfcvt z0.s, { z0.h - z3.h }
fcvt z0.s, { z0.h - z1.h }
fcvt z0.s, { z0.s - z1.s }
fcvt z0.d, { z0.s - z1.s }
fcvt z0.h, { z1.s - z2.s }

View File

@ -0,0 +1,3 @@
#as: -march=armv8-a+sme
#source: sme2-24.s
#error_output: sme2-24-noarch.l

View File

@ -0,0 +1,17 @@
[^ :]+: Assembler messages:
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvt z0\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvt z31\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvt z0\.h,{z30\.s-z31\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvt z14\.h,{z20\.s-z21\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvtn z0\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvtn z31\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvtn z0\.h,{z30\.s-z31\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `bfcvtn z26\.h,{z14\.s-z15\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvt z0\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvt z31\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvt z0\.h,{z30\.s-z31\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvt z29\.h,{z6\.s-z7\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvtn z0\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvtn z31\.h,{z0\.s-z1\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvtn z0\.h,{z30\.s-z31\.s}'
[^ :]+:[0-9]+: Error: selected processor does not support `fcvtn z29\.h,{z6\.s-z7\.s}'

View File

@ -0,0 +1,25 @@
#as: -march=armv8-a+sme2
#objdump: -dr
[^:]+: file format .*
[^:]+:
[^:]+:
[^:]+: c160e000 bfcvt z0\.h, {z0\.s-z1\.s}
[^:]+: c160e01f bfcvt z31\.h, {z0\.s-z1\.s}
[^:]+: c160e3c0 bfcvt z0\.h, {z30\.s-z31\.s}
[^:]+: c160e28e bfcvt z14\.h, {z20\.s-z21\.s}
[^:]+: c160e020 bfcvtn z0\.h, {z0\.s-z1\.s}
[^:]+: c160e03f bfcvtn z31\.h, {z0\.s-z1\.s}
[^:]+: c160e3e0 bfcvtn z0\.h, {z30\.s-z31\.s}
[^:]+: c160e1fa bfcvtn z26\.h, {z14\.s-z15\.s}
[^:]+: c120e000 fcvt z0\.h, {z0\.s-z1\.s}
[^:]+: c120e01f fcvt z31\.h, {z0\.s-z1\.s}
[^:]+: c120e3c0 fcvt z0\.h, {z30\.s-z31\.s}
[^:]+: c120e0dd fcvt z29\.h, {z6\.s-z7\.s}
[^:]+: c120e020 fcvtn z0\.h, {z0\.s-z1\.s}
[^:]+: c120e03f fcvtn z31\.h, {z0\.s-z1\.s}
[^:]+: c120e3e0 fcvtn z0\.h, {z30\.s-z31\.s}
[^:]+: c120e0fd fcvtn z29\.h, {z6\.s-z7\.s}

View File

@ -0,0 +1,19 @@
bfcvt z0.h, { z0.s - z1.s }
bfcvt z31.h, { z0.s - z1.s }
bfcvt z0.h, { z30.s - z31.s }
bfcvt z14.h, { z20.s - z21.s }
bfcvtn z0.h, { z0.s - z1.s }
bfcvtn z31.h, { z0.s - z1.s }
bfcvtn z0.h, { z30.s - z31.s }
bfcvtn z26.h, { z14.s - z15.s }
fcvt z0.h, { z0.s - z1.s }
fcvt z31.h, { z0.s - z1.s }
fcvt z0.h, { z30.s - z31.s }
fcvt z29.h, { z6.s - z7.s }
fcvtn z0.h, { z0.s - z1.s }
fcvtn z31.h, { z0.s - z1.s }
fcvtn z0.h, { z30.s - z31.s }
fcvtn z29.h, { z6.s - z7.s }

File diff suppressed because it is too large Load Diff

View File

@ -1624,6 +1624,10 @@
{ \
QLF3(S_H,P_M,S_S), \
}
#define OP_SVE_HS \
{ \
QLF2(S_H,S_S), \
}
#define OP_SVE_HU \
{ \
QLF2(S_H,NIL), \
@ -5353,6 +5357,8 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SME2_INSN ("add", 0xc1a11810, 0xffa39c78, sme_int_sd, 0, OP3 (SME_ZA_array_off3_0, SME_Znx4, SME_Zmx4), OP_SVE_VVV_SD, F_OD (4), 0),
SME2_INSN ("add", 0xc120a300, 0xff30ffe1, sme_size_22, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zm), OP_SVE_VVV_BHSD, 0, 1),
SME2_INSN ("add", 0xc120ab00, 0xff30ffe3, sme_size_22, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zm), OP_SVE_VVV_BHSD, 0, 1),
SME2_INSN ("bfcvt", 0xc160e000, 0xfffffc20, sme_misc, 0, OP2 (SVE_Zd, SME_Znx2), OP_SVE_HS, 0, 0),
SME2_INSN ("bfcvtn", 0xc160e020, 0xfffffc20, sme_misc, 0, OP2 (SVE_Zd, SME_Znx2), OP_SVE_HS, 0, 0),
SME2_INSN ("bfdot", 0xc1501018, 0xfff09038, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx2, SME_Zm_INDEX2), OP_SVE_SHH, F_OD (2), 0),
SME2_INSN ("bfdot", 0xc1509018, 0xfff09078, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx4, SME_Zm_INDEX2), OP_SVE_SHH, F_OD (4), 0),
SME2_INSN ("bfdot", 0xc1201010, 0xfff09c18, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SVE_ZnxN, SME_Zm), OP_SVE_SHH, F_OD (2), 0),
@ -5383,6 +5389,8 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SME2_INSN ("fadd", 0xc1a11c00, 0xffbf9c78, sme_fp_sd, 0, OP2 (SME_ZA_array_off3_0, SME_Znx4), OP_SVE_VVV_SD, F_OD (4), 0),
SME2_INSN ("fclamp", 0xc120c000, 0xff20fc01, sme_size_22_hsd, 0, OP3 (SME_Zdnx2, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_HSD, 0, 0),
SME2_INSN ("fclamp", 0xc120c800, 0xff20fc03, sme_size_22_hsd, 0, OP3 (SME_Zdnx4, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_HSD, 0, 0),
SME2_INSN ("fcvt", 0xc120e000, 0xfffffc20, sve_misc, 0, OP2 (SVE_Zd, SME_Znx2), OP_SVE_HS, 0, 0),
SME2_INSN ("fcvtn", 0xc120e020, 0xfffffc20, sve_misc, 0, OP2 (SVE_Zd, SME_Znx2), OP_SVE_HS, 0, 0),
SME2_INSN ("fcvtzs", 0xc121e000, 0xfffffc21, sve_misc, 0, OP2 (SME_Zdnx2, SME_Znx2), OP_SVE_SS, 0, 0),
SME2_INSN ("fcvtzs", 0xc131e000, 0xfffffc63, sve_misc, 0, OP2 (SME_Zdnx4, SME_Znx4), OP_SVE_SS, 0, 0),
SME2_INSN ("fcvtzu", 0xc121e020, 0xfffffc21, sve_misc, 0, OP2 (SME_Zdnx2, SME_Znx2), OP_SVE_SS, 0, 0),