target/arm: Implement SME2 SQRSHR, UQRSHR, SQRSHRN
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20250704142112.1018902-65-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
9a7d445c09
commit
70ad5b9fb1
4 changed files with 210 additions and 0 deletions
|
|
@ -266,3 +266,23 @@ DEF_HELPER_FLAGS_3(sme2_uzp4_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
|||
DEF_HELPER_FLAGS_3(sme2_uzp4_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uzp4_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uzp4_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
/*
 * Helper declarations for the SME2 SQRSHR / UQRSHR / SQRSHRU family.
 * Every helper here is declared with TCG_CALL_NO_RWG, returns void and
 * takes (ptr, ptr, i32).
 */
DEF_HELPER_FLAGS_3(sme2_sqrshr_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshr_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshru_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshru_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshru_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
/*
 * Helper declarations for the SQRSHRN / UQRSHRN / SQRSHRUN family,
 * with the same flags and (ptr, ptr, i32) -> void signature as above.
 */
DEF_HELPER_FLAGS_3(sme2_sqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrun_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
|
|
|||
|
|
@ -811,3 +811,40 @@ UZP_4 11000001 esz:2 1 10110 111000 ...00 ... 10 \
|
|||
&zz_e zd=%zd_ax4 zn=%zn_ax4
|
||||
UZP_4 11000001 001 10111 111000 ...00 ... 10 \
|
||||
&zz_e esz=4 zd=%zd_ax4 zn=%zn_ax4
|
||||
|
||||
### SME2 Multi-vector SVE Constructive Binary
|
||||
|
||||
&rshr zd zn shift
|
||||
|
||||
%rshr_sh_shift 16:4 !function=rsub_16
|
||||
%rshr_sb_shift 16:5 !function=rsub_32
|
||||
%rshr_dh_shift 22:1 16:5 !function=rsub_64
|
||||
|
||||
@rshr_sh ........ .... .... ...... ..... zd:5 \
|
||||
&rshr zn=%zn_ax2 shift=%rshr_sh_shift
|
||||
@rshr_sb ........ ... ..... ...... ..... zd:5 \
|
||||
&rshr zn=%zn_ax4 shift=%rshr_sb_shift
|
||||
@rshr_dh ........ ... ..... ...... ..... zd:5 \
|
||||
&rshr zn=%zn_ax4 shift=%rshr_dh_shift
|
||||
|
||||
SQRSHR_sh 11000001 1110 .... 110101 ....0 ..... @rshr_sh
|
||||
UQRSHR_sh 11000001 1110 .... 110101 ....1 ..... @rshr_sh
|
||||
SQRSHRU_sh 11000001 1111 .... 110101 ....0 ..... @rshr_sh
|
||||
|
||||
SQRSHR_sb 11000001 011 ..... 110110 ...00 ..... @rshr_sb
|
||||
SQRSHR_dh 11000001 1.1 ..... 110110 ...00 ..... @rshr_dh
|
||||
UQRSHR_sb 11000001 011 ..... 110110 ...01 ..... @rshr_sb
|
||||
UQRSHR_dh 11000001 1.1 ..... 110110 ...01 ..... @rshr_dh
|
||||
SQRSHRU_sb 11000001 011 ..... 110110 ...10 ..... @rshr_sb
|
||||
SQRSHRU_dh 11000001 1.1 ..... 110110 ...10 ..... @rshr_dh
|
||||
|
||||
SQRSHRN_sh 01000101 1011 .... 001010 ....0 ..... @rshr_sh
|
||||
UQRSHRN_sh 01000101 1011 .... 001110 ....0 ..... @rshr_sh
|
||||
SQRSHRUN_sh 01000101 1011 .... 000010 ....0 ..... @rshr_sh
|
||||
|
||||
SQRSHRN_sb 11000001 011 ..... 110111 ...00 ..... @rshr_sb
|
||||
SQRSHRN_dh 11000001 1.1 ..... 110111 ...00 ..... @rshr_dh
|
||||
UQRSHRN_sb 11000001 011 ..... 110111 ...01 ..... @rshr_sb
|
||||
UQRSHRN_dh 11000001 1.1 ..... 110111 ...01 ..... @rshr_dh
|
||||
SQRSHRUN_sb 11000001 011 ..... 110111 ...10 ..... @rshr_sb
|
||||
SQRSHRUN_dh 11000001 1.1 ..... 110111 ...10 ..... @rshr_dh
|
||||
|
|
|
|||
|
|
@ -1626,6 +1626,66 @@ SQCVT4(sme2_sqcvtu_dh, int64_t, uint16_t, H8, H2, do_usat_h)
|
|||
|
||||
#undef SQCVT4
|
||||
|
||||
#define SQRSHR2(NAME, TW, TN, HW, HN, RSHR, SAT) \
|
||||
void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
|
||||
{ \
|
||||
ARMVectorReg scratch; \
|
||||
size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
|
||||
int shift = simd_data(desc); \
|
||||
TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
|
||||
TN *d = vd; \
|
||||
if (vectors_overlap(vd, 1, vs, 2)) { \
|
||||
d = (TN *)&scratch; \
|
||||
} \
|
||||
for (size_t i = 0; i < n; ++i) { \
|
||||
d[HN(i)] = SAT(RSHR(s0[HW(i)], shift)); \
|
||||
d[HN(i + n)] = SAT(RSHR(s1[HW(i)], shift)); \
|
||||
} \
|
||||
if (d != vd) { \
|
||||
memcpy(vd, d, oprsz); \
|
||||
} \
|
||||
}
|
||||
|
||||
SQRSHR2(sme2_sqrshr_sh, int32_t, int16_t, H4, H2, do_srshr, do_ssat_h)
|
||||
SQRSHR2(sme2_uqrshr_sh, uint32_t, uint16_t, H4, H2, do_urshr, do_usat_h)
|
||||
SQRSHR2(sme2_sqrshru_sh, int32_t, uint16_t, H4, H2, do_srshr, do_usat_h)
|
||||
|
||||
#undef SQRSHR2
|
||||
|
||||
#define SQRSHR4(NAME, TW, TN, HW, HN, RSHR, SAT) \
|
||||
void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
|
||||
{ \
|
||||
ARMVectorReg scratch; \
|
||||
size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
|
||||
int shift = simd_data(desc); \
|
||||
TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
|
||||
TW *s2 = vs + 2 * sizeof(ARMVectorReg); \
|
||||
TW *s3 = vs + 3 * sizeof(ARMVectorReg); \
|
||||
TN *d = vd; \
|
||||
if (vectors_overlap(vd, 1, vs, 4)) { \
|
||||
d = (TN *)&scratch; \
|
||||
} \
|
||||
for (size_t i = 0; i < n; ++i) { \
|
||||
d[HN(i)] = SAT(RSHR(s0[HW(i)], shift)); \
|
||||
d[HN(i + n)] = SAT(RSHR(s1[HW(i)], shift)); \
|
||||
d[HN(i + 2 * n)] = SAT(RSHR(s2[HW(i)], shift)); \
|
||||
d[HN(i + 3 * n)] = SAT(RSHR(s3[HW(i)], shift)); \
|
||||
} \
|
||||
if (d != vd) { \
|
||||
memcpy(vd, d, oprsz); \
|
||||
} \
|
||||
}
|
||||
|
||||
SQRSHR4(sme2_sqrshr_sb, int32_t, int8_t, H4, H2, do_srshr, do_ssat_b)
|
||||
SQRSHR4(sme2_uqrshr_sb, uint32_t, uint8_t, H4, H2, do_urshr, do_usat_b)
|
||||
SQRSHR4(sme2_sqrshru_sb, int32_t, uint8_t, H4, H2, do_srshr, do_usat_b)
|
||||
|
||||
SQRSHR4(sme2_sqrshr_dh, int64_t, int16_t, H8, H2, do_srshr, do_ssat_h)
|
||||
SQRSHR4(sme2_uqrshr_dh, uint64_t, uint16_t, H8, H2, do_urshr, do_usat_h)
|
||||
SQRSHR4(sme2_sqrshru_dh, int64_t, uint16_t, H8, H2, do_srshr, do_usat_h)
|
||||
|
||||
#undef SQRSHR4
|
||||
|
||||
/* Convert and interleave */
|
||||
void HELPER(sme2_bfcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc)
|
||||
{
|
||||
|
|
@ -1715,6 +1775,66 @@ SQCVTN4(sme2_sqcvtun_dh, int64_t, uint16_t, H8, H2, do_usat_h)
|
|||
|
||||
#undef SQCVTN4
|
||||
|
||||
#define SQRSHRN2(NAME, TW, TN, HW, HN, RSHR, SAT) \
|
||||
void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
|
||||
{ \
|
||||
ARMVectorReg scratch; \
|
||||
size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
|
||||
int shift = simd_data(desc); \
|
||||
TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
|
||||
TN *d = vd; \
|
||||
if (vectors_overlap(vd, 1, vs, 2)) { \
|
||||
d = (TN *)&scratch; \
|
||||
} \
|
||||
for (size_t i = 0; i < n; ++i) { \
|
||||
d[HN(2 * i + 0)] = SAT(RSHR(s0[HW(i)], shift)); \
|
||||
d[HN(2 * i + 1)] = SAT(RSHR(s1[HW(i)], shift)); \
|
||||
} \
|
||||
if (d != vd) { \
|
||||
memcpy(vd, d, oprsz); \
|
||||
} \
|
||||
}
|
||||
|
||||
SQRSHRN2(sme2_sqrshrn_sh, int32_t, int16_t, H4, H2, do_srshr, do_ssat_h)
|
||||
SQRSHRN2(sme2_uqrshrn_sh, uint32_t, uint16_t, H4, H2, do_urshr, do_usat_h)
|
||||
SQRSHRN2(sme2_sqrshrun_sh, int32_t, uint16_t, H4, H2, do_srshr, do_usat_h)
|
||||
|
||||
#undef SQRSHRN2
|
||||
|
||||
#define SQRSHRN4(NAME, TW, TN, HW, HN, RSHR, SAT) \
|
||||
void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
|
||||
{ \
|
||||
ARMVectorReg scratch; \
|
||||
size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
|
||||
int shift = simd_data(desc); \
|
||||
TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
|
||||
TW *s2 = vs + 2 * sizeof(ARMVectorReg); \
|
||||
TW *s3 = vs + 3 * sizeof(ARMVectorReg); \
|
||||
TN *d = vd; \
|
||||
if (vectors_overlap(vd, 1, vs, 4)) { \
|
||||
d = (TN *)&scratch; \
|
||||
} \
|
||||
for (size_t i = 0; i < n; ++i) { \
|
||||
d[HN(4 * i + 0)] = SAT(RSHR(s0[HW(i)], shift)); \
|
||||
d[HN(4 * i + 1)] = SAT(RSHR(s1[HW(i)], shift)); \
|
||||
d[HN(4 * i + 2)] = SAT(RSHR(s2[HW(i)], shift)); \
|
||||
d[HN(4 * i + 3)] = SAT(RSHR(s3[HW(i)], shift)); \
|
||||
} \
|
||||
if (d != vd) { \
|
||||
memcpy(vd, d, oprsz); \
|
||||
} \
|
||||
}
|
||||
|
||||
SQRSHRN4(sme2_sqrshrn_sb, int32_t, int8_t, H4, H1, do_srshr, do_ssat_b)
|
||||
SQRSHRN4(sme2_uqrshrn_sb, uint32_t, uint8_t, H4, H1, do_urshr, do_usat_b)
|
||||
SQRSHRN4(sme2_sqrshrun_sb, int32_t, uint8_t, H4, H1, do_srshr, do_usat_b)
|
||||
|
||||
SQRSHRN4(sme2_sqrshrn_dh, int64_t, int16_t, H8, H2, do_srshr, do_ssat_h)
|
||||
SQRSHRN4(sme2_uqrshrn_dh, uint64_t, uint16_t, H8, H2, do_urshr, do_usat_h)
|
||||
SQRSHRN4(sme2_sqrshrun_dh, int64_t, uint16_t, H8, H2, do_srshr, do_usat_h)
|
||||
|
||||
#undef SQRSHRN4
|
||||
|
||||
/* Expand and convert */
|
||||
void HELPER(sme2_fcvt_w)(void *vd, void *vs, float_status *fpst, uint32_t desc)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1474,3 +1474,36 @@ static gen_helper_gvec_2 * const uzp4_fns[] = {
|
|||
gen_helper_sme2_uzp4_q,
|
||||
};
|
||||
TRANS_FEAT(UZP_4, aa64_sme2, do_zipuzp_4, a, uzp4_fns)
|
||||
|
||||
/*
 * Translate one multi-vector right-shift pattern.
 *
 * When sve_access_check() succeeds, emit an out-of-line two-operand gvec
 * call to @fn, passing the offsets of registers a->zd and a->zn, the full
 * vector register size as both operation size and maximum size, and
 * a->shift as the helper's data word.
 *
 * Always returns true, whether or not the access check passed.
 */
static bool do_zz_rshr(DisasContext *s, arg_rshr *a, gen_helper_gvec_2 *fn)
{
    int vl;

    if (!sve_access_check(s)) {
        return true;
    }

    vl = vec_full_reg_size(s);
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->zd),
                       vec_full_reg_offset(s, a->zn),
                       vl, vl, a->shift, fn);
    return true;
}
|
||||
|
||||
/*
 * Bind each decoded shift pattern to do_zz_rshr with its helper.
 * The second TRANS_FEAT argument names the feature gate: aa64_sme2 for
 * all patterns except the *_sh forms of SQRSHRN/UQRSHRN/SQRSHRUN, which
 * use aa64_sme2_or_sve2p1.
 */
TRANS_FEAT(SQRSHR_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_sh)
|
||||
TRANS_FEAT(UQRSHR_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_sh)
|
||||
TRANS_FEAT(SQRSHRU_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_sh)
|
||||
|
||||
TRANS_FEAT(SQRSHR_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_sb)
|
||||
TRANS_FEAT(SQRSHR_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_dh)
|
||||
TRANS_FEAT(UQRSHR_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_sb)
|
||||
TRANS_FEAT(UQRSHR_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_dh)
|
||||
TRANS_FEAT(SQRSHRU_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_sb)
|
||||
TRANS_FEAT(SQRSHRU_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_dh)
|
||||
|
||||
TRANS_FEAT(SQRSHRN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_sqrshrn_sh)
|
||||
TRANS_FEAT(UQRSHRN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_uqrshrn_sh)
|
||||
TRANS_FEAT(SQRSHRUN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_sqrshrun_sh)
|
||||
|
||||
TRANS_FEAT(SQRSHRN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrn_sb)
|
||||
TRANS_FEAT(SQRSHRN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrn_dh)
|
||||
TRANS_FEAT(UQRSHRN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshrn_sb)
|
||||
TRANS_FEAT(UQRSHRN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshrn_dh)
|
||||
TRANS_FEAT(SQRSHRUN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrun_sb)
|
||||
TRANS_FEAT(SQRSHRUN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrun_dh)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue