target/arm: Implement SVE2p1 PEXT
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250704142112.1018902-83-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
8f7e127b66
commit
16fe3bb942
5 changed files with 146 additions and 0 deletions
|
|
@ -2953,3 +2953,5 @@ DEF_HELPER_FLAGS_4(sve2p1_uminqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
|||
DEF_HELPER_FLAGS_4(sve2p1_uminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
/*
 * SVE2.1 PEXT: extract a mask predicate from a predicate-as-counter.
 * Arguments are (destination predicate pointer, counter value, descriptor).
 */
DEF_HELPER_FLAGS_3(pext, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@
|
|||
%rn_ax2 6:4 !function=times_2
|
||||
|
||||
%pnd 0:3 !function=plus_8
|
||||
# Predicate-as-counter source register: 3-bit field at bit 5, passed
# through plus_8 (presumably mapping the encoding onto registers 8..15).
%pnn 5:3 !function=plus_8
|
||||
|
||||
###########################################################################
|
||||
# Named attribute sets. These are used to make nice(er) names
|
||||
|
|
@ -823,6 +824,11 @@ WHILE_lt_cnt4 00100101 .. 1 ..... 0110 . 1 ..... 1 . ... @while_cnt
|
|||
WHILE_gt_cnt2 00100101 .. 1 ..... 0100 . 0 ..... 1 . ... @while_cnt
|
||||
WHILE_gt_cnt4 00100101 .. 1 ..... 0110 . 0 ..... 1 . ... @while_cnt
|
||||
|
||||
# SVE2.1 extract mask predicate from predicate-as-counter
|
||||
# Argument set for PEXT: destination predicate (rd), source counter (rn),
# element size (esz) and the index of the counter portion to extract (imm).
&pext rd rn esz imm
|
||||
# Single destination predicate; the 2-bit imm selects one of four portions.
PEXT_1 00100101 esz:2 1 00000 0111 00 imm:2 ... 1 rd:4 &pext rn=%pnn
|
||||
# Pair of destination predicates; the 1-bit imm selects which pair of
# portions is extracted.
PEXT_2 00100101 esz:2 1 00000 0111 010 imm:1 ... 1 rd:4 &pext rn=%pnn
|
||||
|
||||
### SVE Integer Wide Immediate - Unpredicated Group
|
||||
|
||||
# SVE broadcast floating-point immediate (unpredicated)
|
||||
|
|
|
|||
|
|
@ -7821,3 +7821,31 @@ DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, H1_8, H1_4, float32_to_float64)
|
|||
|
||||
#undef DO_FCVTLT
|
||||
#undef DO_FCVTNT
|
||||
|
||||
void HELPER(pext)(void *vd, uint32_t png, uint32_t desc)
|
||||
{
|
||||
int pl = FIELD_EX32(desc, PREDDESC, OPRSZ);
|
||||
int vl = pl * 8;
|
||||
unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ);
|
||||
int part = FIELD_EX32(desc, PREDDESC, DATA);
|
||||
DecodeCounter p = decode_counter(png, vl, v_esz);
|
||||
uint64_t mask = pred_esz_masks[v_esz + p.lg2_stride];
|
||||
ARMPredicateReg *d = vd;
|
||||
|
||||
/*
|
||||
* Convert from element count to byte count and adjust
|
||||
* for the portion of the 4*VL counter to be extracted.
|
||||
*/
|
||||
int b_count = (p.count << v_esz) - vl * part;
|
||||
|
||||
memset(d, 0, sizeof(*d));
|
||||
if (p.invert) {
|
||||
if (b_count <= 0) {
|
||||
do_whilel(vd, mask, vl, vl);
|
||||
} else if (b_count < vl) {
|
||||
do_whileg(vd, mask, vl - b_count, vl);
|
||||
}
|
||||
} else if (b_count > 0) {
|
||||
do_whilel(vd, mask, MIN(b_count, vl), vl);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3336,6 +3336,42 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
 * Translate PEXT for @n consecutive destination predicates.
 *
 * Loads the low 16 bits of the source predicate-as-counter once, then
 * emits one pext helper call per destination register.  Destination
 * numbers wrap modulo 16, and the portion index handed to the helper is
 * a->imm * n + i for the i'th destination.
 *
 * Always returns true; when sve_access_check() fails no code is emitted.
 */
static bool do_pext(DisasContext *s, arg_pext *a, int n)
{
    TCGv_i32 counter;
    TCGv_ptr dest;
    unsigned desc_base;
    int pred_bytes;

    if (!sve_access_check(s)) {
        return true;
    }

    /*
     * 16-bit load of the counter.  On big-endian hosts the offset is
     * adjusted by 6 -- presumably because the register is stored as
     * host-endian 64-bit words, so the low halfword sits at the far end.
     */
    counter = tcg_temp_new_i32();
    tcg_gen_ld16u_i32(counter, tcg_env,
                      pred_full_reg_offset(s, a->rn) ^
                      (HOST_BIG_ENDIAN ? 6 : 0));

    dest = tcg_temp_new_ptr();
    pred_bytes = pred_full_reg_size(s);

    /* OPRSZ and ESZ are the same for every destination: build them once. */
    desc_base = 0;
    desc_base = FIELD_DP32(desc_base, PREDDESC, OPRSZ, pred_bytes);
    desc_base = FIELD_DP32(desc_base, PREDDESC, ESZ, a->esz);

    for (int idx = 0; idx < n; idx++) {
        int part = a->imm * n + idx;
        int rd = (a->rd + idx) % 16;
        unsigned desc = FIELD_DP32(desc_base, PREDDESC, DATA, part);

        tcg_gen_addi_ptr(dest, tcg_env, pred_full_reg_offset(s, rd));
        gen_helper_pext(dest, counter, tcg_constant_i32(desc));
    }
    return true;
}
|
||||
|
||||
/*
 * PEXT to a single predicate: do_pext with n = 1.  TRANS_FEAT is defined
 * elsewhere; presumably it gates the instruction on aa64_sme2_or_sve2p1.
 */
TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1)
|
||||
/* PEXT to a pair of consecutive predicates: do_pext with n = 2. */
TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2)
|
||||
|
||||
/*
|
||||
*** SVE Integer Wide Immediate - Unpredicated Group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -337,4 +337,78 @@ bfloat16 helper_sme2_ah_fmin_b16(bfloat16 a, bfloat16 b, float_status *fpst);
|
|||
float32 sve_f16_to_f32(float16 f, float_status *fpst);
|
||||
float16 sve_f32_to_f16(float32 f, float_status *fpst);
|
||||
|
||||
/*
|
||||
* Decode helper functions for predicate as counter.
|
||||
*/
|
||||
|
||||
/* Result of decoding a predicate-as-counter value; see decode_counter(). */
typedef struct DecodeCounter {
    /* Element count, expressed in elements of the vector element size. */
    unsigned count;
    /* log2 of the element stride; 0 means every element is significant. */
    unsigned lg2_stride;
    /* Invert flag, taken from bit 15 of the counter. */
    bool invert;
} DecodeCounter;
|
||||
|
||||
static inline DecodeCounter
|
||||
decode_counter(unsigned png, unsigned vl, unsigned v_esz)
|
||||
{
|
||||
DecodeCounter ret = { };
|
||||
|
||||
/* C.f. Arm pseudocode CounterToPredicate. */
|
||||
if (likely(png & 0xf)) {
|
||||
unsigned p_esz = ctz32(png);
|
||||
|
||||
/*
|
||||
* maxbit = log2(pl(bits) * 4)
|
||||
* = log2(vl(bytes) * 4)
|
||||
* = log2(vl) + 2
|
||||
* maxbit_mask = ones<maxbit:0>
|
||||
* = (1 << (maxbit + 1)) - 1
|
||||
* = (1 << (log2(vl) + 2 + 1)) - 1
|
||||
* = (1 << (log2(vl) + 3)) - 1
|
||||
* = (pow2ceil(vl) << 3) - 1
|
||||
*/
|
||||
ret.count = png & (((unsigned)pow2ceil(vl) << 3) - 1);
|
||||
ret.count >>= p_esz + 1;
|
||||
|
||||
ret.invert = (png >> 15) & 1;
|
||||
|
||||
/*
|
||||
* The Arm pseudocode for CounterToPredicate expands the count to
|
||||
* a set of bits, and then the operation proceeds as for the original
|
||||
* interpretation of predicates as a set of bits.
|
||||
*
|
||||
* We can avoid the expansion by adjusting the count and supplying
|
||||
* an element stride.
|
||||
*/
|
||||
if (unlikely(p_esz != v_esz)) {
|
||||
if (p_esz < v_esz) {
|
||||
/*
|
||||
* For predicate esz < vector esz, the expanded predicate
|
||||
* will have more bits set than will be consumed.
|
||||
* Adjust the count down, rounding up.
|
||||
* Consider p_esz = MO_8, v_esz = MO_64, count 14:
|
||||
* The expanded predicate would be
|
||||
* 0011 1111 1111 1111
|
||||
* The significant bits are
|
||||
* ...1 ...1 ...1 ...1
|
||||
*/
|
||||
unsigned shift = v_esz - p_esz;
|
||||
unsigned trunc = ret.count >> shift;
|
||||
ret.count = trunc + (ret.count != (trunc << shift));
|
||||
} else {
|
||||
/*
|
||||
* For predicate esz > vector esz, the expanded predicate
|
||||
* will have bits set only at power-of-two multiples of
|
||||
* the vector esz. Bits at other multiples will all be
|
||||
* false. Adjust the count up, and supply the caller
|
||||
* with a stride of elements to skip.
|
||||
*/
|
||||
unsigned shift = p_esz - v_esz;
|
||||
ret.count <<= shift;
|
||||
ret.lg2_stride = shift;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* TARGET_ARM_VEC_INTERNAL_H */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue