aarch64: Turn sve_width tuning field into a bitmask

The tuning structures have an sve_width field that specifies the
number of bits in an SVE vector (or SVE_NOT_IMPLEMENTED if not
applicable).  This patch turns the field into a bitmask so that
it can specify multiple widths at the same time.  For now we
always treat the minimum width as the likely width.

An alternative would have been to add extra fields, which would
have coped correctly with non-power-of-2 widths.  However,
we're very far from supporting constant non-power-of-2 vectors
in GCC, so I think the non-power-of-2 case will in reality always
have to be hidden behind VLA.

gcc/
	* config/aarch64/aarch64-protos.h (tune_params::sve_width): Turn
	into a bitmask.
	* config/aarch64/aarch64.c (aarch64_cmp_autovec_modes): Update
	accordingly.
	(aarch64_estimated_poly_value): Likewise.  Use the least significant
	set bit for the minimum and likely values.  Use the most significant
	set bit for the maximum value.
This commit is contained in:
Richard Sandiford 2021-08-03 13:00:43 +01:00
parent d0b952edd3
commit fa3ca6151c
2 changed files with 14 additions and 9 deletions

View File

@ -506,10 +506,10 @@ struct tune_params
const struct cpu_vector_cost *vec_costs;
const struct cpu_branch_cost *branch_costs;
const struct cpu_approx_modes *approx_modes;
/* Width of the SVE registers or SVE_NOT_IMPLEMENTED if not applicable.
Only used for tuning decisions, does not disable VLA
vectorization. */
enum aarch64_sve_vector_bits_enum sve_width;
/* A bitmask of the possible SVE register widths in bits,
or SVE_NOT_IMPLEMENTED if not applicable. Only used for tuning
decisions, does not disable VLA vectorization. */
unsigned int sve_width;
int memmov_cost;
int issue_rate;
unsigned int fusible_ops;

View File

@ -19144,14 +19144,12 @@ aarch64_cmp_autovec_modes (machine_mode sve_m, machine_mode asimd_m)
bool prefer_asimd = aarch64_autovec_preference == 3;
bool prefer_sve = aarch64_autovec_preference == 4;
aarch64_sve_vector_bits_enum tune_width = aarch64_tune_params.sve_width;
poly_int64 nunits_sve = GET_MODE_NUNITS (sve_m);
poly_int64 nunits_asimd = GET_MODE_NUNITS (asimd_m);
/* If the CPU information does not have an SVE width registered use the
generic poly_int comparison that prefers SVE. If a preference is
explicitly requested avoid this path. */
if (tune_width == SVE_SCALABLE
if (aarch64_tune_params.sve_width == SVE_SCALABLE
&& !prefer_asimd
&& !prefer_sve)
return maybe_gt (nunits_sve, nunits_asimd);
@ -24980,8 +24978,7 @@ aarch64_estimated_poly_value (poly_int64 val,
poly_value_estimate_kind kind
= POLY_VALUE_LIKELY)
{
enum aarch64_sve_vector_bits_enum width_source
= aarch64_tune_params.sve_width;
unsigned int width_source = aarch64_tune_params.sve_width;
/* If there is no core-specific information then the minimum and likely
values are based on 128-bit vectors and the maximum is based on
@ -24996,6 +24993,14 @@ aarch64_estimated_poly_value (poly_int64 val,
return val.coeffs[0] + val.coeffs[1] * 15;
}
/* Allow sve_width to be a bitmask of different VL, treating the lowest
as likely. This could be made more general if future -mtune options
need it to be. */
if (kind == POLY_VALUE_MAX)
width_source = 1 << floor_log2 (width_source);
else
width_source = least_bit_hwi (width_source);
/* If the core provides width information, use that. */
HOST_WIDE_INT over_128 = width_source - 128;
return val.coeffs[0] + val.coeffs[1] * over_128 / 128;