intel/fs: Move packHalf2x16 handling to lower_pack()

This mainly lets the software scoreboarding pass correctly mark the
instructions, without needing to resort to fragile manual handling in
the generator.

We can also make small improvements.  On Gfx 8LP-12.0, we no longer have
the restrictions about DWord alignment, so we can simply write each half
into its intended location, rather than writing it to the low DWord and
then shifting it in place.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21783>
This commit is contained in:
Kenneth Graunke 2023-03-07 20:41:55 -08:00 committed by Marge Bot
parent f5e5705c91
commit c590a3eadf
3 changed files with 33 additions and 61 deletions

View File

@ -637,11 +637,6 @@ private:
void generate_halt(fs_inst *inst);
void generate_pack_half_2x16_split(fs_inst *inst,
struct brw_reg dst,
struct brw_reg x,
struct brw_reg y);
void generate_mov_indirect(fs_inst *inst,
struct brw_reg dst,
struct brw_reg reg,

View File

@ -1642,55 +1642,6 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
}
}
/* Emit native code for a packHalf2x16-style split pack: convert the 32-bit
 * floats x and y to half precision and combine them in each 32-bit channel
 * of dst as 0xhhhhllll, where llll comes from x and hhhh comes from y.
 *
 * The fs_inst argument is unnamed and unused.  dst must be typed UD and both
 * x and y must be typed F; these are asserted rather than handled.
 */
void
fs_generator::generate_pack_half_2x16_split(fs_inst *,
struct brw_reg dst,
struct brw_reg x,
struct brw_reg y)
{
assert(devinfo->ver >= 7);
assert(dst.type == BRW_REGISTER_TYPE_UD);
assert(x.type == BRW_REGISTER_TYPE_F);
assert(y.type == BRW_REGISTER_TYPE_F);
/* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
*
* Because this instruction does not have a 16-bit floating-point type,
* the destination data type must be Word (W).
*
* The destination must be DWord-aligned and specify a horizontal stride
* (HorzStride) of 2. The 16-bit result is stored in the lower word of
* each destination channel and the upper word is not modified.
*/
/* Word-sized view of dst used as the F32TO16 destination: typed HF on
 * ver > 7 and W on ver 7.  spread(..., 2) presumably applies the horizontal
 * stride of 2 required by the PRM text above -- confirm against spread().
 */
const enum brw_reg_type t = devinfo->ver > 7
? BRW_REGISTER_TYPE_HF : BRW_REGISTER_TYPE_W;
struct brw_reg dst_w = spread(retype(dst, t), 2);
if (y.file == IMM) {
/* y is an immediate: convert it to half here at compile time and write it,
 * already shifted into the upper 16 bits, with a single full-DWord MOV.
 */
const uint32_t hhhh0000 = _mesa_float_to_half(y.f) << 16;
brw_MOV(p, dst, brw_imm_ud(hhhh0000));
/* Manually set the default software scoreboard annotation to a register
 * distance of 1 for what is emitted next; presumably this orders the
 * final F32TO16 after the MOV above -- confirm.
 */
brw_set_default_swsb(p, tgl_swsb_regdist(1));
} else {
/* Give each 32-bit channel of dst the form below, where "." means
* unchanged.
* 0x....hhhh
*/
brw_F32TO16(p, dst_w, y);
/* Now the form:
* 0xhhhh0000
*/
/* The SHL reads the dst that the F32TO16 just wrote, so the scoreboard
 * annotation is set by hand before emitting it.
 */
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_SHL(p, dst, dst, brw_imm_ud(16u));
}
/* And, finally the form of packHalf2x16's output:
* 0xhhhhllll
*/
/* Per the PRM text quoted above, this write to the lower word leaves the
 * upper word (holding y's half) unmodified.
 */
brw_F32TO16(p, dst_w, x);
}
void
fs_generator::enable_debug(const char *shader_name)
{
@ -2350,10 +2301,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
generate_set_sample_id(inst, dst, src[0], src[1]);
break;
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
break;
case SHADER_OPCODE_HALT_TARGET:
/* This is the place where the final HALT needs to be inserted if
* we've emitted any discards. If not, this will emit no code.

View File

@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
#include "util/half_float.h"
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_fs_builder.h"
@ -33,7 +34,8 @@ fs_visitor::lower_pack()
bool progress = false;
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
if (inst->opcode != FS_OPCODE_PACK)
if (inst->opcode != FS_OPCODE_PACK &&
inst->opcode != FS_OPCODE_PACK_HALF_2x16_SPLIT)
continue;
assert(inst->dst.file == VGRF);
@ -48,8 +50,36 @@ fs_visitor::lower_pack()
*/
if (!inst->is_partial_write())
ibld.emit_undef_for_dst(inst);
for (unsigned i = 0; i < inst->sources; i++)
ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]);
switch (inst->opcode) {
case FS_OPCODE_PACK:
for (unsigned i = 0; i < inst->sources; i++)
ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]);
break;
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
assert(dst.type == BRW_REGISTER_TYPE_UD);
for (unsigned i = 0; i < inst->sources; i++) {
if (inst->src[i].file == IMM) {
const uint32_t half = _mesa_float_to_half(inst->src[i].f);
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, i),
brw_imm_uw(half));
} else if (i == 1 && devinfo->ver < 9) {
/* Pre-Skylake requires DWord aligned destinations */
fs_reg tmp = ibld.vgrf(BRW_REGISTER_TYPE_UD);
ibld.F32TO16(subscript(tmp, BRW_REGISTER_TYPE_HF, 0),
inst->src[i]);
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, 1),
subscript(tmp, BRW_REGISTER_TYPE_UW, 0));
} else {
ibld.F32TO16(subscript(dst, BRW_REGISTER_TYPE_HF, i),
inst->src[i]);
}
}
break;
default:
unreachable("skipped above");
}
inst->remove(block);
progress = true;