mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-12-02 22:54:05 +08:00
intel/fs: Move packHalf2x16 handling to lower_pack()
This mainly lets the software scoreboarding pass correctly mark the instructions, without needing to resort to fragile manual handling in the generator.

We can also make small improvements. On Gfx 8LP-12.0, we no longer have the restrictions about DWord alignment, so we can simply write each half into its intended location, rather than writing it to the low DWord and then shifting it in place.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21783>
This commit is contained in:
parent
f5e5705c91
commit
c590a3eadf
@ -637,11 +637,6 @@ private:
|
||||
|
||||
void generate_halt(fs_inst *inst);
|
||||
|
||||
void generate_pack_half_2x16_split(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg x,
|
||||
struct brw_reg y);
|
||||
|
||||
void generate_mov_indirect(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg reg,
|
||||
|
@ -1642,55 +1642,6 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Emit code for FS_OPCODE_PACK_HALF_2x16_SPLIT: convert the float sources
 * x and y to half precision and pack them into each 32-bit channel of dst,
 * with x in the low word and y in the high word (0xhhhhllll).
 *
 * The asserts below require devinfo->ver >= 7, a UD-typed dst, and F-typed
 * x and y. The fs_inst parameter is unnamed and unused.
 */
void
|
||||
fs_generator::generate_pack_half_2x16_split(fs_inst *,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg x,
|
||||
struct brw_reg y)
|
||||
{
|
||||
assert(devinfo->ver >= 7);
|
||||
assert(dst.type == BRW_REGISTER_TYPE_UD);
|
||||
assert(x.type == BRW_REGISTER_TYPE_F);
|
||||
assert(y.type == BRW_REGISTER_TYPE_F);
|
||||
|
||||
/* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
|
||||
*
|
||||
* Because this instruction does not have a 16-bit floating-point type,
|
||||
* the destination data type must be Word (W).
|
||||
*
|
||||
* The destination must be DWord-aligned and specify a horizontal stride
|
||||
* (HorzStride) of 2. The 16-bit result is stored in the lower word of
|
||||
* each destination channel and the upper word is not modified.
|
||||
*/
|
||||
/* 16-bit view of dst with a stride of 2, used as the F32TO16 destination;
 * typed HF when ver > 7 and W otherwise.
 */
const enum brw_reg_type t = devinfo->ver > 7
|
||||
? BRW_REGISTER_TYPE_HF : BRW_REGISTER_TYPE_W;
|
||||
struct brw_reg dst_w = spread(retype(dst, t), 2);
|
||||
|
||||
/* An immediate y is converted to half here, at code-generation time, and
 * written already shifted into the high word with a single MOV.
 */
if (y.file == IMM) {
|
||||
const uint32_t hhhh0000 = _mesa_float_to_half(y.f) << 16;
|
||||
|
||||
brw_MOV(p, dst, brw_imm_ud(hhhh0000));
|
||||
/* NOTE(review): presumably regdist(1) makes the following instruction
 * depend on the one just emitted — confirm against the SWSB helpers.
 */
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
} else {
|
||||
/* Give each 32-bit channel of dst the form below, where "." means
|
||||
* unchanged.
|
||||
* 0x....hhhh
|
||||
*/
|
||||
brw_F32TO16(p, dst_w, y);
|
||||
|
||||
/* Now the form:
|
||||
* 0xhhhh0000
|
||||
*/
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
brw_SHL(p, dst, dst, brw_imm_ud(16u));
|
||||
}
|
||||
|
||||
/* And, finally the form of packHalf2x16's output:
|
||||
* 0xhhhhllll
|
||||
*/
|
||||
brw_F32TO16(p, dst_w, x);
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::enable_debug(const char *shader_name)
|
||||
{
|
||||
@ -2350,10 +2301,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
||||
generate_set_sample_id(inst, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
|
||||
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_HALT_TARGET:
|
||||
/* This is the place where the final HALT needs to be inserted if
|
||||
* we've emitted any discards. If not, this will emit no code.
|
||||
|
@ -21,6 +21,7 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "util/half_float.h"
|
||||
#include "brw_fs.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "brw_fs_builder.h"
|
||||
@ -33,7 +34,8 @@ fs_visitor::lower_pack()
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
|
||||
if (inst->opcode != FS_OPCODE_PACK)
|
||||
if (inst->opcode != FS_OPCODE_PACK &&
|
||||
inst->opcode != FS_OPCODE_PACK_HALF_2x16_SPLIT)
|
||||
continue;
|
||||
|
||||
assert(inst->dst.file == VGRF);
|
||||
@ -48,8 +50,36 @@ fs_visitor::lower_pack()
|
||||
*/
|
||||
if (!inst->is_partial_write())
|
||||
ibld.emit_undef_for_dst(inst);
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]);
|
||||
|
||||
switch (inst->opcode) {
|
||||
case FS_OPCODE_PACK:
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]);
|
||||
break;
|
||||
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
|
||||
assert(dst.type == BRW_REGISTER_TYPE_UD);
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == IMM) {
|
||||
const uint32_t half = _mesa_float_to_half(inst->src[i].f);
|
||||
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, i),
|
||||
brw_imm_uw(half));
|
||||
} else if (i == 1 && devinfo->ver < 9) {
|
||||
/* Pre-Skylake requires DWord aligned destinations */
|
||||
fs_reg tmp = ibld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ibld.F32TO16(subscript(tmp, BRW_REGISTER_TYPE_HF, 0),
|
||||
inst->src[i]);
|
||||
ibld.MOV(subscript(dst, BRW_REGISTER_TYPE_UW, 1),
|
||||
subscript(tmp, BRW_REGISTER_TYPE_UW, 0));
|
||||
} else {
|
||||
ibld.F32TO16(subscript(dst, BRW_REGISTER_TYPE_HF, i),
|
||||
inst->src[i]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("skipped above");
|
||||
}
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
|
Loading…
Reference in New Issue
Block a user