mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2024-11-28 20:54:00 +08:00
radeonsi: ARB_gpu_shader_fp64 + ARB_vertex_attrib_64bit support.
This adds the translation from TGSI to AMDGPU llvm backend, for the 64-bit opcodes. The backend pretty much handles everything for us fine. There is one patch required for SI DFRAC support, that I know of. [airlied: fixed missing comma, updated relnotes] Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
8108de4774
commit
4cbf0a0ccf
@ -109,7 +109,7 @@ GL 4.0, GLSL 4.00:
|
||||
- Enhanced per-sample shading DONE (r600, radeonsi)
|
||||
- Interpolation functions DONE (r600)
|
||||
- New overload resolution rules DONE
|
||||
GL_ARB_gpu_shader_fp64 DONE (nvc0, llvmpipe, softpipe)
|
||||
GL_ARB_gpu_shader_fp64 DONE (nvc0, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_sample_shading DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_shader_subroutine started (Dave)
|
||||
GL_ARB_tessellation_shader started (Chris, Ilia)
|
||||
@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10:
|
||||
GL_ARB_get_program_binary DONE (0 binary formats)
|
||||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_shader_precision started (Micah)
|
||||
GL_ARB_vertex_attrib_64bit DONE (nvc0, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_attrib_64bit DONE (nvc0, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
|
||||
|
||||
|
||||
|
@ -47,9 +47,9 @@ Note: some of the new features are only available with certain drivers.
|
||||
<li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
|
||||
<li>GL_ARB_fragment_layer_viewport on radeonsi</li>
|
||||
<li>GL_ARB_framebuffer_no_attachments on i965</li>
|
||||
<li>GL_ARB_gpu_shader_fp64 on llvmpipe</li>
|
||||
<li>GL_ARB_gpu_shader_fp64 on llvmpipe, radeonsi</li>
|
||||
<li>GL_ARB_shader_stencil_export on llvmpipe</li>
|
||||
<li>GL_ARB_vertex_attrib_64bit on llvmpipe</li>
|
||||
<li>GL_ARB_vertex_attrib_64bit on llvmpipe, radeonsi</li>
|
||||
<li>GL_ARB_viewport_array on radeonsi</li>
|
||||
<li>GLX_ARB_create_context_robustness on r600, radeonsi</li>
|
||||
<li>EGL_EXT_create_context_robustness on r600, radeonsi</li>
|
||||
|
@ -146,6 +146,8 @@ static inline LLVMTypeRef tgsi2llvmtype(
|
||||
case TGSI_TYPE_UNSIGNED:
|
||||
case TGSI_TYPE_SIGNED:
|
||||
return LLVMInt32TypeInContext(ctx);
|
||||
case TGSI_TYPE_DOUBLE:
|
||||
return LLVMDoubleTypeInContext(ctx);
|
||||
case TGSI_TYPE_UNTYPED:
|
||||
case TGSI_TYPE_FLOAT:
|
||||
return LLVMFloatTypeInContext(ctx);
|
||||
@ -205,6 +207,9 @@ build_tgsi_intrinsic_nomem(
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
radeon_llvm_emit_fetch_double(struct lp_build_tgsi_context *bld_base,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef ptr2);
|
||||
|
||||
#endif /* RADEON_LLVM_H */
|
||||
|
@ -116,6 +116,28 @@ emit_fetch(
|
||||
enum tgsi_opcode_type type,
|
||||
unsigned swizzle);
|
||||
|
||||
LLVMValueRef
|
||||
radeon_llvm_emit_fetch_double(
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef ptr2)
|
||||
{
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
LLVMValueRef result;
|
||||
|
||||
result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
|
||||
|
||||
result = LLVMBuildInsertElement(builder,
|
||||
result,
|
||||
bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
|
||||
bld_base->int_bld.zero, "");
|
||||
result = LLVMBuildInsertElement(builder,
|
||||
result,
|
||||
bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
|
||||
bld_base->int_bld.one, "");
|
||||
return bitcast(bld_base, TGSI_TYPE_DOUBLE, result);
|
||||
}
|
||||
|
||||
static LLVMValueRef
|
||||
emit_array_fetch(
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
@ -160,7 +182,7 @@ emit_fetch(
|
||||
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
|
||||
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
LLVMValueRef result = NULL, ptr;
|
||||
LLVMValueRef result = NULL, ptr, ptr2;
|
||||
|
||||
if (swizzle == ~0) {
|
||||
LLVMValueRef values[TGSI_NUM_CHANNELS];
|
||||
@ -184,11 +206,27 @@ emit_fetch(
|
||||
switch(reg->Register.File) {
|
||||
case TGSI_FILE_IMMEDIATE: {
|
||||
LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
|
||||
return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
|
||||
if (type == TGSI_TYPE_DOUBLE) {
|
||||
result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
|
||||
result = LLVMConstInsertElement(result,
|
||||
bld->immediates[reg->Register.Index][swizzle],
|
||||
bld_base->int_bld.zero);
|
||||
result = LLVMConstInsertElement(result,
|
||||
bld->immediates[reg->Register.Index][swizzle + 1],
|
||||
bld_base->int_bld.one);
|
||||
return LLVMConstBitCast(result, ctype);
|
||||
} else {
|
||||
return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
|
||||
}
|
||||
}
|
||||
|
||||
case TGSI_FILE_INPUT:
|
||||
result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
|
||||
if (type == TGSI_TYPE_DOUBLE) {
|
||||
ptr = result;
|
||||
ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
|
||||
return radeon_llvm_emit_fetch_double(bld_base, ptr, ptr2);
|
||||
}
|
||||
break;
|
||||
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
@ -199,11 +237,23 @@ emit_fetch(
|
||||
break;
|
||||
}
|
||||
ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
|
||||
if (type == TGSI_TYPE_DOUBLE) {
|
||||
ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
|
||||
return radeon_llvm_emit_fetch_double(bld_base,
|
||||
LLVMBuildLoad(builder, ptr, ""),
|
||||
LLVMBuildLoad(builder, ptr2, ""));
|
||||
}
|
||||
result = LLVMBuildLoad(builder, ptr, "");
|
||||
break;
|
||||
|
||||
case TGSI_FILE_OUTPUT:
|
||||
ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
|
||||
if (type == TGSI_TYPE_DOUBLE) {
|
||||
ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
|
||||
return radeon_llvm_emit_fetch_double(bld_base,
|
||||
LLVMBuildLoad(builder, ptr, ""),
|
||||
LLVMBuildLoad(builder, ptr2, ""));
|
||||
}
|
||||
result = LLVMBuildLoad(builder, ptr, "");
|
||||
break;
|
||||
|
||||
@ -348,9 +398,10 @@ emit_store(
|
||||
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
|
||||
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
|
||||
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
|
||||
LLVMValueRef temp_ptr;
|
||||
LLVMValueRef temp_ptr, temp_ptr2 = NULL;
|
||||
unsigned chan, chan_index;
|
||||
boolean is_vec_store = FALSE;
|
||||
enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
|
||||
|
||||
if (dst[0]) {
|
||||
LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
|
||||
@ -371,6 +422,8 @@ emit_store(
|
||||
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
|
||||
LLVMValueRef value = dst[chan_index];
|
||||
|
||||
if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
|
||||
continue;
|
||||
if (inst->Instruction.Saturate)
|
||||
value = radeon_llvm_saturate(bld_base, value);
|
||||
|
||||
@ -379,8 +432,9 @@ emit_store(
|
||||
LLVMBuildStore(builder, value, temp_ptr);
|
||||
continue;
|
||||
}
|
||||
|
||||
value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
|
||||
|
||||
if (dtype != TGSI_TYPE_DOUBLE)
|
||||
value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
|
||||
|
||||
if (reg->Register.Indirect) {
|
||||
struct tgsi_declaration_range range = get_array_range(bld_base,
|
||||
@ -418,6 +472,8 @@ emit_store(
|
||||
switch(reg->Register.File) {
|
||||
case TGSI_FILE_OUTPUT:
|
||||
temp_ptr = bld->outputs[reg->Register.Index][chan_index];
|
||||
if (dtype == TGSI_TYPE_DOUBLE)
|
||||
temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
|
||||
break;
|
||||
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
@ -428,12 +484,28 @@ emit_store(
|
||||
break;
|
||||
}
|
||||
temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
|
||||
if (dtype == TGSI_TYPE_DOUBLE)
|
||||
temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
return;
|
||||
}
|
||||
LLVMBuildStore(builder, value, temp_ptr);
|
||||
if (dtype != TGSI_TYPE_DOUBLE)
|
||||
LLVMBuildStore(builder, value, temp_ptr);
|
||||
else {
|
||||
LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
|
||||
LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
|
||||
LLVMValueRef val2;
|
||||
value = LLVMBuildExtractElement(builder, ptr,
|
||||
bld_base->uint_bld.zero, "");
|
||||
val2 = LLVMBuildExtractElement(builder, ptr,
|
||||
bld_base->uint_bld.one, "");
|
||||
|
||||
LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
|
||||
LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -996,6 +1068,35 @@ static void emit_fcmp(
|
||||
emit_data->output[emit_data->chan] = v;
|
||||
}
|
||||
|
||||
static void emit_dcmp(
|
||||
const struct lp_build_tgsi_action *action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
LLVMContextRef context = bld_base->base.gallivm->context;
|
||||
LLVMRealPredicate pred;
|
||||
|
||||
/* Use ordered for everything but NE (which is usual for
|
||||
* float comparisons)
|
||||
*/
|
||||
switch (emit_data->inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
|
||||
case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
|
||||
case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
|
||||
case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
|
||||
default: assert(!"unknown instruction"); pred = 0; break;
|
||||
}
|
||||
|
||||
LLVMValueRef v = LLVMBuildFCmp(builder, pred,
|
||||
emit_data->args[0], emit_data->args[1],"");
|
||||
|
||||
v = LLVMBuildSExtOrBitCast(builder, v,
|
||||
LLVMInt32TypeInContext(context), "");
|
||||
|
||||
emit_data->output[emit_data->chan] = v;
|
||||
}
|
||||
|
||||
static void emit_not(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
@ -1161,6 +1262,16 @@ static void emit_ineg(
|
||||
emit_data->args[0], "");
|
||||
}
|
||||
|
||||
static void emit_dneg(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
|
||||
emit_data->args[0], "");
|
||||
}
|
||||
|
||||
static void emit_f2i(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
@ -1423,6 +1534,12 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
|
||||
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
|
||||
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
|
||||
lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
|
||||
{
|
||||
struct lp_type dbl_type;
|
||||
dbl_type = type;
|
||||
dbl_type.width *= 2;
|
||||
lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
|
||||
}
|
||||
|
||||
bld_base->soa = 1;
|
||||
bld_base->emit_store = emit_store;
|
||||
@ -1461,10 +1578,24 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
|
||||
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
|
||||
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
|
||||
bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "fabs";
|
||||
bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
|
||||
bld_base->op_actions[TGSI_OPCODE_DFRAC].intr_name = "llvm.AMDIL.fraction.";
|
||||
bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
|
||||
bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
|
||||
bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
|
||||
bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
|
||||
bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
|
||||
bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
|
||||
bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
|
||||
bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
|
||||
bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
|
||||
bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
|
||||
bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
|
||||
bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
|
||||
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
|
||||
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
|
||||
|
@ -451,6 +451,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_DOUBLES:
|
||||
return HAVE_LLVM >= 0x0307;
|
||||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
return 0;
|
||||
|
@ -703,8 +703,15 @@ static LLVMValueRef fetch_constant(
|
||||
buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
|
||||
idx = reg->Register.Index * 4 + swizzle;
|
||||
|
||||
if (!reg->Register.Indirect)
|
||||
return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
|
||||
if (!reg->Register.Indirect) {
|
||||
if (type != TGSI_TYPE_DOUBLE)
|
||||
return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
|
||||
else {
|
||||
return radeon_llvm_emit_fetch_double(bld_base,
|
||||
si_shader_ctx->constants[buf][idx],
|
||||
si_shader_ctx->constants[buf][idx + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
|
||||
addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
|
||||
@ -713,9 +720,25 @@ static LLVMValueRef fetch_constant(
|
||||
lp_build_const_int32(base->gallivm, idx * 4));
|
||||
|
||||
result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
|
||||
addr, base->elem_type);
|
||||
addr, bld_base->base.elem_type);
|
||||
|
||||
return bitcast(bld_base, type, result);
|
||||
if (type != TGSI_TYPE_DOUBLE)
|
||||
result = bitcast(bld_base, type, result);
|
||||
else {
|
||||
LLVMValueRef addr2, result2;
|
||||
addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
|
||||
addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
|
||||
addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
|
||||
addr2 = lp_build_add(&bld_base->uint_bld, addr2,
|
||||
lp_build_const_int32(base->gallivm, idx * 4));
|
||||
|
||||
result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
|
||||
addr2, bld_base->base.elem_type);
|
||||
|
||||
result = radeon_llvm_emit_fetch_double(bld_base,
|
||||
result, result2);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Initialize arguments for the shader export intrinsic */
|
||||
|
Loading…
Reference in New Issue
Block a user