mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-07 05:04:04 +08:00
drm/nouveau/gr/gf100-: port tile mapping calculations from NVGPU
There's also a couple of hardcoded tables for a couple of very specific configurations that NVGPU's algorithm didn't work for. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
This commit is contained in:
parent
5c05a58985
commit
5f6474a4e6
@ -1116,27 +1116,14 @@ gf100_grctx_generate_rop_mapping(struct gf100_gr *gr)
|
||||
{
|
||||
struct nvkm_device *device = gr->base.engine.subdev.device;
|
||||
u32 data[6] = {}, data2[2] = {};
|
||||
u8 tpcnr[GPC_MAX];
|
||||
u8 shift, ntpcv;
|
||||
int gpc, tpc, i;
|
||||
int i;
|
||||
|
||||
/* calculate first set of magics */
|
||||
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
|
||||
/* Pack tile map into register format. */
|
||||
for (i = 0; i < 32; i++)
|
||||
data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
|
||||
|
||||
gpc = -1;
|
||||
for (tpc = 0; tpc < gr->tpc_total; tpc++) {
|
||||
do {
|
||||
gpc = (gpc + 1) % gr->gpc_nr;
|
||||
} while (!tpcnr[gpc]);
|
||||
tpcnr[gpc]--;
|
||||
|
||||
data[tpc / 6] |= gpc << ((tpc % 6) * 5);
|
||||
}
|
||||
|
||||
for (; tpc < 32; tpc++)
|
||||
data[tpc / 6] |= 7 << ((tpc % 6) * 5);
|
||||
|
||||
/* and the second... */
|
||||
/* Magic. */
|
||||
shift = 0;
|
||||
ntpcv = gr->tpc_total;
|
||||
while (!(ntpcv & (1 << 4))) {
|
||||
|
@ -194,27 +194,14 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
|
||||
{
|
||||
struct nvkm_device *device = gr->base.engine.subdev.device;
|
||||
u32 data[6] = {}, data2[2] = {};
|
||||
u8 tpcnr[GPC_MAX];
|
||||
u8 shift, ntpcv;
|
||||
int gpc, tpc, i;
|
||||
int i;
|
||||
|
||||
/* calculate first set of magics */
|
||||
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
|
||||
/* Pack tile map into register format. */
|
||||
for (i = 0; i < 32; i++)
|
||||
data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
|
||||
|
||||
gpc = -1;
|
||||
for (tpc = 0; tpc < gr->tpc_total; tpc++) {
|
||||
do {
|
||||
gpc = (gpc + 1) % gr->gpc_nr;
|
||||
} while (!tpcnr[gpc]);
|
||||
tpcnr[gpc]--;
|
||||
|
||||
data[tpc / 6] |= gpc << ((tpc % 6) * 5);
|
||||
}
|
||||
|
||||
for (; tpc < 32; tpc++)
|
||||
data[tpc / 6] |= 7 << ((tpc % 6) * 5);
|
||||
|
||||
/* and the second... */
|
||||
/* Magic. */
|
||||
shift = 0;
|
||||
ntpcv = gr->tpc_total;
|
||||
while (!(ntpcv & (1 << 4))) {
|
||||
|
@ -1652,6 +1652,82 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
gf100_gr_oneinit_tiles(struct gf100_gr *gr)
|
||||
{
|
||||
static const u8 primes[] = {
|
||||
3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
|
||||
};
|
||||
int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
|
||||
u32 mul_factor, comm_denom;
|
||||
u8 gpc_map[GPC_MAX];
|
||||
bool sorted;
|
||||
|
||||
switch (gr->tpc_total) {
|
||||
case 15: gr->screen_tile_row_offset = 0x06; break;
|
||||
case 14: gr->screen_tile_row_offset = 0x05; break;
|
||||
case 13: gr->screen_tile_row_offset = 0x02; break;
|
||||
case 11: gr->screen_tile_row_offset = 0x07; break;
|
||||
case 10: gr->screen_tile_row_offset = 0x06; break;
|
||||
case 7:
|
||||
case 5: gr->screen_tile_row_offset = 0x01; break;
|
||||
case 3: gr->screen_tile_row_offset = 0x02; break;
|
||||
case 2:
|
||||
case 1: gr->screen_tile_row_offset = 0x01; break;
|
||||
default: gr->screen_tile_row_offset = 0x03;
|
||||
for (i = 0; i < ARRAY_SIZE(primes); i++) {
|
||||
if (gr->tpc_total % primes[i]) {
|
||||
gr->screen_tile_row_offset = primes[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Sort GPCs by TPC count, highest-to-lowest. */
|
||||
for (i = 0; i < gr->gpc_nr; i++)
|
||||
gpc_map[i] = i;
|
||||
sorted = false;
|
||||
|
||||
while (!sorted) {
|
||||
for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) {
|
||||
if (gr->tpc_nr[gpc_map[i + 1]] >
|
||||
gr->tpc_nr[gpc_map[i + 0]]) {
|
||||
u8 swap = gpc_map[i];
|
||||
gpc_map[i + 0] = gpc_map[i + 1];
|
||||
gpc_map[i + 1] = swap;
|
||||
sorted = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Determine tile->GPC mapping */
|
||||
mul_factor = gr->gpc_nr * gr->tpc_max;
|
||||
if (mul_factor & 1)
|
||||
mul_factor = 2;
|
||||
else
|
||||
mul_factor = 1;
|
||||
|
||||
comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor;
|
||||
|
||||
for (i = 0; i < gr->gpc_nr; i++) {
|
||||
init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
|
||||
init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
|
||||
run_err[i] = init_frac[i] + init_err[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < gr->tpc_total;) {
|
||||
for (j = 0; j < gr->gpc_nr; j++) {
|
||||
if ((run_err[j] * 2) >= comm_denom) {
|
||||
gr->tile[i++] = gpc_map[j];
|
||||
run_err[j] += init_frac[j] - comm_denom;
|
||||
} else {
|
||||
run_err[j] += init_frac[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
gf100_gr_oneinit(struct nvkm_gr *base)
|
||||
{
|
||||
@ -1691,45 +1767,8 @@ gf100_gr_oneinit(struct nvkm_gr *base)
|
||||
}
|
||||
}
|
||||
|
||||
/*XXX: these need figuring out... though it might not even matter */
|
||||
switch (device->chipset) {
|
||||
case 0xc0:
|
||||
if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
|
||||
gr->screen_tile_row_offset = 0x07;
|
||||
} else
|
||||
if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
|
||||
gr->screen_tile_row_offset = 0x05;
|
||||
} else
|
||||
if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
|
||||
gr->screen_tile_row_offset = 0x06;
|
||||
}
|
||||
break;
|
||||
case 0xc3: /* 450, 4/0/0/0, 2 */
|
||||
gr->screen_tile_row_offset = 0x03;
|
||||
break;
|
||||
case 0xc4: /* 460, 3/4/0/0, 4 */
|
||||
gr->screen_tile_row_offset = 0x01;
|
||||
break;
|
||||
case 0xc1: /* 2/0/0/0, 1 */
|
||||
gr->screen_tile_row_offset = 0x01;
|
||||
break;
|
||||
case 0xc8: /* 4/4/3/4, 5 */
|
||||
gr->screen_tile_row_offset = 0x06;
|
||||
break;
|
||||
case 0xce: /* 4/4/0/0, 4 */
|
||||
gr->screen_tile_row_offset = 0x03;
|
||||
break;
|
||||
case 0xcf: /* 4/0/0/0, 3 */
|
||||
gr->screen_tile_row_offset = 0x03;
|
||||
break;
|
||||
case 0xd7:
|
||||
case 0xd9: /* 1/0/0/0, 1 */
|
||||
case 0xea: /* gk20a */
|
||||
case 0x12b: /* gm20b */
|
||||
gr->screen_tile_row_offset = 0x01;
|
||||
break;
|
||||
}
|
||||
|
||||
memset(gr->tile, 0xff, sizeof(gr->tile));
|
||||
gr->func->oneinit_tiles(gr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2164,6 +2203,7 @@ gf100_gr_gpccs_ucode = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gf100_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
@ -107,12 +107,13 @@ struct gf100_gr {
|
||||
u8 ppc_tpc_nr[GPC_MAX][4];
|
||||
u8 ppc_tpc_min;
|
||||
|
||||
u8 screen_tile_row_offset;
|
||||
u8 tile[TPC_MAX];
|
||||
|
||||
struct gf100_gr_data mmio_data[4];
|
||||
struct gf100_gr_mmio mmio_list[4096/8];
|
||||
u32 size;
|
||||
u32 *data;
|
||||
|
||||
u8 screen_tile_row_offset;
|
||||
};
|
||||
|
||||
int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
|
||||
@ -123,6 +124,7 @@ void *gf100_gr_dtor(struct nvkm_gr *);
|
||||
|
||||
struct gf100_gr_func {
|
||||
void (*dtor)(struct gf100_gr *);
|
||||
void (*oneinit_tiles)(struct gf100_gr *);
|
||||
int (*init)(struct gf100_gr *);
|
||||
void (*init_gpc_mmu)(struct gf100_gr *);
|
||||
void (*init_r405a14)(struct gf100_gr *);
|
||||
@ -164,6 +166,7 @@ struct gf100_gr_func {
|
||||
};
|
||||
|
||||
int gf100_gr_rops(struct gf100_gr *);
|
||||
void gf100_gr_oneinit_tiles(struct gf100_gr *);
|
||||
int gf100_gr_init(struct gf100_gr *);
|
||||
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
|
||||
void gf100_gr_init_zcull(struct gf100_gr *);
|
||||
@ -191,6 +194,7 @@ void gm107_gr_init_400054(struct gf100_gr *);
|
||||
|
||||
int gk20a_gr_init(struct gf100_gr *);
|
||||
|
||||
void gm200_gr_oneinit_tiles(struct gf100_gr *);
|
||||
int gm200_gr_rops(struct gf100_gr *);
|
||||
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
|
||||
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
|
||||
|
@ -114,6 +114,7 @@ gf104_gr_pack_mmio[] = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gf104_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
@ -111,6 +111,7 @@ gf108_gr_init_r405a14(struct gf100_gr *gr)
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gf108_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_r405a14 = gf108_gr_init_r405a14,
|
||||
|
@ -86,6 +86,7 @@ gf110_gr_pack_mmio[] = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gf110_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
@ -150,6 +150,7 @@ gf117_gr_init_zcull(struct gf100_gr *gr)
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gf117_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
@ -177,6 +177,7 @@ gf119_gr_pack_mmio[] = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gf119_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
@ -448,6 +448,7 @@ gk104_gr_gpccs_ucode = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gk104_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -350,6 +350,7 @@ gk110_gr_init_419eb4(struct gf100_gr *gr)
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gk110_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -102,6 +102,7 @@ gk110b_gr_pack_mmio[] = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gk110b_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -161,6 +161,7 @@ gk208_gr_gpccs_ucode = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gk208_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -282,6 +282,7 @@ gk20a_gr_init(struct gf100_gr *gr)
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gk20a_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gk20a_gr_init,
|
||||
.init_zcull = gf117_gr_init_zcull,
|
||||
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
|
||||
|
@ -391,6 +391,7 @@ gm107_gr_gpccs_ucode = {
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gm107_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm107_gr_init_gpc_mmu,
|
||||
.init_bios = gm107_gr_init_bios,
|
||||
|
@ -77,6 +77,46 @@ gm200_gr_init_rop_active_fbps(struct gf100_gr *gr)
|
||||
nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
|
||||
}
|
||||
|
||||
static u8
|
||||
gm200_gr_tile_map_6_24[] = {
|
||||
0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2,
|
||||
};
|
||||
|
||||
static u8
|
||||
gm200_gr_tile_map_4_16[] = {
|
||||
0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0,
|
||||
};
|
||||
|
||||
static u8
|
||||
gm200_gr_tile_map_2_8[] = {
|
||||
0, 1, 1, 0, 0, 1, 1, 0,
|
||||
};
|
||||
|
||||
void
|
||||
gm200_gr_oneinit_tiles(struct gf100_gr *gr)
|
||||
{
|
||||
/*XXX: Not sure what this is about. The algorithm from NVGPU
|
||||
* seems to work for all boards I tried from earlier (and
|
||||
* later) GPUs except in these specific configurations.
|
||||
*
|
||||
* Let's just hardcode them for now.
|
||||
*/
|
||||
if (gr->gpc_nr == 2 && gr->tpc_total == 8) {
|
||||
memcpy(gr->tile, gm200_gr_tile_map_2_8, gr->tpc_total);
|
||||
gr->screen_tile_row_offset = 1;
|
||||
} else
|
||||
if (gr->gpc_nr == 4 && gr->tpc_total == 16) {
|
||||
memcpy(gr->tile, gm200_gr_tile_map_4_16, gr->tpc_total);
|
||||
gr->screen_tile_row_offset = 4;
|
||||
} else
|
||||
if (gr->gpc_nr == 6 && gr->tpc_total == 24) {
|
||||
memcpy(gr->tile, gm200_gr_tile_map_6_24, gr->tpc_total);
|
||||
gr->screen_tile_row_offset = 5;
|
||||
} else {
|
||||
gf100_gr_oneinit_tiles(gr);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
|
||||
int index, struct nvkm_gr **pgr)
|
||||
@ -117,6 +157,7 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gm200_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_bios = gm107_gr_init_bios,
|
||||
|
@ -64,6 +64,7 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gm20b_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.init = gk20a_gr_init,
|
||||
.init_zcull = gf117_gr_init_zcull,
|
||||
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
|
||||
|
@ -64,6 +64,7 @@ gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gp100_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -42,6 +42,7 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gp102_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gp104_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gp107_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
@ -27,6 +27,7 @@
|
||||
|
||||
static const struct gf100_gr_func
|
||||
gp10b_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
Loading…
Reference in New Issue
Block a user