diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index c5db0c4fe788..14c3fe692723 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -46,8 +46,9 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \ radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \ radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ - rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \ - radeon_test.o r200.o radeon_legacy_tv.o + rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ + r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ + r600_blit_kms.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 8e31e992ec53..a7edd0f2ac37 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -389,6 +389,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) pll_flags |= RADEON_PLL_USE_REF_DIV; } radeon_encoder = to_radeon_encoder(encoder); + break; } } diff --git a/drivers/gpu/drm/radeon/r300.h b/drivers/gpu/drm/radeon/avivod.h similarity index 50% rename from drivers/gpu/drm/radeon/r300.h rename to drivers/gpu/drm/radeon/avivod.h index 8486b4da9d69..d4e6e6e4a938 100644 --- a/drivers/gpu/drm/radeon/r300.h +++ b/drivers/gpu/drm/radeon/avivod.h @@ -1,7 +1,6 @@ /* - * Copyright 2008 Advanced Micro Devices, Inc. - * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,12 +24,37 @@ * Alex Deucher * Jerome Glisse */ -#ifndef R300_H -#define R300_H +#ifndef AVIVOD_H +#define AVIVOD_H -struct r300_asic { - const unsigned *reg_safe_bm; - unsigned reg_safe_bm_size; -}; + +#define D1CRTC_CONTROL 0x6080 +#define CRTC_EN (1 << 0) +#define D1CRTC_UPDATE_LOCK 0x60E8 +#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 +#define D1GRPH_SECONDARY_SURFACE_ADDRESS 0x6118 + +#define D2CRTC_CONTROL 0x6880 +#define D2CRTC_UPDATE_LOCK 0x68E8 +#define D2GRPH_PRIMARY_SURFACE_ADDRESS 0x6910 +#define D2GRPH_SECONDARY_SURFACE_ADDRESS 0x6918 + +#define D1VGA_CONTROL 0x0330 +#define DVGA_CONTROL_MODE_ENABLE (1 << 0) +#define DVGA_CONTROL_TIMING_SELECT (1 << 8) +#define DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9) +#define DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10) +#define DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16) +#define DVGA_CONTROL_ROTATE (1 << 24) +#define D2VGA_CONTROL 0x0338 + +#define VGA_HDP_CONTROL 0x328 +#define VGA_MEM_PAGE_SELECT_EN (1 << 0) +#define VGA_MEMORY_DISABLE (1 << 4) +#define VGA_RBBM_LOCK_DISABLE (1 << 8) +#define VGA_SOFT_RESET (1 << 16) +#define VGA_MEMORY_BASE_ADDRESS 0x0310 +#define VGA_RENDER_CONTROL 0x0300 +#define VGA_VSTATUS_CNTL_MASK 0x00030000 #endif diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ee3ab62417e2..5708c07ce733 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -31,6 +31,8 @@ #include "radeon_drm.h" #include "radeon_reg.h" #include "radeon.h" +#include "r100d.h" + #include #include @@ -391,9 +393,9 @@ int r100_wb_init(struct radeon_device *rdev) return r; } } - WREG32(0x774, rdev->wb.gpu_addr); - WREG32(0x70C, rdev->wb.gpu_addr + 1024); - WREG32(0x770, 0xff); + WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr); + WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024); + WREG32(RADEON_SCRATCH_UMSK, 0xff); return 0; } @@ -559,18 +561,18 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) fw_name = FIRMWARE_R520; } - err = request_firmware(&rdev->fw, fw_name, &pdev->dev); + err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); platform_device_unregister(pdev); if (err) { printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n", fw_name); - } else if (rdev->fw->size % 8) { + } else if (rdev->me_fw->size % 8) { printk(KERN_ERR "radeon_cp: Bogus length %zu in firmware \"%s\"\n", - rdev->fw->size, fw_name); + rdev->me_fw->size, fw_name); err = -EINVAL; - release_firmware(rdev->fw); - rdev->fw = NULL; + release_firmware(rdev->me_fw); + rdev->me_fw = NULL; } return err; } @@ -584,9 +586,9 @@ static void r100_cp_load_microcode(struct radeon_device *rdev) "programming pipes. Bad things might happen.\n"); } - if (rdev->fw) { - size = rdev->fw->size / 4; - fw_data = (const __be32 *)&rdev->fw->data[0]; + if (rdev->me_fw) { + size = rdev->me_fw->size / 4; + fw_data = (const __be32 *)&rdev->me_fw->data[0]; WREG32(RADEON_CP_ME_RAM_ADDR, 0); for (i = 0; i < size; i += 2) { WREG32(RADEON_CP_ME_RAM_DATAH, @@ -632,7 +634,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) DRM_INFO("radeon: cp idle (0x%08X)\n", tmp); } - if (!rdev->fw) { + if (!rdev->me_fw) { r = r100_cp_init_microcode(rdev); if (r) { DRM_ERROR("Failed to load firmware!\n"); @@ -765,6 +767,12 @@ int r100_cp_reset(struct radeon_device *rdev) return -1; } +void r100_cp_commit(struct radeon_device *rdev) +{ + WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); + (void)RREG32(RADEON_CP_RB_WPTR); +} + /* * CS functions @@ -2954,3 +2962,106 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track } } } + +int r100_ring_test(struct radeon_device *rdev) +{ + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + radeon_scratch_free(rdev, scratch); + return r; + } + radeon_ring_write(rdev, PACKET0(scratch, 0)); + radeon_ring_write(rdev, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + break; + } + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ring test succeeded in %d usecs\n", i); + } else { + DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + return r; +} + +void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); + radeon_ring_write(rdev, ib->gpu_addr); + radeon_ring_write(rdev, ib->length_dw); +} + +int r100_ib_test(struct radeon_device *rdev) +{ + struct radeon_ib *ib; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ib_get(rdev, &ib); + if (r) { + return r; + } + ib->ptr[0] = PACKET0(scratch, 0); + ib->ptr[1] = 0xDEADBEEF; + ib->ptr[2] = PACKET2(0); + ib->ptr[3] = PACKET2(0); + ib->ptr[4] = PACKET2(0); + ib->ptr[5] = PACKET2(0); + ib->ptr[6] = PACKET2(0); + ib->ptr[7] = PACKET2(0); + ib->length_dw = 8; + r = radeon_ib_schedule(rdev, ib); + if (r) { + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return r; + } + r = radeon_fence_wait(ib->fence, false); + if (r) { + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + break; + } + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test succeeded in %u usecs\n", i); + } else { + DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return r; +} diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h new file mode 100644 index 000000000000..6da7d92c321c --- /dev/null +++ b/drivers/gpu/drm/radeon/r100d.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef __R100D_H__ +#define __R100D_H__ + +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + +#endif diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 33a2c557eac4..a5f82f7beed6 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -33,6 +33,7 @@ #include "radeon_drm.h" #include "radeon_share.h" #include "r100_track.h" +#include "r300d.h" #include "r300_reg_safe.h" @@ -127,7 +128,7 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev) WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp); rv370_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n", - rdev->mc.gtt_size >> 20, table_addr); + (unsigned)(rdev->mc.gtt_size >> 20), table_addr); rdev->gart.ready = true; return 0; } diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h new file mode 100644 index 000000000000..63ec076f2cd4 --- /dev/null +++ b/drivers/gpu/drm/radeon/r300d.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef __R300D_H__ +#define __R300D_H__ + +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + +#endif diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 538cd907df69..d8fcef44a69f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -25,12 +25,46 @@ * Alex Deucher * Jerome Glisse */ +#include +#include +#include #include "drmP.h" -#include "radeon_reg.h" +#include "radeon_drm.h" #include "radeon.h" +#include "radeon_mode.h" +#include "radeon_share.h" +#include "r600d.h" +#include "avivod.h" +#include "atom.h" -/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); +#define PFP_UCODE_SIZE 576 +#define PM4_UCODE_SIZE 1792 +#define R700_PFP_UCODE_SIZE 848 +#define R700_PM4_UCODE_SIZE 1360 + +/* Firmware Names */ +MODULE_FIRMWARE("radeon/R600_pfp.bin"); +MODULE_FIRMWARE("radeon/R600_me.bin"); +MODULE_FIRMWARE("radeon/RV610_pfp.bin"); +MODULE_FIRMWARE("radeon/RV610_me.bin"); +MODULE_FIRMWARE("radeon/RV630_pfp.bin"); +MODULE_FIRMWARE("radeon/RV630_me.bin"); +MODULE_FIRMWARE("radeon/RV620_pfp.bin"); +MODULE_FIRMWARE("radeon/RV620_me.bin"); +MODULE_FIRMWARE("radeon/RV635_pfp.bin"); +MODULE_FIRMWARE("radeon/RV635_me.bin"); +MODULE_FIRMWARE("radeon/RV670_pfp.bin"); +MODULE_FIRMWARE("radeon/RV670_me.bin"); +MODULE_FIRMWARE("radeon/RS780_pfp.bin"); +MODULE_FIRMWARE("radeon/RS780_me.bin"); +MODULE_FIRMWARE("radeon/RV770_pfp.bin"); +MODULE_FIRMWARE("radeon/RV770_me.bin"); +MODULE_FIRMWARE("radeon/RV730_pfp.bin"); +MODULE_FIRMWARE("radeon/RV730_me.bin"); +MODULE_FIRMWARE("radeon/RV710_pfp.bin"); +MODULE_FIRMWARE("radeon/RV710_me.bin"); + +int r600_debugfs_mc_info_init(struct radeon_device *rdev); /* This files gather functions specifics to: * r600,rv610,rv630,rv620,rv635,rv670 @@ -39,87 +73,270 @@ void rs600_mc_disable_clients(struct radeon_device *rdev); */ int r600_mc_wait_for_idle(struct radeon_device *rdev); void r600_gpu_init(struct radeon_device *rdev); +void r600_fini(struct radeon_device *rdev); /* - * MC + * R600 PCIE GART */ -int r600_mc_init(struct radeon_device *rdev) +int r600_gart_clear_page(struct radeon_device *rdev, int i) { - uint32_t tmp; + void __iomem *ptr = (void *)rdev->gart.table.vram.ptr; + u64 pte; - r600_gpu_init(rdev); - - /* setup the gart before changing location so we can ask to - * discard unmapped mc request - */ - /* FIXME: disable out of gart access */ - tmp = rdev->mc.gtt_location / 4096; - tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp); - tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096; - tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp); - - rs600_mc_disable_clients(rdev); - if (r600_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); - } - - tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; - tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24); - tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24); - WREG32(R600_MC_VM_FB_LOCATION, tmp); - tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; - tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22); - WREG32(R600_MC_VM_AGP_TOP, tmp); - tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22); - WREG32(R600_MC_VM_AGP_BOT, tmp); + if (i < 0 || i > rdev->gart.num_gpu_pages) + return -EINVAL; + pte = 0; + writeq(pte, ((void __iomem *)ptr) + (i * 8)); return 0; } -void r600_mc_fini(struct radeon_device *rdev) +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev) { - /* FIXME: implement */ + unsigned i; + u32 tmp; + + WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1)); + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(VM_CONTEXT0_REQUEST_RESPONSE); + tmp = (tmp & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT; + if (tmp == 2) { + printk(KERN_WARNING "[drm] r600 flush TLB failed\n"); + return; + } + if (tmp) { + return; + } + udelay(1); + } } - -/* - * Global GPU functions - */ -void r600_errata(struct radeon_device *rdev) +int r600_pcie_gart_enable(struct radeon_device *rdev) { - rdev->pll_errata = 0; + u32 tmp; + int r, i; + + /* Initialize common gart structure */ + r = radeon_gart_init(rdev); + if (r) { + return r; + } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; + r = radeon_gart_table_vram_alloc(rdev); + if (r) { + return r; + } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) | + ENABLE_WAIT_L2_QUERY; + WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING); + WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); + WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + for (i = 1; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + r600_pcie_gart_tlb_flush(rdev); + rdev->gart.ready = true; + return 0; +} + +void r600_pcie_gart_disable(struct radeon_device *rdev) +{ + u32 tmp; + int i; + + /* Clear ptes*/ + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + r600_pcie_gart_tlb_flush(rdev); + /* Disable all tables */ + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + /* Disable L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1)); + /* Setup L1 TLB control */ + tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) | + ENABLE_WAIT_L2_QUERY; + WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); } int r600_mc_wait_for_idle(struct radeon_device *rdev) { - /* FIXME: implement */ - return 0; + unsigned i; + u32 tmp; + + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00; + if (!tmp) + return 0; + udelay(1); + } + return -1; } -void r600_gpu_init(struct radeon_device *rdev) +static void r600_mc_resume(struct radeon_device *rdev) { - /* FIXME: implement */ + u32 d1vga_control, d2vga_control; + u32 vga_render_control, vga_hdp_control; + u32 d1crtc_control, d2crtc_control; + u32 new_d1grph_primary, new_d1grph_secondary; + u32 new_d2grph_primary, new_d2grph_secondary; + u64 old_vram_start; + u32 tmp; + int i, j; + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); + + d1vga_control = RREG32(D1VGA_CONTROL); + d2vga_control = RREG32(D2VGA_CONTROL); + vga_render_control = RREG32(VGA_RENDER_CONTROL); + vga_hdp_control = RREG32(VGA_HDP_CONTROL); + d1crtc_control = RREG32(D1CRTC_CONTROL); + d2crtc_control = RREG32(D2CRTC_CONTROL); + old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24; + new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS); + new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS); + new_d1grph_primary += rdev->mc.vram_start - old_vram_start; + new_d1grph_secondary += rdev->mc.vram_start - old_vram_start; + new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS); + new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS); + new_d2grph_primary += rdev->mc.vram_start - old_vram_start; + new_d2grph_secondary += rdev->mc.vram_start - old_vram_start; + + /* Stop all video */ + WREG32(D1VGA_CONTROL, 0); + WREG32(D2VGA_CONTROL, 0); + WREG32(VGA_RENDER_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, 0); + WREG32(D2CRTC_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); + tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7)); + WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16); + WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); + WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); + } else { + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + } + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary); + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary); + WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary); + WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary); + WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); + + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, vga_hdp_control); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Restore video state */ + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, d1crtc_control); + WREG32(D2CRTC_CONTROL, d2crtc_control); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + WREG32(D1VGA_CONTROL, d1vga_control); + WREG32(D2VGA_CONTROL, d2vga_control); + WREG32(VGA_RENDER_CONTROL, vga_render_control); } - -/* - * VRAM info - */ -void r600_vram_get_type(struct radeon_device *rdev) +int r600_mc_init(struct radeon_device *rdev) { - uint32_t tmp; + fixed20_12 a; + u32 tmp; int chansize; + int r; + /* Get VRAM informations */ rdev->mc.vram_width = 128; rdev->mc.vram_is_ddr = true; - - tmp = RREG32(R600_RAMCFG); - if (tmp & R600_CHANSIZE_OVERRIDE) { + tmp = RREG32(RAMCFG); + if (tmp & CHANSIZE_OVERRIDE) { chansize = 16; - } else if (tmp & R600_CHANSIZE) { + } else if (tmp & CHANSIZE_MASK) { chansize = 64; } else { chansize = 32; @@ -135,36 +352,1391 @@ void r600_vram_get_type(struct radeon_device *rdev) (rdev->family == CHIP_RV635)) { rdev->mc.vram_width = 2 * chansize; } -} - -void r600_vram_info(struct radeon_device *rdev) -{ - r600_vram_get_type(rdev); - rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE); - rdev->mc.mc_vram_size = rdev->mc.real_vram_size; - /* Could aper size report 0 ? */ rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Setup GPU memory space */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + return r; + /* gtt_size is setup by radeon_agp_init */ + rdev->mc.gtt_location = rdev->mc.agp_base; + tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size; + /* Try to put vram before or after AGP because we + * we want SYSTEM_APERTURE to cover both VRAM and + * AGP so that GPU can catch out of VRAM/AGP access + */ + if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { + /* Enought place before */ + rdev->mc.vram_location = rdev->mc.gtt_location - + rdev->mc.mc_vram_size; + } else if (tmp > rdev->mc.mc_vram_size) { + /* Enought place after */ + rdev->mc.vram_location = rdev->mc.gtt_location + + rdev->mc.gtt_size; + } else { + /* Try to setup VRAM then AGP might not + * not work on some card + */ + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } + } else { + if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) { + rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) & + 0xFFFF) << 24; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size; + if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) { + /* Enough place after vram */ + rdev->mc.gtt_location = tmp; + } else if (rdev->mc.vram_location >= rdev->mc.gtt_size) { + /* Enough place before vram */ + rdev->mc.gtt_location = 0; + } else { + /* Not enough place after or before shrink + * gart size + */ + if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) { + rdev->mc.gtt_location = 0; + rdev->mc.gtt_size = rdev->mc.vram_location; + } else { + rdev->mc.gtt_location = tmp; + rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp; + } + } + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } else { + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + } + } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size; + /* FIXME: we should enforce default clock in case GPU is not in + * default setup + */ + a.full = rfixed_const(100); + rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk); + rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a); + return 0; } +/* We doesn't check that the GPU really needs a reset we simply do the + * reset, it's up to the caller to determine if the GPU needs one. We + * might add an helper function to check that. + */ +int r600_gpu_soft_reset(struct radeon_device *rdev) +{ + u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) | + S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) | + S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) | + S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) | + S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) | + S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) | + S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) | + S_008010_GUI_ACTIVE(1); + u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) | + S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) | + S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) | + S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) | + S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) | + S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) | + S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) | + S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1); + u32 srbm_reset = 0; + + /* Disable CP parsing/prefetching */ + WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff)); + /* Check if any of the rendering block is busy and reset it */ + if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) || + (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) { + WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CR(1) | + S_008020_SOFT_RESET_DB(1) | + S_008020_SOFT_RESET_CB(1) | + S_008020_SOFT_RESET_PA(1) | + S_008020_SOFT_RESET_SC(1) | + S_008020_SOFT_RESET_SMX(1) | + S_008020_SOFT_RESET_SPI(1) | + S_008020_SOFT_RESET_SX(1) | + S_008020_SOFT_RESET_SH(1) | + S_008020_SOFT_RESET_TC(1) | + S_008020_SOFT_RESET_TA(1) | + S_008020_SOFT_RESET_VC(1) | + S_008020_SOFT_RESET_VGT(1)); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + udelay(50); + WREG32(R_008020_GRBM_SOFT_RESET, 0); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + } + /* Reset CP (we always reset CP) */ + WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CP(1)); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + udelay(50); + WREG32(R_008020_GRBM_SOFT_RESET, 0); + (void)RREG32(R_008020_GRBM_SOFT_RESET); + /* Reset others GPU block if necessary */ + if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_RLC(1); + if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_GRBM(1); + if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_IH(1); + if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_VMC(1); + if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_MC(1); + if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_RLC(1); + if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS))) + srbm_reset |= S_000E60_SOFT_RESET_SEM(1); + WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset); + (void)RREG32(R_000E60_SRBM_SOFT_RESET); + udelay(50); + WREG32(R_000E60_SRBM_SOFT_RESET, 0); + (void)RREG32(R_000E60_SRBM_SOFT_RESET); + /* Wait a little for things to settle down */ + udelay(50); + return 0; +} + +int r600_gpu_reset(struct radeon_device *rdev) +{ + return r600_gpu_soft_reset(rdev); +} + +static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, + u32 num_backends, + u32 backend_disable_mask) +{ + u32 backend_map = 0; + u32 enabled_backends_mask; + u32 enabled_backends_count; + u32 cur_pipe; + u32 swizzle_pipe[R6XX_MAX_PIPES]; + u32 cur_backend; + u32 i; + + if (num_tile_pipes > R6XX_MAX_PIPES) + num_tile_pipes = R6XX_MAX_PIPES; + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_backends > R6XX_MAX_BACKENDS) + num_backends = R6XX_MAX_BACKENDS; + if (num_backends < 1) + num_backends = 1; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { + if (((backend_disable_mask >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends) + break; + } + + if (enabled_backends_count == 0) { + enabled_backends_mask = 1; + enabled_backends_count = 1; + } + + if (enabled_backends_count != num_backends) + num_backends = enabled_backends_count; + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); + switch (num_tile_pipes) { + case 1: + swizzle_pipe[0] = 0; + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 3: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + break; + case 4: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + break; + case 5: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + break; + case 6: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 5; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + break; + case 7: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + break; + case 8: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + swizzle_pipe[7] = 7; + break; + } + + cur_backend = 0; + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; + + backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); + + cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; + } + + return backend_map; +} + +int r600_count_pipe_bits(uint32_t val) +{ + int i, ret = 0; + + for (i = 0; i < 32; i++) { + ret += val & 1; + val >>= 1; + } + return ret; +} + +void r600_gpu_init(struct radeon_device *rdev) +{ + u32 tiling_config; + u32 ramcfg; + u32 tmp; + int i, j; + u32 sq_config; + u32 sq_gpr_resource_mgmt_1 = 0; + u32 sq_gpr_resource_mgmt_2 = 0; + u32 sq_thread_resource_mgmt = 0; + u32 sq_stack_resource_mgmt_1 = 0; + u32 sq_stack_resource_mgmt_2 = 0; + + /* FIXME: implement */ + switch (rdev->family) { + case CHIP_R600: + rdev->config.r600.max_pipes = 4; + rdev->config.r600.max_tile_pipes = 8; + rdev->config.r600.max_simds = 4; + rdev->config.r600.max_backends = 4; + rdev->config.r600.max_gprs = 256; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 256; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 16; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + case CHIP_RV630: + case CHIP_RV635: + rdev->config.r600.max_pipes = 2; + rdev->config.r600.max_tile_pipes = 2; + rdev->config.r600.max_simds = 3; + rdev->config.r600.max_backends = 1; + rdev->config.r600.max_gprs = 128; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 128; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 4; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + rdev->config.r600.max_pipes = 1; + rdev->config.r600.max_tile_pipes = 1; + rdev->config.r600.max_simds = 2; + rdev->config.r600.max_backends = 1; + rdev->config.r600.max_gprs = 128; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 128; + rdev->config.r600.max_hw_contexts = 4; + rdev->config.r600.max_gs_threads = 4; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 1; + break; + case CHIP_RV670: + rdev->config.r600.max_pipes = 4; + rdev->config.r600.max_tile_pipes = 4; + rdev->config.r600.max_simds = 4; + rdev->config.r600.max_backends = 4; + rdev->config.r600.max_gprs = 192; + rdev->config.r600.max_threads = 192; + rdev->config.r600.max_stack_entries = 256; + rdev->config.r600.max_hw_contexts = 8; + rdev->config.r600.max_gs_threads = 16; + rdev->config.r600.sx_max_export_size = 128; + rdev->config.r600.sx_max_export_pos_size = 16; + rdev->config.r600.sx_max_export_smx_size = 128; + rdev->config.r600.sq_num_cf_insts = 2; + break; + default: + break; + } + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + /* Setup tiling */ + tiling_config = 0; + ramcfg = RREG32(RAMCFG); + switch (rdev->config.r600.max_tile_pipes) { + case 1: + tiling_config |= PIPE_TILING(0); + break; + case 2: + tiling_config |= PIPE_TILING(1); + break; + case 4: + tiling_config |= PIPE_TILING(2); + break; + case 8: + tiling_config |= PIPE_TILING(3); + break; + default: + break; + } + tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); + tiling_config |= GROUP_SIZE(0); + tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT; + if (tmp > 3) { + tiling_config |= ROW_TILING(3); + tiling_config |= SAMPLE_SPLIT(3); + } else { + tiling_config |= ROW_TILING(tmp); + tiling_config |= SAMPLE_SPLIT(tmp); + } + tiling_config |= BANK_SWAPS(1); + tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes, + rdev->config.r600.max_backends, + (0xff << rdev->config.r600.max_backends) & 0xff); + tiling_config |= BACKEND_MAP(tmp); + WREG32(GB_TILING_CONFIG, tiling_config); + WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); + WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); + + tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK); + WREG32(CC_RB_BACKEND_DISABLE, tmp); + + /* Setup pipes */ + tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK); + tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK); + WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp); + WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp); + + tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK); + WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); + WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK); + + /* Setup some CP states */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b))); + WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40))); + + WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT | + SYNC_WALKER | SYNC_ALIGNER)); + /* Setup various GPU states */ + if (rdev->family == CHIP_RV670) + WREG32(ARB_GDEC_RD_CNTL, 0x00000021); + + tmp = RREG32(SX_DEBUG_1); + tmp |= SMX_EVENT_RELEASE; + if ((rdev->family > CHIP_R600)) + tmp |= ENABLE_NEW_SMX_ADDRESS; + WREG32(SX_DEBUG_1, tmp); + + if (((rdev->family) == CHIP_R600) || + ((rdev->family) == CHIP_RV630) || + ((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE); + } else { + WREG32(DB_DEBUG, 0); + } + WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) | + DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4))); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + WREG32(VGT_NUM_INSTANCES, 0); + + WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0)); + + tmp = RREG32(SQ_MS_FIFO_SIZES); + if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + tmp = (CACHE_FIFO_SIZE(0xa) | + FETCH_FIFO_HIWATER(0xa) | + DONE_FIFO_HIWATER(0xe0) | + ALU_UPDATE_FIFO_HIWATER(0x8)); + } else if (((rdev->family) == CHIP_R600) || + ((rdev->family) == CHIP_RV630)) { + tmp &= ~DONE_FIFO_HIWATER(0xff); + tmp |= DONE_FIFO_HIWATER(0x4); + } + WREG32(SQ_MS_FIFO_SIZES, tmp); + + /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT + * should be adjusted as needed by the 2D/3D drivers. This just sets default values + */ + sq_config = RREG32(SQ_CONFIG); + sq_config &= ~(PS_PRIO(3) | + VS_PRIO(3) | + GS_PRIO(3) | + ES_PRIO(3)); + sq_config |= (DX9_CONSTS | + VC_ENABLE | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + if ((rdev->family) == CHIP_R600) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) | + NUM_VS_GPRS(124) | + NUM_CLAUSE_TEMP_GPRS(4)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) | + NUM_ES_GPRS(0)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(136) | + NUM_VS_THREADS(48) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(4)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) | + NUM_VS_STACK_ENTRIES(128)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) | + NUM_ES_STACK_ENTRIES(0)); + } else if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + /* no vertex cache */ + sq_config &= ~VC_ENABLE; + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) | + NUM_ES_GPRS(17)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) | + NUM_VS_STACK_ENTRIES(40)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) | + NUM_ES_STACK_ENTRIES(16)); + } else if (((rdev->family) == CHIP_RV630) || + ((rdev->family) == CHIP_RV635)) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) | + NUM_ES_GPRS(18)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) | + NUM_VS_STACK_ENTRIES(40)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) | + NUM_ES_STACK_ENTRIES(16)); + } else if ((rdev->family) == CHIP_RV670) { + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) | + NUM_VS_GPRS(44) | + NUM_CLAUSE_TEMP_GPRS(2)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) | + NUM_ES_GPRS(17)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(79) | + NUM_VS_THREADS(78) | + NUM_GS_THREADS(4) | + NUM_ES_THREADS(31)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) | + NUM_VS_STACK_ENTRIES(64)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) | + NUM_ES_STACK_ENTRIES(64)); + } + + WREG32(SQ_CONFIG, sq_config); + WREG32(SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1); + WREG32(SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2); + WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); + WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1); + WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2); + + if (((rdev->family) == CHIP_RV610) || + ((rdev->family) == CHIP_RV620) || + ((rdev->family) == CHIP_RS780)) { + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY)); + } else { + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC)); + } + + /* More default values. 2D/3D driver should adjust as needed */ + WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) | + S1_X(0x4) | S1_Y(0xc))); + WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) | + S1_X(0x2) | S1_Y(0x2) | + S2_X(0xa) | S2_Y(0x6) | + S3_X(0x6) | S3_Y(0xa))); + WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) | + S1_X(0x4) | S1_Y(0xc) | + S2_X(0x1) | S2_Y(0x6) | + S3_X(0xa) | S3_Y(0xe))); + WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) | + S5_X(0x0) | S5_Y(0x0) | + S6_X(0xb) | S6_Y(0x4) | + S7_X(0x7) | S7_Y(0x8))); + + WREG32(VGT_STRMOUT_EN, 0); + tmp = rdev->config.r600.max_pipes * 16; + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + tmp += 32; + break; + case CHIP_RV670: + tmp += 128; + break; + default: + break; + } + if (tmp > 256) { + tmp = 256; + } + WREG32(VGT_ES_PER_GS, 128); + WREG32(VGT_GS_PER_ES, tmp); + WREG32(VGT_GS_PER_VS, 2); + WREG32(VGT_GS_VERTEX_REUSE, 16); + + /* more default values. 2D/3D driver should adjust as needed */ + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + WREG32(VGT_STRMOUT_EN, 0); + WREG32(SX_MISC, 0); + WREG32(PA_SC_MODE_CNTL, 0); + WREG32(PA_SC_AA_CONFIG, 0); + WREG32(PA_SC_LINE_STIPPLE, 0); + WREG32(SPI_INPUT_Z, 0); + WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); + WREG32(CB_COLOR7_FRAG, 0); + + /* Clear render buffer base addresses */ + WREG32(CB_COLOR0_BASE, 0); + WREG32(CB_COLOR1_BASE, 0); + WREG32(CB_COLOR2_BASE, 0); + WREG32(CB_COLOR3_BASE, 0); + WREG32(CB_COLOR4_BASE, 0); + WREG32(CB_COLOR5_BASE, 0); + WREG32(CB_COLOR6_BASE, 0); + WREG32(CB_COLOR7_BASE, 0); + WREG32(CB_COLOR7_FRAG, 0); + + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + tmp = TC_L2_SIZE(8); + break; + case CHIP_RV630: + case CHIP_RV635: + tmp = TC_L2_SIZE(4); + break; + case CHIP_R600: + tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT; + break; + default: + tmp = TC_L2_SIZE(0); + break; + } + WREG32(TC_CNTL, tmp); + + tmp = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, tmp); + + tmp = RREG32(ARB_POP); + tmp |= ENABLE_TC128; + WREG32(ARB_POP, tmp); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | + NUM_CLIP_SEQ(3))); + WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095)); +} + + /* * Indirect registers accessor */ -uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg) +u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg) { - uint32_t r; + u32 r; - WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff)); - (void)RREG32(R600_PCIE_PORT_INDEX); - r = RREG32(R600_PCIE_PORT_DATA); + WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); + (void)RREG32(PCIE_PORT_INDEX); + r = RREG32(PCIE_PORT_DATA); return r; } -void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) +void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) { - WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff)); - (void)RREG32(R600_PCIE_PORT_INDEX); - WREG32(R600_PCIE_PORT_DATA, (v)); - (void)RREG32(R600_PCIE_PORT_DATA); + WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); + (void)RREG32(PCIE_PORT_INDEX); + WREG32(PCIE_PORT_DATA, (v)); + (void)RREG32(PCIE_PORT_DATA); +} + + +/* + * CP & Ring + */ +void r600_cp_stop(struct radeon_device *rdev) +{ + WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); +} + +int r600_cp_init_microcode(struct radeon_device *rdev) +{ + struct platform_device *pdev; + const char *chip_name; + size_t pfp_req_size, me_req_size; + char fw_name[30]; + int err; + + DRM_DEBUG("\n"); + + pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0); + err = IS_ERR(pdev); + if (err) { + printk(KERN_ERR "radeon_cp: Failed to register firmware\n"); + return -EINVAL; + } + + switch (rdev->family) { + case CHIP_R600: chip_name = "R600"; break; + case CHIP_RV610: chip_name = "RV610"; break; + case CHIP_RV630: chip_name = "RV630"; break; + case CHIP_RV620: chip_name = "RV620"; break; + case CHIP_RV635: chip_name = "RV635"; break; + case CHIP_RV670: chip_name = "RV670"; break; + case CHIP_RS780: + case CHIP_RS880: chip_name = "RS780"; break; + case CHIP_RV770: chip_name = "RV770"; break; + case CHIP_RV730: + case CHIP_RV740: chip_name = "RV730"; break; + case CHIP_RV710: chip_name = "RV710"; break; + default: BUG(); + } + + if (rdev->family >= CHIP_RV770) { + pfp_req_size = R700_PFP_UCODE_SIZE * 4; + me_req_size = R700_PM4_UCODE_SIZE * 4; + } else { + pfp_req_size = PFP_UCODE_SIZE * 4; + me_req_size = PM4_UCODE_SIZE * 12; + } + + DRM_INFO("Loading %s CP Microcode\n", chip_name); + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); + err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->pfp_fw->size != pfp_req_size) { + printk(KERN_ERR + "r600_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->pfp_fw->size, fw_name); + err = -EINVAL; + goto out; + } + + snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); + err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->me_fw->size != me_req_size) { + printk(KERN_ERR + "r600_cp: Bogus length %zu in firmware \"%s\"\n", + rdev->me_fw->size, fw_name); + err = -EINVAL; + } +out: + platform_device_unregister(pdev); + + if (err) { + if (err != -EINVAL) + printk(KERN_ERR + "r600_cp: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(rdev->pfp_fw); + rdev->pfp_fw = NULL; + release_firmware(rdev->me_fw); + rdev->me_fw = NULL; + } + return err; +} + +static int r600_cp_load_microcode(struct radeon_device *rdev) +{ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw) + return -EINVAL; + + r600_cp_stop(rdev); + + WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3)); + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + WREG32(CP_ME_RAM_WADDR, 0); + + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < PM4_UCODE_SIZE * 3; i++) + WREG32(CP_ME_RAM_DATA, + be32_to_cpup(fw_data++)); + + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, + be32_to_cpup(fw_data++)); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); + return 0; +} + +int r600_cp_start(struct radeon_device *rdev) +{ + int r; + uint32_t cp_me; + + r = radeon_ring_lock(rdev, 7); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(rdev, 0x1); + if (rdev->family < CHIP_RV770) { + radeon_ring_write(rdev, 0x3); + radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1); + } else { + radeon_ring_write(rdev, 0x0); + radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1); + } + radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_unlock_commit(rdev); + + cp_me = 0xff; + WREG32(R_0086D8_CP_ME_CNTL, cp_me); + return 0; +} + +int r600_cp_resume(struct radeon_device *rdev) +{ + u32 tmp; + u32 rb_bufsz; + int r; + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + /* Set ring buffer size */ + rb_bufsz = drm_order(rdev->cp.ring_size / 8); +#ifdef __BIG_ENDIAN + WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE | + (drm_order(4096/8) << 8) | rb_bufsz); +#else + WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(4096/8) << 8) | rb_bufsz); +#endif + WREG32(CP_SEM_WAIT_TIMER, 0x4); + + /* Set the write pointer delay */ + WREG32(CP_RB_WPTR_DELAY, 0); + + /* Initialize the ring buffer's read and write pointers */ + tmp = RREG32(CP_RB_CNTL); + WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); + WREG32(CP_RB_RPTR_WR, 0); + WREG32(CP_RB_WPTR, 0); + WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF); + WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr)); + mdelay(1); + WREG32(CP_RB_CNTL, tmp); + + WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8); + WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); + + rdev->cp.rptr = RREG32(CP_RB_RPTR); + rdev->cp.wptr = RREG32(CP_RB_WPTR); + + r600_cp_start(rdev); + rdev->cp.ready = true; + r = radeon_ring_test(rdev); + if (r) { + rdev->cp.ready = false; + return r; + } + return 0; +} + +void r600_cp_commit(struct radeon_device *rdev) +{ + WREG32(CP_RB_WPTR, rdev->cp.wptr); + (void)RREG32(CP_RB_WPTR); +} + +void r600_ring_init(struct radeon_device *rdev, unsigned ring_size) +{ + u32 rb_bufsz; + + /* Align ring size */ + rb_bufsz = drm_order(ring_size / 8); + ring_size = (1 << (rb_bufsz + 1)) * 4; + rdev->cp.ring_size = ring_size; + rdev->cp.align_mask = 16 - 1; +} + + +/* + * GPU scratch registers helpers function. + */ +void r600_scratch_init(struct radeon_device *rdev) +{ + int i; + + rdev->scratch.num_reg = 7; + for (i = 0; i < rdev->scratch.num_reg; i++) { + rdev->scratch.free[i] = true; + rdev->scratch.reg[i] = SCRATCH_REG0 + (i * 4); + } +} + +int r600_ring_test(struct radeon_device *rdev) +{ + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, 3); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + radeon_scratch_free(rdev, scratch); + return r; + } + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(rdev, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ring test succeeded in %d usecs\n", i); + } else { + DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + return r; +} + +/* + * Writeback + */ +int r600_wb_init(struct radeon_device *rdev) +{ + int r; + + if (rdev->wb.wb_obj == NULL) { + r = radeon_object_create(rdev, NULL, 4096, + true, + RADEON_GEM_DOMAIN_GTT, + false, &rdev->wb.wb_obj); + if (r) { + DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r); + return r; + } + r = radeon_object_pin(rdev->wb.wb_obj, + RADEON_GEM_DOMAIN_GTT, + &rdev->wb.gpu_addr); + if (r) { + DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r); + return r; + } + r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb); + if (r) { + DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r); + return r; + } + } + WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF); + WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC); + WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF); + WREG32(SCRATCH_UMSK, 0xff); + return 0; +} + +void r600_wb_fini(struct radeon_device *rdev) +{ + if (rdev->wb.wb_obj) { + radeon_object_kunmap(rdev->wb.wb_obj); + radeon_object_unpin(rdev->wb.wb_obj); + radeon_object_unref(&rdev->wb.wb_obj); + rdev->wb.wb = NULL; + rdev->wb.wb_obj = NULL; + } +} + + +/* + * CS + */ +void r600_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + /* Emit fence sequence & fire IRQ */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(rdev, fence->seq); +} + +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, + uint64_t dst_offset, + unsigned num_pages, + struct radeon_fence *fence) +{ + /* FIXME: implement */ + return 0; +} + +int r600_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence) +{ + r600_blit_prepare_copy(rdev, num_pages * 4096); + r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * 4096); + r600_blit_done_copy(rdev, fence); + return 0; +} + +int r600_irq_process(struct radeon_device *rdev) +{ + /* FIXME: implement */ + return 0; +} + +int r600_irq_set(struct radeon_device *rdev) +{ + /* FIXME: implement */ + return 0; +} + +int r600_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size) +{ + /* FIXME: implement */ + return 0; +} + +void r600_clear_surface_reg(struct radeon_device *rdev, int reg) +{ + /* FIXME: implement */ +} + + +bool r600_card_posted(struct radeon_device *rdev) +{ + uint32_t reg; + + /* first check CRTCs */ + reg = RREG32(D1CRTC_CONTROL) | + RREG32(D2CRTC_CONTROL); + if (reg & CRTC_EN) + return true; + + /* then check MEM_SIZE, in case the crtcs are off */ + if (RREG32(CONFIG_MEMSIZE)) + return true; + + return false; +} + +int r600_resume(struct radeon_device *rdev) +{ + int r; + + r600_gpu_reset(rdev); + r600_mc_resume(rdev); + r = r600_pcie_gart_enable(rdev); + if (r) + return r; + r600_gpu_init(rdev); + r = radeon_ring_init(rdev, rdev->cp.ring_size); + if (r) + return r; + r = r600_cp_load_microcode(rdev); + if (r) + return r; + r = r600_cp_resume(rdev); + if (r) + return r; + r = r600_wb_init(rdev); + if (r) + return r; + return 0; +} + +int r600_suspend(struct radeon_device *rdev) +{ + /* FIXME: we should wait for ring to be empty */ + r600_cp_stop(rdev); + return 0; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int r600_init(struct radeon_device *rdev) +{ + int r; + + rdev->new_init_path = true; + r = radeon_dummy_page_init(rdev); + if (r) + return r; + if (r600_debugfs_mc_info_init(rdev)) { + DRM_ERROR("Failed to register debugfs file for mc !\n"); + } + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) + return -EINVAL; + r = radeon_atombios_init(rdev); + if (r) + return r; + /* Post card if necessary */ + if (!r600_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + r600_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + r = radeon_clocks_init(rdev); + if (r) + return r; + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + r = r600_mc_init(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + r600_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return r600_init(rdev); + } + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) + return r; + rdev->cp.ring_obj = NULL; + r600_ring_init(rdev, 1024 * 1024); + + if (!rdev->me_fw || !rdev->pfp_fw) { + r = r600_cp_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = r600_resume(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + r600_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return r600_init(rdev); + } + return r; + } + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + r = r600_blit_init(rdev); + if (r) { + DRM_ERROR("radeon: failled blitter (%d).\n", r); + return r; + } + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + return 0; +} + +void r600_fini(struct radeon_device *rdev) +{ + /* Suspend operations */ + r600_suspend(rdev); + + r600_blit_fini(rdev); + radeon_ring_fini(rdev); + r600_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); + radeon_gem_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) + radeon_agp_fini(rdev); +#endif + radeon_object_fini(rdev); + if (rdev->is_atom_bios) + radeon_atombios_fini(rdev); + else + radeon_combios_fini(rdev); + kfree(rdev->bios); + rdev->bios = NULL; + radeon_dummy_page_fini(rdev); +} + + +/* + * CS stuff + */ +void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + /* FIXME: implement */ + radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC); + radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF); + radeon_ring_write(rdev, ib->length_dw); +} + +int r600_ib_test(struct radeon_device *rdev) +{ + struct radeon_ib *ib; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = radeon_scratch_get(rdev, &scratch); + if (r) { + DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = radeon_ib_get(rdev, &ib); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1); + ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + ib->ptr[2] = 0xDEADBEEF; + ib->ptr[3] = PACKET2(0); + ib->ptr[4] = PACKET2(0); + ib->ptr[5] = PACKET2(0); + ib->ptr[6] = PACKET2(0); + ib->ptr[7] = PACKET2(0); + ib->ptr[8] = PACKET2(0); + ib->ptr[9] = PACKET2(0); + ib->ptr[10] = PACKET2(0); + ib->ptr[11] = PACKET2(0); + ib->ptr[12] = PACKET2(0); + ib->ptr[13] = PACKET2(0); + ib->ptr[14] = PACKET2(0); + ib->ptr[15] = PACKET2(0); + ib->length_dw = 16; + r = radeon_ib_schedule(rdev, ib); + if (r) { + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + return r; + } + r = radeon_fence_wait(ib->fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test succeeded in %u usecs\n", i); + } else { + DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return r; +} + + + + +/* + * Debugfs info + */ +#if defined(CONFIG_DEBUG_FS) + +static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + uint32_t rdp, wdp; + unsigned count, i, j; + + radeon_ring_free_size(rdev); + rdp = RREG32(CP_RB_RPTR); + wdp = RREG32(CP_RB_WPTR); + count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask; + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT)); + seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); + seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); + seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw); + seq_printf(m, "%u dwords in ring\n", count); + for (j = 0; j <= count; j++) { + i = (rdp + j) & rdev->cp.ptr_mask; + seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]); + } + return 0; +} + +static int r600_debugfs_mc_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + + DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS); + DREG32_SYS(m, rdev, VM_L2_STATUS); + return 0; +} + +static struct drm_info_list r600_mc_info_list[] = { + {"r600_mc_info", r600_debugfs_mc_info, 0, NULL}, + {"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL}, +}; +#endif + +int r600_debugfs_mc_info_init(struct radeon_device *rdev) +{ +#if defined(CONFIG_DEBUG_FS) + return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list)); +#else + return 0; +#endif } diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c new file mode 100644 index 000000000000..c51402e92493 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit.c @@ -0,0 +1,855 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher + */ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +#include "r600_blit_shaders.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +static inline void +set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + RING_LOCALS; + DRM_DEBUG("\n"); + + h = (h + 7) & ~7; + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 27)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) && + ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) { + BEGIN_RING(21 + 2); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); + OUT_RING(2 << 0); + } else { + BEGIN_RING(21); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + } + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((pitch << 0) | (slice << 10)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(cb_color_info); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + ADVANCE_RING(); +} + +static inline void +cp_set_surface_sync(drm_radeon_private_t *dev_priv, + u32 sync_type, u32 size, u64 mc_addr) +{ + u32 cp_coher_size; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + BEGIN_RING(5); + OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); + OUT_RING(sync_type); + OUT_RING(cp_coher_size); + OUT_RING((mc_addr >> 8)); + OUT_RING(10); /* poll interval */ + ADVANCE_RING(); +} + +static inline void +set_shaders(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + u64 gpu_addr; + int shader_size, i; + u32 *vs, *ps; + uint32_t sq_pgm_resources; + RING_LOCALS; + DRM_DEBUG("\n"); + + /* load shaders */ + vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset); + ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256); + + shader_size = r6xx_vs_size; + for (i = 0; i < shader_size; i++) + vs[i] = r6xx_vs[i]; + shader_size = r6xx_ps_size; + for (i = 0; i < shader_size; i++) + ps[i] = r6xx_ps[i]; + + dev_priv->blit_vb->used = 512; + + gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset; + + /* setup shader regs */ + sq_pgm_resources = (1 << 0); + + BEGIN_RING(9 + 12); + /* VS */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(gpu_addr >> 8); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(sq_pgm_resources); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + + /* PS */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((gpu_addr + 256) >> 8); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(sq_pgm_resources | (1 << 28)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(2); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); + OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING(0); + ADVANCE_RING(); + + cp_set_surface_sync(dev_priv, + R600_SH_ACTION_ENA, 512, gpu_addr); +} + +static inline void +set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) +{ + uint32_t sq_vtx_constant_word2; + RING_LOCALS; + DRM_DEBUG("\n"); + + sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8)); + + BEGIN_RING(9); + OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + OUT_RING(0x460); + OUT_RING(gpu_addr & 0xffffffff); + OUT_RING(48 - 1); + OUT_RING(sq_vtx_constant_word2); + OUT_RING(1 << 0); + OUT_RING(0); + OUT_RING(0); + OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30); + ADVANCE_RING(); + + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(dev_priv, + R600_VC_ACTION_ENA, 48, gpu_addr); +} + +static inline void +set_tex_resource(drm_radeon_private_t *dev_priv, + int format, int w, int h, int pitch, u64 gpu_addr) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0); + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | + ((w - 1) << 19)); + + sq_tex_resource_word1 = (format << 26); + sq_tex_resource_word1 |= ((h - 1) << 0); + + sq_tex_resource_word4 = ((1 << 14) | + (0 << 16) | + (1 << 19) | + (2 << 22) | + (3 << 25)); + + BEGIN_RING(9); + OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + OUT_RING(0); + OUT_RING(sq_tex_resource_word0); + OUT_RING(sq_tex_resource_word1); + OUT_RING(gpu_addr >> 8); + OUT_RING(gpu_addr >> 8); + OUT_RING(sq_tex_resource_word4); + OUT_RING(0); + OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30); + ADVANCE_RING(); + +} + +static inline void +set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(12); + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16)); + OUT_RING((x2 << 0) | (y2 << 16)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); + OUT_RING((x2 << 0) | (y2 << 16)); + + OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); + OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); + OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); + OUT_RING((x2 << 0) | (y2 << 16)); + ADVANCE_RING(); +} + +static inline void +draw_auto(drm_radeon_private_t *dev_priv) +{ + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(10); + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(DI_PT_RECTLIST); + + OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + OUT_RING(DI_INDEX_SIZE_16_BIT); + + OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + OUT_RING(1); + + OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + OUT_RING(3); + OUT_RING(DI_SRC_SEL_AUTO_INDEX); + + ADVANCE_RING(); + COMMIT_RING(); +} + +static inline void +set_default_state(drm_radeon_private_t *dev_priv) +{ + int default_state_dw, i; + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + RING_LOCALS; + + switch ((dev_priv->flags & RADEON_FAMILY_MASK)) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) + sq_config = 0; + else + sq_config = R600_VC_ENABLE; + + sq_config |= (R600_DX9_CONSTS | + R600_ALU_INST_PREFER_VECTOR | + R600_PS_PRIO(0) | + R600_VS_PRIO(1) | + R600_GS_PRIO(2) | + R600_ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) | + R600_NUM_VS_GPRS(num_vs_gprs) | + R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) | + R600_NUM_ES_GPRS(num_es_gprs)); + sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) | + R600_NUM_VS_THREADS(num_vs_threads) | + R600_NUM_GS_THREADS(num_gs_threads) | + R600_NUM_ES_THREADS(num_es_threads)); + sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { + default_state_dw = r7xx_default_size * 4; + BEGIN_RING(default_state_dw + 10); + for (i = 0; i < default_state_dw; i++) + OUT_RING(r7xx_default_state[i]); + } else { + default_state_dw = r6xx_default_size * 4; + BEGIN_RING(default_state_dw + 10); + for (i = 0; i < default_state_dw; i++) + OUT_RING(r6xx_default_state[i]); + } + OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); + OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); + /* SQ config */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6)); + OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(sq_config); + OUT_RING(sq_gpr_resource_mgmt_1); + OUT_RING(sq_gpr_resource_mgmt_2); + OUT_RING(sq_thread_resource_mgmt); + OUT_RING(sq_stack_resource_mgmt_1); + OUT_RING(sq_stack_resource_mgmt_2); + ADVANCE_RING(); +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + + +int r600_nomm_get_vb(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + dev_priv->blit_vb = radeon_freelist_get(dev); + if (!dev_priv->blit_vb) { + DRM_ERROR("Unable to allocate vertex buffer for blit\n"); + return -EAGAIN; + } + return 0; +} + +void r600_nomm_put_vb(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + dev_priv->blit_vb->used = 0; + radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); +} + +void *r600_nomm_get_vb_ptr(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + return (((char *)dev->agp_buffer_map->handle + + dev_priv->blit_vb->offset + dev_priv->blit_vb->used)); +} + +int +r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG("\n"); + + r600_nomm_get_vb(dev); + + dev_priv->blit_vb->file_priv = file_priv; + + set_default_state(dev_priv); + set_shaders(dev); + + return 0; +} + + +void +r600_done_blit_copy(struct drm_device *dev) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + RING_LOCALS; + DRM_DEBUG("\n"); + + BEGIN_RING(5); + OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); + OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); + /* wait for 3D idle clean */ + OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); + OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); + + ADVANCE_RING(); + COMMIT_RING(); + + r600_nomm_put_vb(dev); +} + +void +r600_blit_copy(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int size_bytes) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int max_bytes; + u64 vb_addr; + u32 *vb; + + vb = r600_nomm_get_vb_ptr(dev); + + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] = i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src */ + set_tex_resource(dev_priv, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst */ + set_render_target(dev_priv, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors */ + set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h); + + /* Vertex buffer setup */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + dev_priv->blit_vb->used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] = i2f(dst_x / 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src */ + set_tex_resource(dev_priv, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst */ + set_render_target(dev_priv, COLOR_8_8_8_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors */ + set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h); + + /* Vertex buffer setup */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + dev_priv->blit_vb->used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + +void +r600_blit_swap(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int sx, int sy, int dx, int dy, + int w, int h, int src_pitch, int dst_pitch, int cpp) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int cb_format, tex_format; + u64 vb_addr; + u32 *vb; + + vb = (u32 *) ((char *)dev->agp_buffer_map->handle + + dev_priv->blit_vb->offset + dev_priv->blit_vb->used); + + if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } + + if (cpp == 4) { + cb_format = COLOR_8_8_8_8; + tex_format = FMT_8_8_8_8; + } else if (cpp == 2) { + cb_format = COLOR_5_6_5; + tex_format = FMT_5_6_5; + } else { + cb_format = COLOR_8; + tex_format = FMT_8; + } + + vb[0] = i2f(dx); + vb[1] = i2f(dy); + vb[2] = i2f(sx); + vb[3] = i2f(sy); + + vb[4] = i2f(dx); + vb[5] = i2f(dy + h); + vb[6] = i2f(sx); + vb[7] = i2f(sy + h); + + vb[8] = i2f(dx + w); + vb[9] = i2f(dy + h); + vb[10] = i2f(sx + w); + vb[11] = i2f(sy + h); + + /* src */ + set_tex_resource(dev_priv, tex_format, + src_pitch / cpp, + sy + h, src_pitch / cpp, + src_gpu_addr); + + cp_set_surface_sync(dev_priv, + R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr); + + /* dst */ + set_render_target(dev_priv, cb_format, + dst_pitch / cpp, dy + h, + dst_gpu_addr); + + /* scissors */ + set_scissors(dev_priv, dx, dy, dx + w, dy + h); + + /* Vertex buffer setup */ + vb_addr = dev_priv->gart_buffers_offset + + dev_priv->blit_vb->offset + + dev_priv->blit_vb->used; + set_vtx_resource(dev_priv, vb_addr); + + /* draw */ + draw_auto(dev_priv); + + cp_set_surface_sync(dev_priv, + R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, + dst_pitch * (dy + h), dst_gpu_addr); + + dev_priv->blit_vb->used += 12 * 4; +} diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c new file mode 100644 index 000000000000..5755647e688a --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -0,0 +1,777 @@ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon.h" + +#include "r600d.h" +#include "r600_blit_shaders.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +/* emits 21 on rv770+, 23 on r600 */ +static void +set_render_target(struct radeon_device *rdev, int format, + int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + + h = (h + 7) & ~7; + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 27)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) { + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); + radeon_ring_write(rdev, 2 << 0); + } + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (pitch << 0) | (slice << 10)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, cb_color_info); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); +} + +/* emits 5dw */ +static void +cp_set_surface_sync(struct radeon_device *rdev, + u32 sync_type, u32 size, + u64 mc_addr) +{ + u32 cp_coher_size; + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(rdev, sync_type); + radeon_ring_write(rdev, cp_coher_size); + radeon_ring_write(rdev, mc_addr >> 8); + radeon_ring_write(rdev, 10); /* poll interval */ +} + +/* emits 21dw + 1 surface sync = 26dw */ +static void +set_shaders(struct radeon_device *rdev) +{ + u64 gpu_addr; + u32 sq_pgm_resources; + + /* setup shader regs */ + sq_pgm_resources = (1 << 0); + + /* VS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + /* PS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources | (1 << 28)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 2); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); +} + +/* emits 9 + 1 sync (5) = 14*/ +static void +set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) +{ + u32 sq_vtx_constant_word2; + + sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0x460); + radeon_ring_write(rdev, gpu_addr & 0xffffffff); + radeon_ring_write(rdev, 48 - 1); + radeon_ring_write(rdev, sq_vtx_constant_word2); + radeon_ring_write(rdev, 1 << 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS880) || + (rdev->family == CHIP_RV710)) + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(rdev, + PACKET3_VC_ACTION_ENA, 48, gpu_addr); +} + +/* emits 9 */ +static void +set_tex_resource(struct radeon_device *rdev, + int format, int w, int h, int pitch, + u64 gpu_addr) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0); + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | + ((w - 1) << 19)); + + sq_tex_resource_word1 = (format << 26); + sq_tex_resource_word1 |= ((h - 1) << 0); + + sq_tex_resource_word4 = ((1 << 14) | + (0 << 16) | + (1 << 19) | + (2 << 22) | + (3 << 25)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word0); + radeon_ring_write(rdev, sq_tex_resource_word1); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, sq_tex_resource_word4); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30); +} + +/* emits 12 */ +static void +set_scissors(struct radeon_device *rdev, int x1, int y1, + int x2, int y2) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); +} + +/* emits 10 */ +static void +draw_auto(struct radeon_device *rdev) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, DI_PT_RECTLIST); + + radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT); + + radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(rdev, 1); + + radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(rdev, 3); + radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + +} + +/* emits 14 */ +static void +set_default_state(struct radeon_device *rdev) +{ + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + u64 gpu_addr; + + switch (rdev->family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RV710)) + sq_config = 0; + else + sq_config = VC_ENABLE; + + sq_config |= (DX9_CONSTS | + ALU_INST_PREFER_VECTOR | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | + NUM_VS_GPRS(num_vs_gprs) | + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | + NUM_ES_GPRS(num_es_gprs)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | + NUM_VS_THREADS(num_vs_threads) | + NUM_GS_THREADS(num_gs_threads) | + NUM_ES_THREADS(num_es_threads)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + + /* emit an IB pointing at default state */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC); + radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); + radeon_ring_write(rdev, (rdev->r600_blit.state_len / 4)); + + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT); + /* SQ config */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6)); + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_config); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); + radeon_ring_write(rdev, sq_thread_resource_mgmt); + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + +int r600_blit_init(struct radeon_device *rdev) +{ + u32 obj_size; + int r; + void *ptr; + + rdev->r600_blit.state_offset = 0; + + if (rdev->family >= CHIP_RV770) + rdev->r600_blit.state_len = r7xx_default_size * 4; + else + rdev->r600_blit.state_len = r6xx_default_size * 4; + + obj_size = rdev->r600_blit.state_len; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.vs_offset = obj_size; + obj_size += r6xx_vs_size * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.ps_offset = obj_size; + obj_size += r6xx_ps_size * 4; + obj_size = ALIGN(obj_size, 256); + + r = radeon_object_create(rdev, NULL, obj_size, + true, RADEON_GEM_DOMAIN_VRAM, + false, &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("r600 failed to allocate shader\n"); + return r; + } + + r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + if (r) { + DRM_ERROR("failed to pin blit object %d\n", r); + return r; + } + + DRM_DEBUG("r6xx blit allocated bo @ 0x%16llx %08x vs %08x ps %08x\n", + rdev->r600_blit.shader_gpu_addr, obj_size, + rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); + + r = radeon_object_kmap(rdev->r600_blit.shader_obj, &ptr); + if (r) { + DRM_ERROR("failed to map blit object %d\n", r); + return r; + } + + if (rdev->family >= CHIP_RV770) + memcpy_toio(ptr + rdev->r600_blit.state_offset, r7xx_default_state, rdev->r600_blit.state_len); + else + memcpy_toio(ptr + rdev->r600_blit.state_offset, r6xx_default_state, rdev->r600_blit.state_len); + + memcpy(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4); + memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4); + + radeon_object_kunmap(rdev->r600_blit.shader_obj); + return 0; +} + +void r600_blit_fini(struct radeon_device *rdev) +{ + radeon_object_unpin(rdev->r600_blit.shader_obj); + radeon_object_unref(&rdev->r600_blit.shader_obj); +} + +int r600_vb_ib_get(struct radeon_device *rdev) +{ + int r; + r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); + if (r) { + DRM_ERROR("failed to get IB for vertex buffer\n"); + return r; + } + + rdev->r600_blit.vb_total = 64*1024; + rdev->r600_blit.vb_used = 0; + return 0; +} + +void r600_vb_ib_put(struct radeon_device *rdev) +{ + mutex_lock(&rdev->ib_pool.mutex); + radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); + list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs); + mutex_unlock(&rdev->ib_pool.mutex); + radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); +} + +int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +{ + int r; + int ring_size; + const int max_size = 8192*8192; + + r = r600_vb_ib_get(rdev); + WARN_ON(r); + + /* loops of emits 64 + fence emit possible */ + ring_size = ((size_bytes + max_size) / max_size) * 78; + /* set default + shaders */ + ring_size += 40; /* shaders + def state */ + ring_size += 3; /* fence emit for VB IB */ + ring_size += 5; /* done copy */ + ring_size += 3; /* fence emit for done copy */ + r = radeon_ring_lock(rdev, ring_size); + WARN_ON(r); + + set_default_state(rdev); /* 14 */ + set_shaders(rdev); /* 26 */ + return 0; +} + +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) +{ + int r; + + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT); + /* wait for 3D idle clean */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit); + + if (rdev->r600_blit.vb_ib) + r600_vb_ib_put(rdev); + + if (fence) + r = radeon_fence_emit(rdev, fence); + + radeon_ring_unlock_commit(rdev); +} + +void r600_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes) +{ + int max_bytes; + u64 vb_gpu_addr; + u32 *vb; + + DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, + size_bytes, rdev->r600_blit.vb_used); + vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + +#if 0 + r600_vb_ib_put(rdev); + + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!dev_priv->blit_vb) + return; + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); +#endif + } + + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] = i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src 9 */ + set_tex_resource(rdev, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst 23 */ + set_render_target(rdev, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + + /* 14 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + } +#if 0 + if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) { + r600_nomm_put_vb(dev); + r600_nomm_get_vb(dev); + if (!rdev->blit_vb) + return; + + set_shaders(dev); + vb = r600_nomm_get_vb_ptr(dev); + } +#endif + + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] = i2f(dst_x / 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src 9 */ + set_tex_resource(rdev, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst 23 */ + set_render_target(rdev, COLOR_8_8_8_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); + + /* Vertex buffer setup 14 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + /* 78 ring dwords per loop */ + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c new file mode 100644 index 000000000000..d745e815c2e8 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c @@ -0,0 +1,1072 @@ + +#include +#include + +const u32 r6xx_default_state[] = +{ + 0xc0002400, + 0x00000000, + 0xc0012800, + 0x80000000, + 0x80000000, + 0xc0004600, + 0x00000016, + 0xc0016800, + 0x00000010, + 0x00028000, + 0xc0016800, + 0x00000010, + 0x00008000, + 0xc0016800, + 0x00000542, + 0x07000003, + 0xc0016800, + 0x000005c5, + 0x00000000, + 0xc0016800, + 0x00000363, + 0x00000000, + 0xc0016800, + 0x0000060c, + 0x82000000, + 0xc0016800, + 0x0000060e, + 0x01020204, + 0xc0016f00, + 0x00000000, + 0x00000000, + 0xc0016f00, + 0x00000001, + 0x00000000, + 0xc0096900, + 0x0000022a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000004, + 0x00000000, + 0xc0016900, + 0x0000000a, + 0x00000000, + 0xc0016900, + 0x0000000b, + 0x00000000, + 0xc0016900, + 0x0000010c, + 0x00000000, + 0xc0016900, + 0x0000010d, + 0x00000000, + 0xc0016900, + 0x00000200, + 0x00000000, + 0xc0016900, + 0x00000343, + 0x00000060, + 0xc0016900, + 0x00000344, + 0x00000040, + 0xc0016900, + 0x00000351, + 0x0000aa00, + 0xc0016900, + 0x00000104, + 0x00000000, + 0xc0016900, + 0x0000010e, + 0x00000000, + 0xc0046900, + 0x00000105, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0036900, + 0x00000109, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x0000030c, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x00000048, + 0x3f800000, + 0x00000000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x0000008e, + 0x0000000f, + 0xc0016900, + 0x00000080, + 0x00000000, + 0xc0016900, + 0x00000083, + 0x0000ffff, + 0xc0016900, + 0x00000084, + 0x00000000, + 0xc0016900, + 0x00000085, + 0x20002000, + 0xc0016900, + 0x00000086, + 0x00000000, + 0xc0016900, + 0x00000087, + 0x20002000, + 0xc0016900, + 0x00000088, + 0x00000000, + 0xc0016900, + 0x00000089, + 0x20002000, + 0xc0016900, + 0x0000008a, + 0x00000000, + 0xc0016900, + 0x0000008b, + 0x20002000, + 0xc0016900, + 0x0000008c, + 0x00000000, + 0xc0016900, + 0x00000094, + 0x80000000, + 0xc0016900, + 0x00000095, + 0x20002000, + 0xc0026900, + 0x000000b4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000096, + 0x80000000, + 0xc0016900, + 0x00000097, + 0x20002000, + 0xc0026900, + 0x000000b6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000098, + 0x80000000, + 0xc0016900, + 0x00000099, + 0x20002000, + 0xc0026900, + 0x000000b8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009a, + 0x80000000, + 0xc0016900, + 0x0000009b, + 0x20002000, + 0xc0026900, + 0x000000ba, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009c, + 0x80000000, + 0xc0016900, + 0x0000009d, + 0x20002000, + 0xc0026900, + 0x000000bc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009e, + 0x80000000, + 0xc0016900, + 0x0000009f, + 0x20002000, + 0xc0026900, + 0x000000be, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a0, + 0x80000000, + 0xc0016900, + 0x000000a1, + 0x20002000, + 0xc0026900, + 0x000000c0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a2, + 0x80000000, + 0xc0016900, + 0x000000a3, + 0x20002000, + 0xc0026900, + 0x000000c2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a4, + 0x80000000, + 0xc0016900, + 0x000000a5, + 0x20002000, + 0xc0026900, + 0x000000c4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a6, + 0x80000000, + 0xc0016900, + 0x000000a7, + 0x20002000, + 0xc0026900, + 0x000000c6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a8, + 0x80000000, + 0xc0016900, + 0x000000a9, + 0x20002000, + 0xc0026900, + 0x000000c8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000aa, + 0x80000000, + 0xc0016900, + 0x000000ab, + 0x20002000, + 0xc0026900, + 0x000000ca, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ac, + 0x80000000, + 0xc0016900, + 0x000000ad, + 0x20002000, + 0xc0026900, + 0x000000cc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ae, + 0x80000000, + 0xc0016900, + 0x000000af, + 0x20002000, + 0xc0026900, + 0x000000ce, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b0, + 0x80000000, + 0xc0016900, + 0x000000b1, + 0x20002000, + 0xc0026900, + 0x000000d0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b2, + 0x80000000, + 0xc0016900, + 0x000000b3, + 0x20002000, + 0xc0026900, + 0x000000d2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000293, + 0x00004010, + 0xc0016900, + 0x00000300, + 0x00000000, + 0xc0016900, + 0x00000301, + 0x00000000, + 0xc0016900, + 0x00000312, + 0xffffffff, + 0xc0016900, + 0x00000307, + 0x00000000, + 0xc0016900, + 0x00000308, + 0x00000000, + 0xc0016900, + 0x00000283, + 0x00000000, + 0xc0016900, + 0x00000292, + 0x00000000, + 0xc0066900, + 0x0000010f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000000, + 0xc0016900, + 0x00000207, + 0x00000000, + 0xc0016900, + 0x00000208, + 0x00000000, + 0xc0046900, + 0x00000303, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x00000205, + 0x00000004, + 0xc0016900, + 0x00000280, + 0x00000000, + 0xc0016900, + 0x00000281, + 0x00000000, + 0xc0016900, + 0x0000037e, + 0x00000000, + 0xc0016900, + 0x00000382, + 0x00000000, + 0xc0016900, + 0x00000380, + 0x00000000, + 0xc0016900, + 0x00000383, + 0x00000000, + 0xc0016900, + 0x00000381, + 0x00000000, + 0xc0016900, + 0x00000282, + 0x00000008, + 0xc0016900, + 0x00000302, + 0x0000002d, + 0xc0016900, + 0x0000037f, + 0x00000000, + 0xc0016900, + 0x000001b2, + 0x00000000, + 0xc0016900, + 0x000001b6, + 0x00000000, + 0xc0016900, + 0x000001b7, + 0x00000000, + 0xc0016900, + 0x000001b8, + 0x00000000, + 0xc0016900, + 0x000001b9, + 0x00000000, + 0xc0016900, + 0x00000225, + 0x00000000, + 0xc0016900, + 0x00000229, + 0x00000000, + 0xc0016900, + 0x00000237, + 0x00000000, + 0xc0016900, + 0x00000100, + 0x00000800, + 0xc0016900, + 0x00000101, + 0x00000000, + 0xc0016900, + 0x00000102, + 0x00000000, + 0xc0016900, + 0x000002a8, + 0x00000000, + 0xc0016900, + 0x000002a9, + 0x00000000, + 0xc0016900, + 0x00000103, + 0x00000000, + 0xc0016900, + 0x00000284, + 0x00000000, + 0xc0016900, + 0x00000290, + 0x00000000, + 0xc0016900, + 0x00000285, + 0x00000000, + 0xc0016900, + 0x00000286, + 0x00000000, + 0xc0016900, + 0x00000287, + 0x00000000, + 0xc0016900, + 0x00000288, + 0x00000000, + 0xc0016900, + 0x00000289, + 0x00000000, + 0xc0016900, + 0x0000028a, + 0x00000000, + 0xc0016900, + 0x0000028b, + 0x00000000, + 0xc0016900, + 0x0000028c, + 0x00000000, + 0xc0016900, + 0x0000028d, + 0x00000000, + 0xc0016900, + 0x0000028e, + 0x00000000, + 0xc0016900, + 0x0000028f, + 0x00000000, + 0xc0016900, + 0x000002a1, + 0x00000000, + 0xc0016900, + 0x000002a5, + 0x00000000, + 0xc0016900, + 0x000002ac, + 0x00000000, + 0xc0016900, + 0x000002ad, + 0x00000000, + 0xc0016900, + 0x000002ae, + 0x00000000, + 0xc0016900, + 0x000002c8, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000100, + 0xc0016900, + 0x00000204, + 0x00010000, + 0xc0036e00, + 0x00000000, + 0x00000012, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008f, + 0x0000000f, + 0xc0016900, + 0x000001e8, + 0x00000001, + 0xc0016900, + 0x00000202, + 0x00cc0000, + 0xc0016900, + 0x00000205, + 0x00000244, + 0xc0016900, + 0x00000203, + 0x00000210, + 0xc0016900, + 0x000001b1, + 0x00000000, + 0xc0016900, + 0x00000185, + 0x00000000, + 0xc0016900, + 0x000001b3, + 0x00000001, + 0xc0016900, + 0x000001b4, + 0x00000000, + 0xc0016900, + 0x00000191, + 0x00000b00, + 0xc0016900, + 0x000001b5, + 0x00000000, +}; + +const u32 r7xx_default_state[] = +{ + 0xc0012800, + 0x80000000, + 0x80000000, + 0xc0004600, + 0x00000016, + 0xc0016800, + 0x00000010, + 0x00028000, + 0xc0016800, + 0x00000010, + 0x00008000, + 0xc0016800, + 0x00000542, + 0x07000002, + 0xc0016800, + 0x000005c5, + 0x00000000, + 0xc0016800, + 0x00000363, + 0x00004000, + 0xc0016800, + 0x0000060c, + 0x00000000, + 0xc0016800, + 0x0000060e, + 0x00420204, + 0xc0016f00, + 0x00000000, + 0x00000000, + 0xc0016f00, + 0x00000001, + 0x00000000, + 0xc0096900, + 0x0000022a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000004, + 0x00000000, + 0xc0016900, + 0x0000000a, + 0x00000000, + 0xc0016900, + 0x0000000b, + 0x00000000, + 0xc0016900, + 0x0000010c, + 0x00000000, + 0xc0016900, + 0x0000010d, + 0x00000000, + 0xc0016900, + 0x00000200, + 0x00000000, + 0xc0016900, + 0x00000343, + 0x00000060, + 0xc0016900, + 0x00000344, + 0x00000000, + 0xc0016900, + 0x00000351, + 0x0000aa00, + 0xc0016900, + 0x00000104, + 0x00000000, + 0xc0016900, + 0x0000010e, + 0x00000000, + 0xc0046900, + 0x00000105, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0046900, + 0x0000030c, + 0x01000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008e, + 0x0000000f, + 0xc0016900, + 0x00000080, + 0x00000000, + 0xc0016900, + 0x00000083, + 0x0000ffff, + 0xc0016900, + 0x00000084, + 0x00000000, + 0xc0016900, + 0x00000085, + 0x20002000, + 0xc0016900, + 0x00000086, + 0x00000000, + 0xc0016900, + 0x00000087, + 0x20002000, + 0xc0016900, + 0x00000088, + 0x00000000, + 0xc0016900, + 0x00000089, + 0x20002000, + 0xc0016900, + 0x0000008a, + 0x00000000, + 0xc0016900, + 0x0000008b, + 0x20002000, + 0xc0016900, + 0x0000008c, + 0xaaaaaaaa, + 0xc0016900, + 0x00000094, + 0x80000000, + 0xc0016900, + 0x00000095, + 0x20002000, + 0xc0026900, + 0x000000b4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000096, + 0x80000000, + 0xc0016900, + 0x00000097, + 0x20002000, + 0xc0026900, + 0x000000b6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000098, + 0x80000000, + 0xc0016900, + 0x00000099, + 0x20002000, + 0xc0026900, + 0x000000b8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009a, + 0x80000000, + 0xc0016900, + 0x0000009b, + 0x20002000, + 0xc0026900, + 0x000000ba, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009c, + 0x80000000, + 0xc0016900, + 0x0000009d, + 0x20002000, + 0xc0026900, + 0x000000bc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x0000009e, + 0x80000000, + 0xc0016900, + 0x0000009f, + 0x20002000, + 0xc0026900, + 0x000000be, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a0, + 0x80000000, + 0xc0016900, + 0x000000a1, + 0x20002000, + 0xc0026900, + 0x000000c0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a2, + 0x80000000, + 0xc0016900, + 0x000000a3, + 0x20002000, + 0xc0026900, + 0x000000c2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a4, + 0x80000000, + 0xc0016900, + 0x000000a5, + 0x20002000, + 0xc0026900, + 0x000000c4, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a6, + 0x80000000, + 0xc0016900, + 0x000000a7, + 0x20002000, + 0xc0026900, + 0x000000c6, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000a8, + 0x80000000, + 0xc0016900, + 0x000000a9, + 0x20002000, + 0xc0026900, + 0x000000c8, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000aa, + 0x80000000, + 0xc0016900, + 0x000000ab, + 0x20002000, + 0xc0026900, + 0x000000ca, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ac, + 0x80000000, + 0xc0016900, + 0x000000ad, + 0x20002000, + 0xc0026900, + 0x000000cc, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000ae, + 0x80000000, + 0xc0016900, + 0x000000af, + 0x20002000, + 0xc0026900, + 0x000000ce, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b0, + 0x80000000, + 0xc0016900, + 0x000000b1, + 0x20002000, + 0xc0026900, + 0x000000d0, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x000000b2, + 0x80000000, + 0xc0016900, + 0x000000b3, + 0x20002000, + 0xc0026900, + 0x000000d2, + 0x00000000, + 0x3f800000, + 0xc0016900, + 0x00000293, + 0x00514000, + 0xc0016900, + 0x00000300, + 0x00000000, + 0xc0016900, + 0x00000301, + 0x00000000, + 0xc0016900, + 0x00000312, + 0xffffffff, + 0xc0016900, + 0x00000307, + 0x00000000, + 0xc0016900, + 0x00000308, + 0x00000000, + 0xc0016900, + 0x00000283, + 0x00000000, + 0xc0016900, + 0x00000292, + 0x00000000, + 0xc0066900, + 0x0000010f, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000000, + 0xc0016900, + 0x00000207, + 0x00000000, + 0xc0016900, + 0x00000208, + 0x00000000, + 0xc0046900, + 0x00000303, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0x3f800000, + 0xc0016900, + 0x00000205, + 0x00000004, + 0xc0016900, + 0x00000280, + 0x00000000, + 0xc0016900, + 0x00000281, + 0x00000000, + 0xc0016900, + 0x0000037e, + 0x00000000, + 0xc0016900, + 0x00000382, + 0x00000000, + 0xc0016900, + 0x00000380, + 0x00000000, + 0xc0016900, + 0x00000383, + 0x00000000, + 0xc0016900, + 0x00000381, + 0x00000000, + 0xc0016900, + 0x00000282, + 0x00000008, + 0xc0016900, + 0x00000302, + 0x0000002d, + 0xc0016900, + 0x0000037f, + 0x00000000, + 0xc0016900, + 0x000001b2, + 0x00000001, + 0xc0016900, + 0x000001b6, + 0x00000000, + 0xc0016900, + 0x000001b7, + 0x00000000, + 0xc0016900, + 0x000001b8, + 0x00000000, + 0xc0016900, + 0x000001b9, + 0x00000000, + 0xc0016900, + 0x00000225, + 0x00000000, + 0xc0016900, + 0x00000229, + 0x00000000, + 0xc0016900, + 0x00000237, + 0x00000000, + 0xc0016900, + 0x00000100, + 0x00000800, + 0xc0016900, + 0x00000101, + 0x00000000, + 0xc0016900, + 0x00000102, + 0x00000000, + 0xc0016900, + 0x000002a8, + 0x00000000, + 0xc0016900, + 0x000002a9, + 0x00000000, + 0xc0016900, + 0x00000103, + 0x00000000, + 0xc0016900, + 0x00000284, + 0x00000000, + 0xc0016900, + 0x00000290, + 0x00000000, + 0xc0016900, + 0x00000285, + 0x00000000, + 0xc0016900, + 0x00000286, + 0x00000000, + 0xc0016900, + 0x00000287, + 0x00000000, + 0xc0016900, + 0x00000288, + 0x00000000, + 0xc0016900, + 0x00000289, + 0x00000000, + 0xc0016900, + 0x0000028a, + 0x00000000, + 0xc0016900, + 0x0000028b, + 0x00000000, + 0xc0016900, + 0x0000028c, + 0x00000000, + 0xc0016900, + 0x0000028d, + 0x00000000, + 0xc0016900, + 0x0000028e, + 0x00000000, + 0xc0016900, + 0x0000028f, + 0x00000000, + 0xc0016900, + 0x000002a1, + 0x00000000, + 0xc0016900, + 0x000002a5, + 0x00000000, + 0xc0016900, + 0x000002ac, + 0x00000000, + 0xc0016900, + 0x000002ad, + 0x00000000, + 0xc0016900, + 0x000002ae, + 0x00000000, + 0xc0016900, + 0x000002c8, + 0x00000000, + 0xc0016900, + 0x00000206, + 0x00000100, + 0xc0016900, + 0x00000204, + 0x00010000, + 0xc0036e00, + 0x00000000, + 0x00000012, + 0x00000000, + 0x00000000, + 0xc0016900, + 0x0000008f, + 0x0000000f, + 0xc0016900, + 0x000001e8, + 0x00000001, + 0xc0016900, + 0x00000202, + 0x00cc0000, + 0xc0016900, + 0x00000205, + 0x00000244, + 0xc0016900, + 0x00000203, + 0x00000210, + 0xc0016900, + 0x000001b1, + 0x00000000, + 0xc0016900, + 0x00000185, + 0x00000000, + 0xc0016900, + 0x000001b3, + 0x00000001, + 0xc0016900, + 0x000001b4, + 0x00000000, + 0xc0016900, + 0x00000191, + 0x00000b00, + 0xc0016900, + 0x000001b5, + 0x00000000, +}; + +/* same for r6xx/r7xx */ +const u32 r6xx_vs[] = +{ + 0x00000004, + 0x81000000, + 0x0000203c, + 0x94000b08, + 0x00004000, + 0x14200b1a, + 0x00000000, + 0x00000000, + 0x3c000000, + 0x68cd1000, + 0x00080000, + 0x00000000, +}; + +const u32 r6xx_ps[] = +{ + 0x00000002, + 0x80800000, + 0x00000000, + 0x94200688, + 0x00000010, + 0x000d1000, + 0xb0800000, + 0x00000000, +}; + +const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps); +const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs); +const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state); +const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state); diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h new file mode 100644 index 000000000000..fdc3b378cbb0 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h @@ -0,0 +1,14 @@ + +#ifndef R600_BLIT_SHADERS_H +#define R600_BLIT_SHADERS_H + +extern const u32 r6xx_ps[]; +extern const u32 r6xx_vs[]; +extern const u32 r7xx_default_state[]; +extern const u32 r6xx_default_state[]; + + +extern const u32 r6xx_ps_size, r6xx_vs_size; +extern const u32 r6xx_default_size, r7xx_default_size; + +#endif diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 8327912de964..6d5a711c2e91 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -58,6 +58,12 @@ MODULE_FIRMWARE("radeon/RV730_me.bin"); MODULE_FIRMWARE("radeon/RV710_pfp.bin"); MODULE_FIRMWARE("radeon/RV710_me.bin"); + +int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, + unsigned family, u32 *ib, int *l); +void r600_cs_legacy_init(void); + + # define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */ # define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1)) @@ -1857,6 +1863,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, DRM_DEBUG("\n"); + mutex_init(&dev_priv->cs_mutex); + r600_cs_legacy_init(); /* if we require new memory map but we don't have it fail */ if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) { DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n"); @@ -1888,7 +1896,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, /* Enable vblank on CRTC1 for older X servers */ dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1; - + dev_priv->do_boxes = 0; dev_priv->cp_mode = init->cp_mode; /* We don't support anything other than bus-mastering ring mode, @@ -1974,11 +1982,11 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } else #endif { - dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset; + dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset; dev_priv->ring_rptr->handle = - (void *)dev_priv->ring_rptr->offset; + (void *)(unsigned long)dev_priv->ring_rptr->offset; dev->agp_buffer_map->handle = - (void *)dev->agp_buffer_map->offset; + (void *)(unsigned long)dev->agp_buffer_map->offset; DRM_DEBUG("dev_priv->cp_ring->handle %p\n", dev_priv->cp_ring->handle); @@ -2282,3 +2290,239 @@ int r600_cp_dispatch_indirect(struct drm_device *dev, return 0; } + +void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_master *master = file_priv->master; + struct drm_radeon_master_private *master_priv = master->driver_priv; + drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv; + int nbox = sarea_priv->nbox; + struct drm_clip_rect *pbox = sarea_priv->boxes; + int i, cpp, src_pitch, dst_pitch; + uint64_t src, dst; + RING_LOCALS; + DRM_DEBUG("\n"); + + if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888) + cpp = 4; + else + cpp = 2; + + if (sarea_priv->pfCurrentPage == 0) { + src_pitch = dev_priv->back_pitch; + dst_pitch = dev_priv->front_pitch; + src = dev_priv->back_offset + dev_priv->fb_location; + dst = dev_priv->front_offset + dev_priv->fb_location; + } else { + src_pitch = dev_priv->front_pitch; + dst_pitch = dev_priv->back_pitch; + src = dev_priv->front_offset + dev_priv->fb_location; + dst = dev_priv->back_offset + dev_priv->fb_location; + } + + if (r600_prepare_blit_copy(dev, file_priv)) { + DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); + return; + } + for (i = 0; i < nbox; i++) { + int x = pbox[i].x1; + int y = pbox[i].y1; + int w = pbox[i].x2 - x; + int h = pbox[i].y2 - y; + + DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h); + + r600_blit_swap(dev, + src, dst, + x, y, x, y, w, h, + src_pitch, dst_pitch, cpp); + } + r600_done_blit_copy(dev); + + /* Increment the frame counter. The client-side 3D driver must + * throttle the framerate by waiting for this value before + * performing the swapbuffer ioctl. + */ + sarea_priv->last_frame++; + + BEGIN_RING(3); + R600_FRAME_AGE(sarea_priv->last_frame); + ADVANCE_RING(); +} + +int r600_cp_dispatch_texture(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_texture_t *tex, + drm_radeon_tex_image_t *image) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_buf *buf; + u32 *buffer; + const u8 __user *data; + int size, pass_size; + u64 src_offset, dst_offset; + + if (!radeon_check_offset(dev_priv, tex->offset)) { + DRM_ERROR("Invalid destination offset\n"); + return -EINVAL; + } + + /* this might fail for zero-sized uploads - are those illegal? */ + if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) { + DRM_ERROR("Invalid final destination offset\n"); + return -EINVAL; + } + + size = tex->height * tex->pitch; + + if (size == 0) + return 0; + + dst_offset = tex->offset; + + if (r600_prepare_blit_copy(dev, file_priv)) { + DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); + return -EAGAIN; + } + do { + data = (const u8 __user *)image->data; + pass_size = size; + + buf = radeon_freelist_get(dev); + if (!buf) { + DRM_DEBUG("EAGAIN\n"); + if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image))) + return -EFAULT; + return -EAGAIN; + } + + if (pass_size > buf->total) + pass_size = buf->total; + + /* Dispatch the indirect buffer. + */ + buffer = + (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset); + + if (DRM_COPY_FROM_USER(buffer, data, pass_size)) { + DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size); + return -EFAULT; + } + + buf->file_priv = file_priv; + buf->used = pass_size; + src_offset = dev_priv->gart_buffers_offset + buf->offset; + + r600_blit_copy(dev, src_offset, dst_offset, pass_size); + + radeon_cp_discard_buffer(dev, file_priv->master, buf); + + /* Update the input parameters for next time */ + image->data = (const u8 __user *)image->data + pass_size; + dst_offset += pass_size; + size -= pass_size; + } while (size > 0); + r600_done_blit_copy(dev); + + return 0; +} + +/* + * Legacy cs ioctl + */ +static u32 radeon_cs_id_get(struct drm_radeon_private *radeon) +{ + /* FIXME: check if wrap affect last reported wrap & sequence */ + radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF; + if (!radeon->cs_id_scnt) { + /* increment wrap counter */ + radeon->cs_id_wcnt += 0x01000000; + /* valid sequence counter start at 1 */ + radeon->cs_id_scnt = 1; + } + return (radeon->cs_id_scnt | radeon->cs_id_wcnt); +} + +static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id) +{ + RING_LOCALS; + + *id = radeon_cs_id_get(dev_priv); + + /* SCRATCH 2 */ + BEGIN_RING(3); + R600_CLEAR_AGE(*id); + ADVANCE_RING(); + COMMIT_RING(); +} + +static int r600_ib_get(struct drm_device *dev, + struct drm_file *fpriv, + struct drm_buf **buffer) +{ + struct drm_buf *buf; + + *buffer = NULL; + buf = radeon_freelist_get(dev); + if (!buf) { + return -EBUSY; + } + buf->file_priv = fpriv; + *buffer = buf; + return 0; +} + +static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf, + struct drm_file *fpriv, int l, int r) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + + if (buf) { + if (!r) + r600_cp_dispatch_indirect(dev, buf, 0, l * 4); + radeon_cp_discard_buffer(dev, fpriv->master, buf); + COMMIT_RING(); + } +} + +int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv) +{ + struct drm_radeon_private *dev_priv = dev->dev_private; + struct drm_radeon_cs *cs = data; + struct drm_buf *buf; + unsigned family; + int l, r = 0; + u32 *ib, cs_id = 0; + + if (dev_priv == NULL) { + DRM_ERROR("called with no initialization\n"); + return -EINVAL; + } + family = dev_priv->flags & RADEON_FAMILY_MASK; + if (family < CHIP_R600) { + DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n"); + return -EINVAL; + } + mutex_lock(&dev_priv->cs_mutex); + /* get ib */ + r = r600_ib_get(dev, fpriv, &buf); + if (r) { + DRM_ERROR("ib_get failed\n"); + goto out; + } + ib = dev->agp_buffer_map->handle + buf->offset; + /* now parse command stream */ + r = r600_cs_legacy(dev, data, fpriv, family, ib, &l); + if (r) { + goto out; + } + +out: + r600_ib_free(dev, buf, fpriv, l, r); + /* emit cs id sequence */ + r600_cs_id_emit(dev_priv, &cs_id); + cs->cs_id = cs_id; + mutex_unlock(&dev_priv->cs_mutex); + return r; +} diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c new file mode 100644 index 000000000000..39bf6349351b --- /dev/null +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -0,0 +1,658 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include "drmP.h" +#include "radeon.h" +#include "radeon_share.h" +#include "r600d.h" +#include "avivod.h" + +static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); +static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); +typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**); +static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm; + +/** + * r600_cs_packet_parse() - parse cp packet and point ib index to next packet + * @parser: parser structure holding parsing context. + * @pkt: where to store packet informations + * + * Assume that chunk_ib_index is properly set. Will return -EINVAL + * if packet is bigger than remaining ib size. or if packets is unknown. + **/ +int r600_cs_packet_parse(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx) +{ + struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; + uint32_t header; + + if (idx >= ib_chunk->length_dw) { + DRM_ERROR("Can not parse packet at %d after CS end %d !\n", + idx, ib_chunk->length_dw); + return -EINVAL; + } + header = ib_chunk->kdata[idx]; + pkt->idx = idx; + pkt->type = CP_PACKET_GET_TYPE(header); + pkt->count = CP_PACKET_GET_COUNT(header); + pkt->one_reg_wr = 0; + switch (pkt->type) { + case PACKET_TYPE0: + pkt->reg = CP_PACKET0_GET_REG(header); + break; + case PACKET_TYPE3: + pkt->opcode = CP_PACKET3_GET_OPCODE(header); + break; + case PACKET_TYPE2: + pkt->count = -1; + break; + default: + DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx); + return -EINVAL; + } + if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) { + DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n", + pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw); + return -EINVAL; + } + return 0; +} + +/** + * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3 + * @parser: parser structure holding parsing context. + * @data: pointer to relocation data + * @offset_start: starting offset + * @offset_mask: offset mask (to align start offset on) + * @reloc: reloc informations + * + * Check next packet is relocation packet3, do bo validation and compute + * GPU offset using the provided start. + **/ +static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_chunk *relocs_chunk; + struct radeon_cs_packet p3reloc; + unsigned idx; + int r; + + if (p->chunk_relocs_idx == -1) { + DRM_ERROR("No relocation chunk !\n"); + return -EINVAL; + } + *cs_reloc = NULL; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + r = r600_cs_packet_parse(p, &p3reloc, p->idx); + if (r) { + return r; + } + p->idx += p3reloc.count + 2; + if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { + DRM_ERROR("No packet3 for relocation for packet at %d.\n", + p3reloc.idx); + return -EINVAL; + } + idx = ib_chunk->kdata[p3reloc.idx + 1]; + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + /* FIXME: we assume reloc size is 4 dwords */ + *cs_reloc = p->relocs_ptr[(idx / 4)]; + return 0; +} + +/** + * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3 + * @parser: parser structure holding parsing context. + * @data: pointer to relocation data + * @offset_start: starting offset + * @offset_mask: offset mask (to align start offset on) + * @reloc: reloc informations + * + * Check next packet is relocation packet3, do bo validation and compute + * GPU offset using the provided start. + **/ +static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_chunk *relocs_chunk; + struct radeon_cs_packet p3reloc; + unsigned idx; + int r; + + if (p->chunk_relocs_idx == -1) { + DRM_ERROR("No relocation chunk !\n"); + return -EINVAL; + } + *cs_reloc = NULL; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + r = r600_cs_packet_parse(p, &p3reloc, p->idx); + if (r) { + return r; + } + p->idx += p3reloc.count + 2; + if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { + DRM_ERROR("No packet3 for relocation for packet at %d.\n", + p3reloc.idx); + return -EINVAL; + } + idx = ib_chunk->kdata[p3reloc.idx + 1]; + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + *cs_reloc = &p->relocs[0]; + (*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32; + (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0]; + return 0; +} + +static int r600_packet0_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, unsigned reg) +{ + switch (reg) { + case AVIVO_D1MODE_VLINE_START_END: + case AVIVO_D2MODE_VLINE_START_END: + break; + default: + printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", + reg, idx); + return -EINVAL; + } + return 0; +} + +static int r600_cs_parse_packet0(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt) +{ + unsigned reg, i; + unsigned idx; + int r; + + idx = pkt->idx + 1; + reg = pkt->reg; + for (i = 0; i <= pkt->count; i++, idx++, reg += 4) { + r = r600_packet0_check(p, pkt, idx, reg); + if (r) { + return r; + } + } + return 0; +} + +static int r600_packet3_check(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt) +{ + struct radeon_cs_chunk *ib_chunk; + struct radeon_cs_reloc *reloc; + volatile u32 *ib; + unsigned idx; + unsigned i; + unsigned start_reg, end_reg, reg; + int r; + + ib = p->ib->ptr; + ib_chunk = &p->chunks[p->chunk_ib_idx]; + idx = pkt->idx + 1; + switch (pkt->opcode) { + case PACKET3_START_3D_CMDBUF: + if (p->family >= CHIP_RV770 || pkt->count) { + DRM_ERROR("bad START_3D\n"); + return -EINVAL; + } + break; + case PACKET3_CONTEXT_CONTROL: + if (pkt->count != 1) { + DRM_ERROR("bad CONTEXT_CONTROL\n"); + return -EINVAL; + } + break; + case PACKET3_INDEX_TYPE: + case PACKET3_NUM_INSTANCES: + if (pkt->count) { + DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n"); + return -EINVAL; + } + break; + case PACKET3_DRAW_INDEX: + if (pkt->count != 3) { + DRM_ERROR("bad DRAW_INDEX\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad DRAW_INDEX\n"); + return -EINVAL; + } + ib[idx+0] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+1] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case PACKET3_DRAW_INDEX_AUTO: + if (pkt->count != 1) { + DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + return -EINVAL; + } + break; + case PACKET3_DRAW_INDEX_IMMD_BE: + case PACKET3_DRAW_INDEX_IMMD: + if (pkt->count < 2) { + DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + return -EINVAL; + } + break; + case PACKET3_WAIT_REG_MEM: + if (pkt->count != 5) { + DRM_ERROR("bad WAIT_REG_MEM\n"); + return -EINVAL; + } + /* bit 4 is reg (0) or mem (1) */ + if (ib_chunk->kdata[idx+0] & 0x10) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad WAIT_REG_MEM\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + } + break; + case PACKET3_SURFACE_SYNC: + if (pkt->count != 3) { + DRM_ERROR("bad SURFACE_SYNC\n"); + return -EINVAL; + } + /* 0xffffffff/0x0 is flush all cache flag */ + if (ib_chunk->kdata[idx+1] != 0xffffffff || + ib_chunk->kdata[idx+2] != 0) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SURFACE_SYNC\n"); + return -EINVAL; + } + ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + } + break; + case PACKET3_EVENT_WRITE: + if (pkt->count != 2 && pkt->count != 0) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + if (pkt->count) { + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + } + break; + case PACKET3_EVENT_WRITE_EOP: + if (pkt->count != 4) { + DRM_ERROR("bad EVENT_WRITE_EOP\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad EVENT_WRITE\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case PACKET3_SET_CONFIG_REG: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONFIG_REG_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) || + (start_reg >= PACKET3_SET_CONFIG_REG_END) || + (end_reg >= PACKET3_SET_CONFIG_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + switch (reg) { + case CP_COHER_BASE: + /* use PACKET3_SURFACE_SYNC */ + return -EINVAL; + default: + break; + } + } + break; + case PACKET3_SET_CONTEXT_REG: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONTEXT_REG_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) || + (start_reg >= PACKET3_SET_CONTEXT_REG_END) || + (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { + DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + return -EINVAL; + } + for (i = 0; i < pkt->count; i++) { + reg = start_reg + (4 * i); + switch (reg) { + case DB_DEPTH_BASE: + case CB_COLOR0_BASE: + case CB_COLOR1_BASE: + case CB_COLOR2_BASE: + case CB_COLOR3_BASE: + case CB_COLOR4_BASE: + case CB_COLOR5_BASE: + case CB_COLOR6_BASE: + case CB_COLOR7_BASE: + case SQ_PGM_START_FS: + case SQ_PGM_START_ES: + case SQ_PGM_START_VS: + case SQ_PGM_START_GS: + case SQ_PGM_START_PS: + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; + case VGT_DMA_BASE: + case VGT_DMA_BASE_HI: + /* These should be handled by DRAW_INDEX packet 3 */ + case VGT_STRMOUT_BASE_OFFSET_0: + case VGT_STRMOUT_BASE_OFFSET_1: + case VGT_STRMOUT_BASE_OFFSET_2: + case VGT_STRMOUT_BASE_OFFSET_3: + case VGT_STRMOUT_BASE_OFFSET_HI_0: + case VGT_STRMOUT_BASE_OFFSET_HI_1: + case VGT_STRMOUT_BASE_OFFSET_HI_2: + case VGT_STRMOUT_BASE_OFFSET_HI_3: + case VGT_STRMOUT_BUFFER_BASE_0: + case VGT_STRMOUT_BUFFER_BASE_1: + case VGT_STRMOUT_BUFFER_BASE_2: + case VGT_STRMOUT_BUFFER_BASE_3: + case VGT_STRMOUT_BUFFER_OFFSET_0: + case VGT_STRMOUT_BUFFER_OFFSET_1: + case VGT_STRMOUT_BUFFER_OFFSET_2: + case VGT_STRMOUT_BUFFER_OFFSET_3: + /* These should be handled by STRMOUT_BUFFER packet 3 */ + DRM_ERROR("bad context reg: 0x%08x\n", reg); + return -EINVAL; + default: + break; + } + } + break; + case PACKET3_SET_RESOURCE: + if (pkt->count % 7) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + start_reg = (ib[idx+0] << 2) + PACKET3_SET_RESOURCE_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) || + (start_reg >= PACKET3_SET_RESOURCE_END) || + (end_reg >= PACKET3_SET_RESOURCE_END)) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + for (i = 0; i < (pkt->count / 7); i++) { + switch (G__SQ_VTX_CONSTANT_TYPE(ib[idx+(i*7)+6+1])) { + case SQ_TEX_VTX_VALID_TEXTURE: + /* tex base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + /* tex mip base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; + case SQ_TEX_VTX_VALID_BUFFER: + /* vtx base */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff); + ib[idx+1+(i*7)+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff; + break; + case SQ_TEX_VTX_INVALID_TEXTURE: + case SQ_TEX_VTX_INVALID_BUFFER: + default: + DRM_ERROR("bad SET_RESOURCE\n"); + return -EINVAL; + } + } + break; + case PACKET3_SET_ALU_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_ALU_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) || + (start_reg >= PACKET3_SET_ALU_CONST_END) || + (end_reg >= PACKET3_SET_ALU_CONST_END)) { + DRM_ERROR("bad SET_ALU_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_BOOL_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_BOOL_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) || + (start_reg >= PACKET3_SET_BOOL_CONST_END) || + (end_reg >= PACKET3_SET_BOOL_CONST_END)) { + DRM_ERROR("bad SET_BOOL_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_LOOP_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_LOOP_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) || + (start_reg >= PACKET3_SET_LOOP_CONST_END) || + (end_reg >= PACKET3_SET_LOOP_CONST_END)) { + DRM_ERROR("bad SET_LOOP_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_CTL_CONST: + start_reg = (ib[idx+0] << 2) + PACKET3_SET_CTL_CONST_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) || + (start_reg >= PACKET3_SET_CTL_CONST_END) || + (end_reg >= PACKET3_SET_CTL_CONST_END)) { + DRM_ERROR("bad SET_CTL_CONST\n"); + return -EINVAL; + } + break; + case PACKET3_SET_SAMPLER: + if (pkt->count % 3) { + DRM_ERROR("bad SET_SAMPLER\n"); + return -EINVAL; + } + start_reg = (ib[idx+0] << 2) + PACKET3_SET_SAMPLER_OFFSET; + end_reg = 4 * pkt->count + start_reg - 4; + if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) || + (start_reg >= PACKET3_SET_SAMPLER_END) || + (end_reg >= PACKET3_SET_SAMPLER_END)) { + DRM_ERROR("bad SET_SAMPLER\n"); + return -EINVAL; + } + break; + case PACKET3_SURFACE_BASE_UPDATE: + if (p->family >= CHIP_RV770 || p->family == CHIP_R600) { + DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + return -EINVAL; + } + if (pkt->count) { + DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + return -EINVAL; + } + break; + case PACKET3_NOP: + break; + default: + DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + return -EINVAL; + } + return 0; +} + +int r600_cs_parse(struct radeon_cs_parser *p) +{ + struct radeon_cs_packet pkt; + int r; + + do { + r = r600_cs_packet_parse(p, &pkt, p->idx); + if (r) { + return r; + } + p->idx += pkt.count + 2; + switch (pkt.type) { + case PACKET_TYPE0: + r = r600_cs_parse_packet0(p, &pkt); + break; + case PACKET_TYPE2: + break; + case PACKET_TYPE3: + r = r600_packet3_check(p, &pkt); + break; + default: + DRM_ERROR("Unknown packet type %d !\n", pkt.type); + return -EINVAL; + } + if (r) { + return r; + } + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); +#if 0 + for (r = 0; r < p->ib->length_dw; r++) { + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib->ptr[r]); + mdelay(1); + } +#endif + return 0; +} + +static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p) +{ + if (p->chunk_relocs_idx == -1) { + return 0; + } + p->relocs = kcalloc(1, sizeof(struct radeon_cs_reloc), GFP_KERNEL); + if (p->relocs == NULL) { + return -ENOMEM; + } + return 0; +} + +/** + * cs_parser_fini() - clean parser states + * @parser: parser structure holding parsing context. + * @error: error number + * + * If error is set than unvalidate buffer, otherwise just free memory + * used by parsing context. + **/ +static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error) +{ + unsigned i; + + kfree(parser->relocs); + for (i = 0; i < parser->nchunks; i++) { + kfree(parser->chunks[i].kdata); + } + kfree(parser->chunks); + kfree(parser->chunks_array); +} + +int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, + unsigned family, u32 *ib, int *l) +{ + struct radeon_cs_parser parser; + struct radeon_cs_chunk *ib_chunk; + struct radeon_ib fake_ib; + int r; + + /* initialize parser */ + memset(&parser, 0, sizeof(struct radeon_cs_parser)); + parser.filp = filp; + parser.rdev = NULL; + parser.family = family; + parser.ib = &fake_ib; + fake_ib.ptr = ib; + r = radeon_cs_parser_init(&parser, data); + if (r) { + DRM_ERROR("Failed to initialize parser !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + r = r600_cs_parser_relocs_legacy(&parser); + if (r) { + DRM_ERROR("Failed to parse relocation !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + /* Copy the packet into the IB, the parser will read from the + * input memory (cached) and write to the IB (which can be + * uncached). */ + ib_chunk = &parser.chunks[parser.chunk_ib_idx]; + parser.ib->length_dw = ib_chunk->length_dw; + memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4); + *l = parser.ib->length_dw; + r = r600_cs_parse(&parser); + if (r) { + DRM_ERROR("Invalid command stream !\n"); + r600_cs_parser_fini(&parser, r); + return r; + } + r600_cs_parser_fini(&parser, r); + return r; +} + +void r600_cs_legacy_init(void) +{ + r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; +} diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h new file mode 100644 index 000000000000..723295f59281 --- /dev/null +++ b/drivers/gpu/drm/radeon/r600d.h @@ -0,0 +1,661 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef R600D_H +#define R600D_H + +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define R6XX_MAX_SH_GPRS 256 +#define R6XX_MAX_TEMP_GPRS 16 +#define R6XX_MAX_SH_THREADS 256 +#define R6XX_MAX_SH_STACK_ENTRIES 4096 +#define R6XX_MAX_BACKENDS 8 +#define R6XX_MAX_BACKENDS_MASK 0xff +#define R6XX_MAX_SIMDS 8 +#define R6XX_MAX_SIMDS_MASK 0xff +#define R6XX_MAX_PIPES 8 +#define R6XX_MAX_PIPES_MASK 0xff + +/* PTE flags */ +#define PTE_VALID (1 << 0) +#define PTE_SYSTEM (1 << 1) +#define PTE_SNOOPED (1 << 2) +#define PTE_READABLE (1 << 5) +#define PTE_WRITEABLE (1 << 6) + +/* Registers */ +#define ARB_POP 0x2418 +#define ENABLE_TC128 (1 << 30) +#define ARB_GDEC_RD_CNTL 0x246C + +#define CC_GC_SHADER_PIPE_CONFIG 0x8950 +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) + +#define CB_COLOR0_BASE 0x28040 +#define CB_COLOR1_BASE 0x28044 +#define CB_COLOR2_BASE 0x28048 +#define CB_COLOR3_BASE 0x2804C +#define CB_COLOR4_BASE 0x28050 +#define CB_COLOR5_BASE 0x28054 +#define CB_COLOR6_BASE 0x28058 +#define CB_COLOR7_BASE 0x2805C +#define CB_COLOR7_FRAG 0x280FC + +#define CB_COLOR0_SIZE 0x28060 +#define CB_COLOR0_VIEW 0x28080 +#define CB_COLOR0_INFO 0x280a0 +#define CB_COLOR0_TILE 0x280c0 +#define CB_COLOR0_FRAG 0x280e0 +#define CB_COLOR0_MASK 0x28100 + +#define CONFIG_MEMSIZE 0x5428 +#define CP_STAT 0x8680 +#define CP_COHER_BASE 0x85F8 +#define CP_DEBUG 0xC1FC +#define R_0086D8_CP_ME_CNTL 0x86D8 +#define S_0086D8_CP_ME_HALT(x) (((x) & 1)<<28) +#define C_0086D8_CP_ME_HALT(x) ((x) & 0xEFFFFFFF) +#define CP_ME_RAM_DATA 0xC160 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_MEQ_THRESHOLDS 0x8764 +#define MEQ_END(x) ((x) << 16) +#define ROQ_END(x) ((x) << 24) +#define CP_PERFMON_CNTL 0x87FC +#define CP_PFP_UCODE_ADDR 0xC150 +#define CP_PFP_UCODE_DATA 0xC154 +#define CP_QUEUE_THRESHOLDS 0x8760 +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_RB_BASE 0xC100 +#define CP_RB_CNTL 0xC104 +#define RB_BUFSZ(x) ((x)<<0) +#define RB_BLKSZ(x) ((x)<<8) +#define RB_NO_UPDATE (1<<27) +#define RB_RPTR_WR_ENA (1<<31) +#define BUF_SWAP_32BIT (2 << 16) +#define CP_RB_RPTR 0x8700 +#define CP_RB_RPTR_ADDR 0xC10C +#define CP_RB_RPTR_ADDR_HI 0xC110 +#define CP_RB_RPTR_WR 0xC108 +#define CP_RB_WPTR 0xC114 +#define CP_RB_WPTR_ADDR 0xC118 +#define CP_RB_WPTR_ADDR_HI 0xC11C +#define CP_RB_WPTR_DELAY 0x8704 +#define CP_ROQ_IB1_STAT 0x8784 +#define CP_ROQ_IB2_STAT 0x8788 +#define CP_SEM_WAIT_TIMER 0x85BC + +#define DB_DEBUG 0x9830 +#define PREZ_MUST_WAIT_FOR_POSTZ_DONE (1 << 31) +#define DB_DEPTH_BASE 0x2800C +#define DB_WATERMARKS 0x9838 +#define DEPTH_FREE(x) ((x) << 0) +#define DEPTH_FLUSH(x) ((x) << 5) +#define DEPTH_PENDING_FREE(x) ((x) << 15) +#define DEPTH_CACHELINE_FREE(x) ((x) << 20) + +#define DCP_TILING_CONFIG 0x6CA0 +#define PIPE_TILING(x) ((x) << 1) +#define BANK_TILING(x) ((x) << 4) +#define GROUP_SIZE(x) ((x) << 6) +#define ROW_TILING(x) ((x) << 8) +#define BANK_SWAPS(x) ((x) << 11) +#define SAMPLE_SPLIT(x) ((x) << 14) +#define BACKEND_MAP(x) ((x) << 16) + +#define GB_TILING_CONFIG 0x98F0 + +#define GC_USER_SHADER_PIPE_CONFIG 0x8954 +#define INACTIVE_QD_PIPES(x) ((x) << 8) +#define INACTIVE_QD_PIPES_MASK 0x0000FF00 +#define INACTIVE_SIMDS(x) ((x) << 16) +#define INACTIVE_SIMDS_MASK 0x00FF0000 + +#define SQ_CONFIG 0x8c00 +# define VC_ENABLE (1 << 0) +# define EXPORT_SRC_C (1 << 1) +# define DX9_CONSTS (1 << 2) +# define ALU_INST_PREFER_VECTOR (1 << 3) +# define DX10_CLAMP (1 << 4) +# define CLAUSE_SEQ_PRIO(x) ((x) << 8) +# define PS_PRIO(x) ((x) << 24) +# define VS_PRIO(x) ((x) << 26) +# define GS_PRIO(x) ((x) << 28) +# define ES_PRIO(x) ((x) << 30) +#define SQ_GPR_RESOURCE_MGMT_1 0x8c04 +# define NUM_PS_GPRS(x) ((x) << 0) +# define NUM_VS_GPRS(x) ((x) << 16) +# define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28) +#define SQ_GPR_RESOURCE_MGMT_2 0x8c08 +# define NUM_GS_GPRS(x) ((x) << 0) +# define NUM_ES_GPRS(x) ((x) << 16) +#define SQ_THREAD_RESOURCE_MGMT 0x8c0c +# define NUM_PS_THREADS(x) ((x) << 0) +# define NUM_VS_THREADS(x) ((x) << 8) +# define NUM_GS_THREADS(x) ((x) << 16) +# define NUM_ES_THREADS(x) ((x) << 24) +#define SQ_STACK_RESOURCE_MGMT_1 0x8c10 +# define NUM_PS_STACK_ENTRIES(x) ((x) << 0) +# define NUM_VS_STACK_ENTRIES(x) ((x) << 16) +#define SQ_STACK_RESOURCE_MGMT_2 0x8c14 +# define NUM_GS_STACK_ENTRIES(x) ((x) << 0) +# define NUM_ES_STACK_ENTRIES(x) ((x) << 16) + +#define GRBM_CNTL 0x8000 +# define GRBM_READ_TIMEOUT(x) ((x) << 0) +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000001F +#define GUI_ACTIVE (1<<31) +#define GRBM_STATUS2 0x8014 +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1<<0) + +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +#define HDP_TILING_CONFIG 0x2F3C + +#define MC_VM_AGP_TOP 0x2184 +#define MC_VM_AGP_BOT 0x2188 +#define MC_VM_AGP_BASE 0x218C +#define MC_VM_FB_LOCATION 0x2180 +#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C +#define ENABLE_L1_TLB (1 << 0) +#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L1_STRICT_ORDERING (1 << 2) +#define SYSTEM_ACCESS_MODE_MASK 0x000000C0 +#define SYSTEM_ACCESS_MODE_SHIFT 6 +#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 6) +#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 6) +#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 6) +#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 6) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 8) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE (1 << 8) +#define ENABLE_SEMAPHORE_MODE (1 << 10) +#define ENABLE_WAIT_L2_QUERY (1 << 11) +#define EFFECTIVE_L1_TLB_SIZE(x) (((x) & 7) << 12) +#define EFFECTIVE_L1_TLB_SIZE_MASK 0x00007000 +#define EFFECTIVE_L1_TLB_SIZE_SHIFT 12 +#define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15) +#define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000 +#define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15 +#define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0 +#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC +#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204 +#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208 +#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C +#define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200 +#define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4 +#define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8 +#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210 +#define MC_VM_L1_TLB_MCB_WR_HDP_CNTL 0x2218 +#define MC_VM_L1_TLB_MCB_WR_PDMA_CNTL 0x221C +#define MC_VM_L1_TLB_MCB_WR_SEM_CNTL 0x2220 +#define MC_VM_L1_TLB_MCB_WR_SYS_CNTL 0x2214 +#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2190 +#define LOGICAL_PAGE_NUMBER_MASK 0x000FFFFF +#define LOGICAL_PAGE_NUMBER_SHIFT 0 +#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194 +#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 + +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_AA_CONFIG 0x28C04 +#define PA_SC_AA_SAMPLE_LOCS_2S 0x8B40 +#define PA_SC_AA_SAMPLE_LOCS_4S 0x8B44 +#define PA_SC_AA_SAMPLE_LOCS_8S_WD0 0x8B48 +#define PA_SC_AA_SAMPLE_LOCS_8S_WD1 0x8B4C +#define S0_X(x) ((x) << 0) +#define S0_Y(x) ((x) << 4) +#define S1_X(x) ((x) << 8) +#define S1_Y(x) ((x) << 12) +#define S2_X(x) ((x) << 16) +#define S2_Y(x) ((x) << 20) +#define S3_X(x) ((x) << 24) +#define S3_Y(x) ((x) << 28) +#define S4_X(x) ((x) << 0) +#define S4_Y(x) ((x) << 4) +#define S5_X(x) ((x) << 8) +#define S5_Y(x) ((x) << 12) +#define S6_X(x) ((x) << 16) +#define S6_Y(x) ((x) << 20) +#define S7_X(x) ((x) << 24) +#define S7_Y(x) ((x) << 28) +#define PA_SC_CLIPRECT_RULE 0x2820c +#define PA_SC_ENHANCE 0x8BF0 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) +#define FORCE_EOV_MAX_TILE_CNT(x) ((x) << 12) +#define PA_SC_LINE_STIPPLE 0x28A0C +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 +#define PA_SC_MODE_CNTL 0x28A4C +#define PA_SC_MULTI_CHIP_CNTL 0x8B20 + +#define PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define PA_SC_WINDOW_SCISSOR_TL 0x28204 + +#define PCIE_PORT_INDEX 0x0038 +#define PCIE_PORT_DATA 0x003C + +#define RAMCFG 0x2408 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000001 +#define NOOFRANK_SHIFT 1 +#define NOOFRANK_MASK 0x00000002 +#define NOOFROWS_SHIFT 2 +#define NOOFROWS_MASK 0x0000001C +#define NOOFCOLS_SHIFT 5 +#define NOOFCOLS_MASK 0x00000060 +#define CHANSIZE_SHIFT 7 +#define CHANSIZE_MASK 0x00000080 +#define BURSTLENGTH_SHIFT 8 +#define BURSTLENGTH_MASK 0x00000100 +#define CHANSIZE_OVERRIDE (1 << 10) + +#define SCRATCH_REG0 0x8500 +#define SCRATCH_REG1 0x8504 +#define SCRATCH_REG2 0x8508 +#define SCRATCH_REG3 0x850C +#define SCRATCH_REG4 0x8510 +#define SCRATCH_REG5 0x8514 +#define SCRATCH_REG6 0x8518 +#define SCRATCH_REG7 0x851C +#define SCRATCH_UMSK 0x8540 +#define SCRATCH_ADDR 0x8544 + +#define SPI_CONFIG_CNTL 0x9100 +#define GPR_WRITE_PRIORITY(x) ((x) << 0) +#define DISABLE_INTERP_1 (1 << 5) +#define SPI_CONFIG_CNTL_1 0x913C +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) +#define SPI_INPUT_Z 0x286D8 +#define SPI_PS_IN_CONTROL_0 0x286CC +#define NUM_INTERP(x) ((x)<<0) +#define POSITION_ENA (1<<8) +#define POSITION_CENTROID (1<<9) +#define POSITION_ADDR(x) ((x)<<10) +#define PARAM_GEN(x) ((x)<<15) +#define PARAM_GEN_ADDR(x) ((x)<<19) +#define BARYC_SAMPLE_CNTL(x) ((x)<<26) +#define PERSP_GRADIENT_ENA (1<<28) +#define LINEAR_GRADIENT_ENA (1<<29) +#define POSITION_SAMPLE (1<<30) +#define BARYC_AT_SAMPLE_ENA (1<<31) +#define SPI_PS_IN_CONTROL_1 0x286D0 +#define GEN_INDEX_PIX (1<<0) +#define GEN_INDEX_PIX_ADDR(x) ((x)<<1) +#define FRONT_FACE_ENA (1<<8) +#define FRONT_FACE_CHAN(x) ((x)<<9) +#define FRONT_FACE_ALL_BITS (1<<11) +#define FRONT_FACE_ADDR(x) ((x)<<12) +#define FOG_ADDR(x) ((x)<<17) +#define FIXED_PT_POSITION_ENA (1<<24) +#define FIXED_PT_POSITION_ADDR(x) ((x)<<25) + +#define SQ_MS_FIFO_SIZES 0x8CF0 +#define CACHE_FIFO_SIZE(x) ((x) << 0) +#define FETCH_FIFO_HIWATER(x) ((x) << 8) +#define DONE_FIFO_HIWATER(x) ((x) << 16) +#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24) +#define SQ_PGM_START_ES 0x28880 +#define SQ_PGM_START_FS 0x28894 +#define SQ_PGM_START_GS 0x2886C +#define SQ_PGM_START_PS 0x28840 +#define SQ_PGM_RESOURCES_PS 0x28850 +#define SQ_PGM_EXPORTS_PS 0x28854 +#define SQ_PGM_CF_OFFSET_PS 0x288cc +#define SQ_PGM_START_VS 0x28858 +#define SQ_PGM_RESOURCES_VS 0x28868 +#define SQ_PGM_CF_OFFSET_VS 0x288d0 +#define SQ_VTX_CONSTANT_WORD6_0 0x38018 +#define S__SQ_VTX_CONSTANT_TYPE(x) (((x) & 3) << 30) +#define G__SQ_VTX_CONSTANT_TYPE(x) (((x) >> 30) & 3) +#define SQ_TEX_VTX_INVALID_TEXTURE 0x0 +#define SQ_TEX_VTX_INVALID_BUFFER 0x1 +#define SQ_TEX_VTX_VALID_TEXTURE 0x2 +#define SQ_TEX_VTX_VALID_BUFFER 0x3 + + +#define SX_MISC 0x28350 +#define SX_DEBUG_1 0x9054 +#define SMX_EVENT_RELEASE (1 << 0) +#define ENABLE_NEW_SMX_ADDRESS (1 << 16) + +#define TA_CNTL_AUX 0x9508 +#define DISABLE_CUBE_WRAP (1 << 0) +#define DISABLE_CUBE_ANISO (1 << 1) +#define SYNC_GRADIENT (1 << 24) +#define SYNC_WALKER (1 << 25) +#define SYNC_ALIGNER (1 << 26) +#define BILINEAR_PRECISION_6_BIT (0 << 31) +#define BILINEAR_PRECISION_8_BIT (1 << 31) + +#define TC_CNTL 0x9608 +#define TC_L2_SIZE(x) ((x)<<5) +#define L2_DISABLE_LATE_HIT (1<<9) + + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x)<<0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define VGT_DMA_BASE 0x287E8 +#define VGT_DMA_BASE_HI 0x287E4 +#define VGT_ES_PER_GS 0x88CC +#define VGT_GS_PER_ES 0x88C8 +#define VGT_GS_PER_VS 0x88E8 +#define VGT_GS_VERTEX_REUSE 0x88D4 +#define VGT_PRIMITIVE_TYPE 0x8958 +#define VGT_NUM_INSTANCES 0x8974 +#define VGT_OUT_DEALLOC_CNTL 0x28C5C +#define DEALLOC_DIST_MASK 0x0000007F +#define VGT_STRMOUT_BASE_OFFSET_0 0x28B10 +#define VGT_STRMOUT_BASE_OFFSET_1 0x28B14 +#define VGT_STRMOUT_BASE_OFFSET_2 0x28B18 +#define VGT_STRMOUT_BASE_OFFSET_3 0x28B1c +#define VGT_STRMOUT_BASE_OFFSET_HI_0 0x28B44 +#define VGT_STRMOUT_BASE_OFFSET_HI_1 0x28B48 +#define VGT_STRMOUT_BASE_OFFSET_HI_2 0x28B4c +#define VGT_STRMOUT_BASE_OFFSET_HI_3 0x28B50 +#define VGT_STRMOUT_BUFFER_BASE_0 0x28AD8 +#define VGT_STRMOUT_BUFFER_BASE_1 0x28AE8 +#define VGT_STRMOUT_BUFFER_BASE_2 0x28AF8 +#define VGT_STRMOUT_BUFFER_BASE_3 0x28B08 +#define VGT_STRMOUT_BUFFER_OFFSET_0 0x28ADC +#define VGT_STRMOUT_BUFFER_OFFSET_1 0x28AEC +#define VGT_STRMOUT_BUFFER_OFFSET_2 0x28AFC +#define VGT_STRMOUT_BUFFER_OFFSET_3 0x28B0C +#define VGT_STRMOUT_EN 0x28AB0 +#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 +#define VTX_REUSE_DEPTH_MASK 0x000000FF +#define VGT_EVENT_INITIATOR 0x28a90 +# define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) + +#define VM_CONTEXT0_CNTL 0x1410 +#define ENABLE_CONTEXT (1 << 0) +#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define VM_CONTEXT0_INVALIDATION_LOW_ADDR 0x1490 +#define VM_CONTEXT0_INVALIDATION_HIGH_ADDR 0x14B0 +#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x1574 +#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x1594 +#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x15B4 +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1554 +#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 +#define REQUEST_TYPE(x) (((x) & 0xf) << 0) +#define RESPONSE_TYPE_MASK 0x000000F0 +#define RESPONSE_TYPE_SHIFT 4 +#define VM_L2_CNTL 0x1400 +#define ENABLE_L2_CACHE (1 << 0) +#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) +#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 13) +#define VM_L2_CNTL2 0x1404 +#define INVALIDATE_ALL_L1_TLBS (1 << 0) +#define INVALIDATE_L2_CACHE (1 << 1) +#define VM_L2_CNTL3 0x1408 +#define BANK_SELECT_0(x) (((x) & 0x1f) << 0) +#define BANK_SELECT_1(x) (((x) & 0x1f) << 5) +#define L2_CACHE_UPDATE_MODE(x) (((x) & 3) << 10) +#define VM_L2_STATUS 0x140C +#define L2_BUSY (1 << 0) + +#define WAIT_UNTIL 0x8040 +#define WAIT_2D_IDLE_bit (1 << 14) +#define WAIT_3D_IDLE_bit (1 << 15) +#define WAIT_2D_IDLECLEAN_bit (1 << 16) +#define WAIT_3D_IDLECLEAN_bit (1 << 17) + + + +/* + * PM4 + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + (((reg) >> 2) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_INDIRECT_BUFFER_END 0x17 +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_START_3D_CMDBUF 0x24 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_DRAW_INDEX_IMMD_BE 0x29 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDEX 0x2B +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_DRAW_INDEX_IMMD 0x2E +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_INDIRECT_BUFFER_MP 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +#define PACKET3_MPEG_INDEX 0x3A +#define PACKET3_WAIT_REG_MEM 0x3C +#define PACKET3_MEM_WRITE 0x3D +#define PACKET3_INDIRECT_BUFFER 0x32 +#define PACKET3_CP_INTERRUPT 0x40 +#define PACKET3_SURFACE_SYNC 0x43 +# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) +# define PACKET3_TC_ACTION_ENA (1 << 23) +# define PACKET3_VC_ACTION_ENA (1 << 24) +# define PACKET3_CB_ACTION_ENA (1 << 25) +# define PACKET3_DB_ACTION_ENA (1 << 26) +# define PACKET3_SH_ACTION_ENA (1 << 27) +# define PACKET3_SMX_ACTION_ENA (1 << 28) +#define PACKET3_ME_INITIALIZE 0x44 +#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define PACKET3_ONE_REG_WRITE 0x57 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_OFFSET 0x00008000 +#define PACKET3_SET_CONFIG_REG_END 0x0000ac00 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_OFFSET 0x00028000 +#define PACKET3_SET_CONTEXT_REG_END 0x00029000 +#define PACKET3_SET_ALU_CONST 0x6A +#define PACKET3_SET_ALU_CONST_OFFSET 0x00030000 +#define PACKET3_SET_ALU_CONST_END 0x00032000 +#define PACKET3_SET_BOOL_CONST 0x6B +#define PACKET3_SET_BOOL_CONST_OFFSET 0x0003e380 +#define PACKET3_SET_BOOL_CONST_END 0x00040000 +#define PACKET3_SET_LOOP_CONST 0x6C +#define PACKET3_SET_LOOP_CONST_OFFSET 0x0003e200 +#define PACKET3_SET_LOOP_CONST_END 0x0003e380 +#define PACKET3_SET_RESOURCE 0x6D +#define PACKET3_SET_RESOURCE_OFFSET 0x00038000 +#define PACKET3_SET_RESOURCE_END 0x0003c000 +#define PACKET3_SET_SAMPLER 0x6E +#define PACKET3_SET_SAMPLER_OFFSET 0x0003c000 +#define PACKET3_SET_SAMPLER_END 0x0003cff0 +#define PACKET3_SET_CTL_CONST 0x6F +#define PACKET3_SET_CTL_CONST_OFFSET 0x0003cff0 +#define PACKET3_SET_CTL_CONST_END 0x0003e200 +#define PACKET3_SURFACE_BASE_UPDATE 0x73 + + +#define R_008020_GRBM_SOFT_RESET 0x8020 +#define S_008020_SOFT_RESET_CP(x) (((x) & 1) << 0) +#define S_008020_SOFT_RESET_CB(x) (((x) & 1) << 1) +#define S_008020_SOFT_RESET_CR(x) (((x) & 1) << 2) +#define S_008020_SOFT_RESET_DB(x) (((x) & 1) << 3) +#define S_008020_SOFT_RESET_PA(x) (((x) & 1) << 5) +#define S_008020_SOFT_RESET_SC(x) (((x) & 1) << 6) +#define S_008020_SOFT_RESET_SMX(x) (((x) & 1) << 7) +#define S_008020_SOFT_RESET_SPI(x) (((x) & 1) << 8) +#define S_008020_SOFT_RESET_SH(x) (((x) & 1) << 9) +#define S_008020_SOFT_RESET_SX(x) (((x) & 1) << 10) +#define S_008020_SOFT_RESET_TC(x) (((x) & 1) << 11) +#define S_008020_SOFT_RESET_TA(x) (((x) & 1) << 12) +#define S_008020_SOFT_RESET_VC(x) (((x) & 1) << 13) +#define S_008020_SOFT_RESET_VGT(x) (((x) & 1) << 14) +#define R_008010_GRBM_STATUS 0x8010 +#define S_008010_CMDFIFO_AVAIL(x) (((x) & 0x1F) << 0) +#define S_008010_CP_RQ_PENDING(x) (((x) & 1) << 6) +#define S_008010_CF_RQ_PENDING(x) (((x) & 1) << 7) +#define S_008010_PF_RQ_PENDING(x) (((x) & 1) << 8) +#define S_008010_GRBM_EE_BUSY(x) (((x) & 1) << 10) +#define S_008010_VC_BUSY(x) (((x) & 1) << 11) +#define S_008010_DB03_CLEAN(x) (((x) & 1) << 12) +#define S_008010_CB03_CLEAN(x) (((x) & 1) << 13) +#define S_008010_VGT_BUSY_NO_DMA(x) (((x) & 1) << 16) +#define S_008010_VGT_BUSY(x) (((x) & 1) << 17) +#define S_008010_TA03_BUSY(x) (((x) & 1) << 18) +#define S_008010_TC_BUSY(x) (((x) & 1) << 19) +#define S_008010_SX_BUSY(x) (((x) & 1) << 20) +#define S_008010_SH_BUSY(x) (((x) & 1) << 21) +#define S_008010_SPI03_BUSY(x) (((x) & 1) << 22) +#define S_008010_SMX_BUSY(x) (((x) & 1) << 23) +#define S_008010_SC_BUSY(x) (((x) & 1) << 24) +#define S_008010_PA_BUSY(x) (((x) & 1) << 25) +#define S_008010_DB03_BUSY(x) (((x) & 1) << 26) +#define S_008010_CR_BUSY(x) (((x) & 1) << 27) +#define S_008010_CP_COHERENCY_BUSY(x) (((x) & 1) << 28) +#define S_008010_CP_BUSY(x) (((x) & 1) << 29) +#define S_008010_CB03_BUSY(x) (((x) & 1) << 30) +#define S_008010_GUI_ACTIVE(x) (((x) & 1) << 31) +#define G_008010_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x1F) +#define G_008010_CP_RQ_PENDING(x) (((x) >> 6) & 1) +#define G_008010_CF_RQ_PENDING(x) (((x) >> 7) & 1) +#define G_008010_PF_RQ_PENDING(x) (((x) >> 8) & 1) +#define G_008010_GRBM_EE_BUSY(x) (((x) >> 10) & 1) +#define G_008010_VC_BUSY(x) (((x) >> 11) & 1) +#define G_008010_DB03_CLEAN(x) (((x) >> 12) & 1) +#define G_008010_CB03_CLEAN(x) (((x) >> 13) & 1) +#define G_008010_VGT_BUSY_NO_DMA(x) (((x) >> 16) & 1) +#define G_008010_VGT_BUSY(x) (((x) >> 17) & 1) +#define G_008010_TA03_BUSY(x) (((x) >> 18) & 1) +#define G_008010_TC_BUSY(x) (((x) >> 19) & 1) +#define G_008010_SX_BUSY(x) (((x) >> 20) & 1) +#define G_008010_SH_BUSY(x) (((x) >> 21) & 1) +#define G_008010_SPI03_BUSY(x) (((x) >> 22) & 1) +#define G_008010_SMX_BUSY(x) (((x) >> 23) & 1) +#define G_008010_SC_BUSY(x) (((x) >> 24) & 1) +#define G_008010_PA_BUSY(x) (((x) >> 25) & 1) +#define G_008010_DB03_BUSY(x) (((x) >> 26) & 1) +#define G_008010_CR_BUSY(x) (((x) >> 27) & 1) +#define G_008010_CP_COHERENCY_BUSY(x) (((x) >> 28) & 1) +#define G_008010_CP_BUSY(x) (((x) >> 29) & 1) +#define G_008010_CB03_BUSY(x) (((x) >> 30) & 1) +#define G_008010_GUI_ACTIVE(x) (((x) >> 31) & 1) +#define R_008014_GRBM_STATUS2 0x8014 +#define S_008014_CR_CLEAN(x) (((x) & 1) << 0) +#define S_008014_SMX_CLEAN(x) (((x) & 1) << 1) +#define S_008014_SPI0_BUSY(x) (((x) & 1) << 8) +#define S_008014_SPI1_BUSY(x) (((x) & 1) << 9) +#define S_008014_SPI2_BUSY(x) (((x) & 1) << 10) +#define S_008014_SPI3_BUSY(x) (((x) & 1) << 11) +#define S_008014_TA0_BUSY(x) (((x) & 1) << 12) +#define S_008014_TA1_BUSY(x) (((x) & 1) << 13) +#define S_008014_TA2_BUSY(x) (((x) & 1) << 14) +#define S_008014_TA3_BUSY(x) (((x) & 1) << 15) +#define S_008014_DB0_BUSY(x) (((x) & 1) << 16) +#define S_008014_DB1_BUSY(x) (((x) & 1) << 17) +#define S_008014_DB2_BUSY(x) (((x) & 1) << 18) +#define S_008014_DB3_BUSY(x) (((x) & 1) << 19) +#define S_008014_CB0_BUSY(x) (((x) & 1) << 20) +#define S_008014_CB1_BUSY(x) (((x) & 1) << 21) +#define S_008014_CB2_BUSY(x) (((x) & 1) << 22) +#define S_008014_CB3_BUSY(x) (((x) & 1) << 23) +#define G_008014_CR_CLEAN(x) (((x) >> 0) & 1) +#define G_008014_SMX_CLEAN(x) (((x) >> 1) & 1) +#define G_008014_SPI0_BUSY(x) (((x) >> 8) & 1) +#define G_008014_SPI1_BUSY(x) (((x) >> 9) & 1) +#define G_008014_SPI2_BUSY(x) (((x) >> 10) & 1) +#define G_008014_SPI3_BUSY(x) (((x) >> 11) & 1) +#define G_008014_TA0_BUSY(x) (((x) >> 12) & 1) +#define G_008014_TA1_BUSY(x) (((x) >> 13) & 1) +#define G_008014_TA2_BUSY(x) (((x) >> 14) & 1) +#define G_008014_TA3_BUSY(x) (((x) >> 15) & 1) +#define G_008014_DB0_BUSY(x) (((x) >> 16) & 1) +#define G_008014_DB1_BUSY(x) (((x) >> 17) & 1) +#define G_008014_DB2_BUSY(x) (((x) >> 18) & 1) +#define G_008014_DB3_BUSY(x) (((x) >> 19) & 1) +#define G_008014_CB0_BUSY(x) (((x) >> 20) & 1) +#define G_008014_CB1_BUSY(x) (((x) >> 21) & 1) +#define G_008014_CB2_BUSY(x) (((x) >> 22) & 1) +#define G_008014_CB3_BUSY(x) (((x) >> 23) & 1) +#define R_000E50_SRBM_STATUS 0x0E50 +#define G_000E50_RLC_RQ_PENDING(x) (((x) >> 3) & 1) +#define G_000E50_RCU_RQ_PENDING(x) (((x) >> 4) & 1) +#define G_000E50_GRBM_RQ_PENDING(x) (((x) >> 5) & 1) +#define G_000E50_HI_RQ_PENDING(x) (((x) >> 6) & 1) +#define G_000E50_IO_EXTERN_SIGNAL(x) (((x) >> 7) & 1) +#define G_000E50_VMC_BUSY(x) (((x) >> 8) & 1) +#define G_000E50_MCB_BUSY(x) (((x) >> 9) & 1) +#define G_000E50_MCDZ_BUSY(x) (((x) >> 10) & 1) +#define G_000E50_MCDY_BUSY(x) (((x) >> 11) & 1) +#define G_000E50_MCDX_BUSY(x) (((x) >> 12) & 1) +#define G_000E50_MCDW_BUSY(x) (((x) >> 13) & 1) +#define G_000E50_SEM_BUSY(x) (((x) >> 14) & 1) +#define G_000E50_RLC_BUSY(x) (((x) >> 15) & 1) +#define R_000E60_SRBM_SOFT_RESET 0x0E60 +#define S_000E60_SOFT_RESET_BIF(x) (((x) & 1) << 1) +#define S_000E60_SOFT_RESET_CG(x) (((x) & 1) << 2) +#define S_000E60_SOFT_RESET_CMC(x) (((x) & 1) << 3) +#define S_000E60_SOFT_RESET_CSC(x) (((x) & 1) << 4) +#define S_000E60_SOFT_RESET_DC(x) (((x) & 1) << 5) +#define S_000E60_SOFT_RESET_GRBM(x) (((x) & 1) << 8) +#define S_000E60_SOFT_RESET_HDP(x) (((x) & 1) << 9) +#define S_000E60_SOFT_RESET_IH(x) (((x) & 1) << 10) +#define S_000E60_SOFT_RESET_MC(x) (((x) & 1) << 11) +#define S_000E60_SOFT_RESET_RLC(x) (((x) & 1) << 13) +#define S_000E60_SOFT_RESET_ROM(x) (((x) & 1) << 14) +#define S_000E60_SOFT_RESET_SEM(x) (((x) & 1) << 15) +#define S_000E60_SOFT_RESET_TSC(x) (((x) & 1) << 16) +#define S_000E60_SOFT_RESET_VMC(x) (((x) & 1) << 17) + +#endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e47f2fc294ce..3299733ac300 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -50,8 +50,8 @@ #include #include "radeon_mode.h" +#include "radeon_share.h" #include "radeon_reg.h" -#include "r300.h" /* * Modules parameters. @@ -112,10 +112,11 @@ enum radeon_family { CHIP_RV635, CHIP_RV670, CHIP_RS780, + CHIP_RS880, CHIP_RV770, CHIP_RV730, CHIP_RV710, - CHIP_RS880, + CHIP_RV740, CHIP_LAST, }; @@ -152,10 +153,21 @@ struct radeon_device; */ bool radeon_get_bios(struct radeon_device *rdev); + +/* + * Dummy page + */ +struct radeon_dummy_page { + struct page *page; + dma_addr_t addr; +}; +int radeon_dummy_page_init(struct radeon_device *rdev); +void radeon_dummy_page_fini(struct radeon_device *rdev); + + /* * Clocks */ - struct radeon_clock { struct radeon_pll p1pll; struct radeon_pll p2pll; @@ -166,6 +178,7 @@ struct radeon_clock { uint32_t default_sclk; }; + /* * Fences. */ @@ -332,14 +345,18 @@ struct radeon_mc { resource_size_t aper_size; resource_size_t aper_base; resource_size_t agp_base; - unsigned gtt_location; - unsigned gtt_size; - unsigned vram_location; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ - unsigned mc_vram_size; + u64 mc_vram_size; + u64 gtt_location; + u64 gtt_size; + u64 gtt_start; + u64 gtt_end; + u64 vram_location; + u64 vram_start; + u64 vram_end; unsigned vram_width; - unsigned real_vram_size; + u64 real_vram_size; int vram_mtrr; bool vram_is_ddr; }; @@ -411,6 +428,16 @@ struct radeon_cp { bool ready; }; +struct r600_blit { + struct radeon_object *shader_obj; + u64 shader_gpu_addr; + u32 vs_offset, ps_offset; + u32 state_offset; + u32 state_len; + u32 vb_used, vb_total; + struct radeon_ib *vb_ib; +}; + int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib); @@ -463,6 +490,7 @@ struct radeon_cs_parser { int chunk_relocs_idx; struct radeon_ib *ib; void *track; + unsigned family; }; struct radeon_cs_packet { @@ -559,6 +587,9 @@ int r100_debugfs_cp_init(struct radeon_device *rdev); */ struct radeon_asic { int (*init)(struct radeon_device *rdev); + void (*fini)(struct radeon_device *rdev); + int (*resume)(struct radeon_device *rdev); + int (*suspend)(struct radeon_device *rdev); void (*errata)(struct radeon_device *rdev); void (*vram_info)(struct radeon_device *rdev); int (*gpu_reset)(struct radeon_device *rdev); @@ -573,7 +604,11 @@ struct radeon_asic { int (*cp_init)(struct radeon_device *rdev, unsigned ring_size); void (*cp_fini)(struct radeon_device *rdev); void (*cp_disable)(struct radeon_device *rdev); + void (*cp_commit)(struct radeon_device *rdev); void (*ring_start)(struct radeon_device *rdev); + int (*ring_test)(struct radeon_device *rdev); + void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); + int (*ib_test)(struct radeon_device *rdev); int (*irq_set)(struct radeon_device *rdev); int (*irq_process)(struct radeon_device *rdev); u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc); @@ -613,6 +648,8 @@ struct r100_asic { union radeon_asic_config { struct r300_asic r300; struct r100_asic r100; + struct r600_asic r600; + struct rv770_asic rv770; }; @@ -698,12 +735,16 @@ struct radeon_device { struct radeon_pm pm; struct mutex cs_mutex; struct radeon_wb wb; + struct radeon_dummy_page dummy_page; bool gpu_lockup; bool shutdown; bool suspend; bool need_dma32; + bool new_init_path; struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; - const struct firmware *fw; /* firmware */ + const struct firmware *me_fw; /* all family ME firmware */ + const struct firmware *pfp_fw; /* r6/700 PFP firmware */ + struct r600_blit r600_blit; }; int radeon_device_init(struct radeon_device *rdev, @@ -713,6 +754,13 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev); +/* r600 blit */ +int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); +void r600_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes); + static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) { if (reg < 0x10000) @@ -740,6 +788,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32 #define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg)) #define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg)) #define RREG32(reg) r100_mm_rreg(rdev, (reg)) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg))) #define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v)) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) @@ -763,6 +812,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32 tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) +#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg))) /* * Indirect registers accessor @@ -827,51 +877,6 @@ void radeon_atombios_fini(struct radeon_device *rdev); /* * RING helpers. */ -#define CP_PACKET0 0x00000000 -#define PACKET0_BASE_INDEX_SHIFT 0 -#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) -#define PACKET0_COUNT_SHIFT 16 -#define PACKET0_COUNT_MASK (0x3fff << 16) -#define CP_PACKET1 0x40000000 -#define CP_PACKET2 0x80000000 -#define PACKET2_PAD_SHIFT 0 -#define PACKET2_PAD_MASK (0x3fffffff << 0) -#define CP_PACKET3 0xC0000000 -#define PACKET3_IT_OPCODE_SHIFT 8 -#define PACKET3_IT_OPCODE_MASK (0xff << 8) -#define PACKET3_COUNT_SHIFT 16 -#define PACKET3_COUNT_MASK (0x3fff << 16) -/* PACKET3 op code */ -#define PACKET3_NOP 0x10 -#define PACKET3_3D_DRAW_VBUF 0x28 -#define PACKET3_3D_DRAW_IMMD 0x29 -#define PACKET3_3D_DRAW_INDX 0x2A -#define PACKET3_3D_LOAD_VBPNTR 0x2F -#define PACKET3_INDX_BUFFER 0x33 -#define PACKET3_3D_DRAW_VBUF_2 0x34 -#define PACKET3_3D_DRAW_IMMD_2 0x35 -#define PACKET3_3D_DRAW_INDX_2 0x36 -#define PACKET3_BITBLT_MULTI 0x9B - -#define PACKET0(reg, n) (CP_PACKET0 | \ - REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ - REG_SET(PACKET0_COUNT, (n))) -#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) -#define PACKET3(op, n) (CP_PACKET3 | \ - REG_SET(PACKET3_IT_OPCODE, (op)) | \ - REG_SET(PACKET3_COUNT, (n))) - -#define PACKET_TYPE0 0 -#define PACKET_TYPE1 1 -#define PACKET_TYPE2 2 -#define PACKET_TYPE3 3 - -#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) -#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) -#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) -#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) -#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) - static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) { #if DRM_DEBUG_CODE @@ -890,6 +895,9 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) * ASICs macro. */ #define radeon_init(rdev) (rdev)->asic->init((rdev)) +#define radeon_fini(rdev) (rdev)->asic->fini((rdev)) +#define radeon_resume(rdev) (rdev)->asic->resume((rdev)) +#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) #define radeon_cs_parse(p) rdev->asic->cs_parse((p)) #define radeon_errata(rdev) (rdev)->asic->errata((rdev)) #define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev)) @@ -905,7 +913,11 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize)) #define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev)) #define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev)) +#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev)) #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev)) +#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev)) +#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib)) +#define radeon_ib_test(rdev) (rdev)->asic->ib_test((rdev)) #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index c9cbd8ae1f95..e87bb915a6de 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -60,6 +60,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr); int r100_cp_init(struct radeon_device *rdev, unsigned ring_size); void r100_cp_fini(struct radeon_device *rdev); void r100_cp_disable(struct radeon_device *rdev); +void r100_cp_commit(struct radeon_device *rdev); void r100_ring_start(struct radeon_device *rdev); int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); @@ -78,6 +79,9 @@ int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t offset, uint32_t obj_size); int r100_clear_surface_reg(struct radeon_device *rdev, int reg); void r100_bandwidth_update(struct radeon_device *rdev); +void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int r100_ib_test(struct radeon_device *rdev); +int r100_ring_test(struct radeon_device *rdev); static struct radeon_asic r100_asic = { .init = &r100_init, @@ -95,7 +99,11 @@ static struct radeon_asic r100_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r100_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -156,7 +164,11 @@ static struct radeon_asic r300_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -197,7 +209,11 @@ static struct radeon_asic r420_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -245,7 +261,11 @@ static struct radeon_asic rs400_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &r100_irq_set, .irq_process = &r100_irq_process, .get_vblank_counter = &r100_get_vblank_counter, @@ -298,7 +318,11 @@ static struct radeon_asic rs600_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -341,7 +365,11 @@ static struct radeon_asic rs690_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &r300_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -391,7 +419,11 @@ static struct radeon_asic rv515_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -434,7 +466,11 @@ static struct radeon_asic r520_asic = { .cp_init = &r100_cp_init, .cp_fini = &r100_cp_fini, .cp_disable = &r100_cp_disable, + .cp_commit = &r100_cp_commit, .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ring_ib_execute = &r100_ring_ib_execute, + .ib_test = &r100_ib_test, .irq_set = &rs600_irq_set, .irq_process = &rs600_irq_process, .get_vblank_counter = &rs600_get_vblank_counter, @@ -453,9 +489,127 @@ static struct radeon_asic r520_asic = { }; /* - * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710 + * r600,rv610,rv630,rv620,rv635,rv670,rs780,rs880 */ +int r600_init(struct radeon_device *rdev); +void r600_fini(struct radeon_device *rdev); +int r600_suspend(struct radeon_device *rdev); +int r600_resume(struct radeon_device *rdev); +int r600_wb_init(struct radeon_device *rdev); +void r600_wb_fini(struct radeon_device *rdev); +void r600_cp_commit(struct radeon_device *rdev); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +int r600_cs_parse(struct radeon_cs_parser *p); +void r600_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, + uint64_t dst_offset, + unsigned num_pages, + struct radeon_fence *fence); +int r600_irq_process(struct radeon_device *rdev); +int r600_irq_set(struct radeon_device *rdev); +int r600_gpu_reset(struct radeon_device *rdev); +int r600_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size); +int r600_clear_surface_reg(struct radeon_device *rdev, int reg); +void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +int r600_ib_test(struct radeon_device *rdev); +int r600_ring_test(struct radeon_device *rdev); +int r600_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence); + +static struct radeon_asic r600_asic = { + .errata = NULL, + .init = &r600_init, + .fini = &r600_fini, + .suspend = &r600_suspend, + .resume = &r600_resume, + .cp_commit = &r600_cp_commit, + .vram_info = NULL, + .gpu_reset = &r600_gpu_reset, + .mc_init = NULL, + .mc_fini = NULL, + .wb_init = &r600_wb_init, + .wb_fini = &r600_wb_fini, + .gart_enable = NULL, + .gart_disable = NULL, + .gart_tlb_flush = &r600_pcie_gart_tlb_flush, + .gart_set_page = &rs600_gart_set_page, + .cp_init = NULL, + .cp_fini = NULL, + .cp_disable = NULL, + .ring_start = NULL, + .ring_test = &r600_ring_test, + .ring_ib_execute = &r600_ring_ib_execute, + .ib_test = &r600_ib_test, + .irq_set = &r600_irq_set, + .irq_process = &r600_irq_process, + .fence_ring_emit = &r600_fence_ring_emit, + .cs_parse = &r600_cs_parse, + .copy_blit = &r600_copy_blit, + .copy_dma = &r600_copy_blit, + .copy = NULL, + .set_engine_clock = &radeon_atom_set_engine_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .set_pcie_lanes = NULL, + .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r600_set_surface_reg, + .clear_surface_reg = r600_clear_surface_reg, + .bandwidth_update = &r520_bandwidth_update, +}; + +/* + * rv770,rv730,rv710,rv740 + */ +int rv770_init(struct radeon_device *rdev); +void rv770_fini(struct radeon_device *rdev); +int rv770_suspend(struct radeon_device *rdev); +int rv770_resume(struct radeon_device *rdev); +int rv770_gpu_reset(struct radeon_device *rdev); + +static struct radeon_asic rv770_asic = { + .errata = NULL, + .init = &rv770_init, + .fini = &rv770_fini, + .suspend = &rv770_suspend, + .resume = &rv770_resume, + .cp_commit = &r600_cp_commit, + .vram_info = NULL, + .gpu_reset = &rv770_gpu_reset, + .mc_init = NULL, + .mc_fini = NULL, + .wb_init = &r600_wb_init, + .wb_fini = &r600_wb_fini, + .gart_enable = NULL, + .gart_disable = NULL, + .gart_tlb_flush = &r600_pcie_gart_tlb_flush, + .gart_set_page = &rs600_gart_set_page, + .cp_init = NULL, + .cp_fini = NULL, + .cp_disable = NULL, + .ring_start = NULL, + .ring_test = &r600_ring_test, + .ring_ib_execute = &r600_ring_ib_execute, + .ib_test = &r600_ib_test, + .irq_set = &r600_irq_set, + .irq_process = &r600_irq_process, + .fence_ring_emit = &r600_fence_ring_emit, + .cs_parse = &r600_cs_parse, + .copy_blit = &r600_copy_blit, + .copy_dma = &r600_copy_blit, + .copy = NULL, + .set_engine_clock = &radeon_atom_set_engine_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .set_pcie_lanes = NULL, + .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r600_set_surface_reg, + .clear_surface_reg = r600_clear_surface_reg, + .bandwidth_update = &r520_bandwidth_update, +}; #endif diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index bba9b4bd8f5c..a8fb392c9cd6 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -370,10 +370,6 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev) && record-> ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) { - DRM_ERROR - ("record type %d\n", - record-> - ucRecordType); switch (record-> ucRecordType) { case ATOM_I2C_RECORD_TYPE: diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c index a37cbce53181..152eef13197a 100644 --- a/drivers/gpu/drm/radeon/radeon_clocks.c +++ b/drivers/gpu/drm/radeon/radeon_clocks.c @@ -102,10 +102,12 @@ void radeon_get_clock_info(struct drm_device *dev) p1pll->reference_div = 12; if (p2pll->reference_div < 2) p2pll->reference_div = 12; - if (spll->reference_div < 2) - spll->reference_div = - RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & - RADEON_M_SPLL_REF_DIV_MASK; + if (rdev->family < CHIP_RS600) { + if (spll->reference_div < 2) + spll->reference_div = + RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & + RADEON_M_SPLL_REF_DIV_MASK; + } if (mpll->reference_div < 2) mpll->reference_div = spll->reference_div; } else { diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7693f7c67bd3..f2469c511789 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -37,7 +37,7 @@ /* * Clear GPU surface registers. */ -static void radeon_surface_init(struct radeon_device *rdev) +void radeon_surface_init(struct radeon_device *rdev) { /* FIXME: check this out */ if (rdev->family < CHIP_R600) { @@ -56,7 +56,7 @@ static void radeon_surface_init(struct radeon_device *rdev) /* * GPU scratch registers helpers function. */ -static void radeon_scratch_init(struct radeon_device *rdev) +void radeon_scratch_init(struct radeon_device *rdev) { int i; @@ -156,16 +156,14 @@ int radeon_mc_setup(struct radeon_device *rdev) tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1); rdev->mc.gtt_location = tmp; } - DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20); + DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20)); DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n", - rdev->mc.vram_location, - rdev->mc.vram_location + rdev->mc.mc_vram_size - 1); - if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size) - DRM_INFO("radeon: VRAM less than aperture workaround enabled\n"); - DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20); + (unsigned)rdev->mc.vram_location, + (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1)); + DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20)); DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n", - rdev->mc.gtt_location, - rdev->mc.gtt_location + rdev->mc.gtt_size - 1); + (unsigned)rdev->mc.gtt_location, + (unsigned)(rdev->mc.gtt_location + rdev->mc.gtt_size - 1)); return 0; } @@ -205,6 +203,31 @@ static bool radeon_card_posted(struct radeon_device *rdev) } +int radeon_dummy_page_init(struct radeon_device *rdev) +{ + rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); + if (rdev->dummy_page.page == NULL) + return -ENOMEM; + rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page, + 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (!rdev->dummy_page.addr) { + __free_page(rdev->dummy_page.page); + rdev->dummy_page.page = NULL; + return -ENOMEM; + } + return 0; +} + +void radeon_dummy_page_fini(struct radeon_device *rdev) +{ + if (rdev->dummy_page.page == NULL) + return; + pci_unmap_page(rdev->pdev, rdev->dummy_page.addr, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + __free_page(rdev->dummy_page.page); + rdev->dummy_page.page = NULL; +} + /* * Registers accessors functions. @@ -323,9 +346,15 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RV635: case CHIP_RV670: case CHIP_RS780: + case CHIP_RS880: + rdev->asic = &r600_asic; + break; case CHIP_RV770: case CHIP_RV730: case CHIP_RV710: + case CHIP_RV740: + rdev->asic = &rv770_asic; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -448,7 +477,7 @@ int radeon_device_init(struct radeon_device *rdev, struct pci_dev *pdev, uint32_t flags) { - int r, ret; + int r, ret = 0; int dma_bits; DRM_INFO("radeon: Initializing kernel modesetting.\n"); @@ -487,10 +516,6 @@ int radeon_device_init(struct radeon_device *rdev, if (r) { return r; } - r = radeon_init(rdev); - if (r) { - return r; - } /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. @@ -521,111 +546,118 @@ int radeon_device_init(struct radeon_device *rdev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size); - /* Setup errata flags */ - radeon_errata(rdev); - /* Initialize scratch registers */ - radeon_scratch_init(rdev); - /* Initialize surface registers */ - radeon_surface_init(rdev); + rdev->new_init_path = false; + r = radeon_init(rdev); + if (r) { + return r; + } + if (!rdev->new_init_path) { + /* Setup errata flags */ + radeon_errata(rdev); + /* Initialize scratch registers */ + radeon_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); - /* TODO: disable VGA need to use VGA request */ - /* BIOS*/ - if (!radeon_get_bios(rdev)) { - if (ASIC_IS_AVIVO(rdev)) - return -EINVAL; - } - if (rdev->is_atom_bios) { - r = radeon_atombios_init(rdev); - if (r) { - return r; + /* TODO: disable VGA need to use VGA request */ + /* BIOS*/ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; } - } else { - r = radeon_combios_init(rdev); - if (r) { - return r; - } - } - /* Reset gpu before posting otherwise ATOM will enter infinite loop */ - if (radeon_gpu_reset(rdev)) { - /* FIXME: what do we want to do here ? */ - } - /* check if cards are posted or not */ - if (!radeon_card_posted(rdev) && rdev->bios) { - DRM_INFO("GPU not posted. posting now...\n"); if (rdev->is_atom_bios) { - atom_asic_init(rdev->mode_info.atom_context); + r = radeon_atombios_init(rdev); + if (r) { + return r; + } } else { - radeon_combios_asic_init(rdev->ddev); + r = radeon_combios_init(rdev); + if (r) { + return r; + } } - } - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - return r; - } - /* Get vram informations */ - radeon_vram_info(rdev); + /* Reset gpu before posting otherwise ATOM will enter infinite loop */ + if (radeon_gpu_reset(rdev)) { + /* FIXME: what do we want to do here ? */ + } + /* check if cards are posted or not */ + if (!radeon_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. posting now...\n"); + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + } + /* Initialize clocks */ + r = radeon_clocks_init(rdev); + if (r) { + return r; + } + /* Get vram informations */ + radeon_vram_info(rdev); - /* Add an MTRR for the VRAM */ - rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, - MTRR_TYPE_WRCOMB, 1); - DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n", - rdev->mc.real_vram_size >> 20, - (unsigned)rdev->mc.aper_size >> 20); - DRM_INFO("RAM width %dbits %cDR\n", - rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S'); - /* Initialize memory controller (also test AGP) */ - r = radeon_mc_init(rdev); - if (r) { - return r; - } - /* Fence driver */ - r = radeon_fence_driver_init(rdev); - if (r) { - return r; - } - r = radeon_irq_kms_init(rdev); - if (r) { - return r; - } - /* Memory manager */ - r = radeon_object_init(rdev); - if (r) { - return r; - } - /* Initialize GART (initialize after TTM so we can allocate - * memory through TTM but finalize after TTM) */ - r = radeon_gart_enable(rdev); - if (!r) { - r = radeon_gem_init(rdev); - } + /* Add an MTRR for the VRAM */ + rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, + MTRR_TYPE_WRCOMB, 1); + DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n", + (unsigned)(rdev->mc.mc_vram_size >> 20), + (unsigned)(rdev->mc.aper_size >> 20)); + DRM_INFO("RAM width %dbits %cDR\n", + rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S'); + /* Initialize memory controller (also test AGP) */ + r = radeon_mc_init(rdev); + if (r) { + return r; + } + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) { + return r; + } + r = radeon_irq_kms_init(rdev); + if (r) { + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) { + return r; + } + /* Initialize GART (initialize after TTM so we can allocate + * memory through TTM but finalize after TTM) */ + r = radeon_gart_enable(rdev); + if (!r) { + r = radeon_gem_init(rdev); + } - /* 1M ring buffer */ - if (!r) { - r = radeon_cp_init(rdev, 1024 * 1024); - } - if (!r) { - r = radeon_wb_init(rdev); - if (r) { - DRM_ERROR("radeon: failled initializing WB (%d).\n", r); - return r; + /* 1M ring buffer */ + if (!r) { + r = radeon_cp_init(rdev, 1024 * 1024); } - } - if (!r) { - r = radeon_ib_pool_init(rdev); - if (r) { - DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); - return r; + if (!r) { + r = radeon_wb_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing WB (%d).\n", r); + return r; + } } - } - if (!r) { - r = radeon_ib_test(rdev); - if (r) { - DRM_ERROR("radeon: failled testing IB (%d).\n", r); - return r; + if (!r) { + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } } + if (!r) { + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + } + ret = r; } - ret = r; r = radeon_modeset_init(rdev); if (r) { return r; @@ -651,26 +683,29 @@ void radeon_device_fini(struct radeon_device *rdev) rdev->shutdown = true; /* Order matter so becarefull if you rearrange anythings */ radeon_modeset_fini(rdev); - radeon_ib_pool_fini(rdev); - radeon_cp_fini(rdev); - radeon_wb_fini(rdev); - radeon_gem_fini(rdev); - radeon_object_fini(rdev); - /* mc_fini must be after object_fini */ - radeon_mc_fini(rdev); + if (!rdev->new_init_path) { + radeon_ib_pool_fini(rdev); + radeon_cp_fini(rdev); + radeon_wb_fini(rdev); + radeon_gem_fini(rdev); + radeon_mc_fini(rdev); #if __OS_HAS_AGP - radeon_agp_fini(rdev); + radeon_agp_fini(rdev); #endif - radeon_irq_kms_fini(rdev); - radeon_fence_driver_fini(rdev); - radeon_clocks_fini(rdev); - if (rdev->is_atom_bios) { - radeon_atombios_fini(rdev); + radeon_irq_kms_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); + radeon_object_fini(rdev); + if (rdev->is_atom_bios) { + radeon_atombios_fini(rdev); + } else { + radeon_combios_fini(rdev); + } + kfree(rdev->bios); + rdev->bios = NULL; } else { - radeon_combios_fini(rdev); + radeon_fini(rdev); } - kfree(rdev->bios); - rdev->bios = NULL; iounmap(rdev->rmmio); rdev->rmmio = NULL; } @@ -708,9 +743,12 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) /* wait for gpu to finish processing current batch */ radeon_fence_wait_last(rdev); - radeon_cp_disable(rdev); - radeon_gart_disable(rdev); - + if (!rdev->new_init_path) { + radeon_cp_disable(rdev); + radeon_gart_disable(rdev); + } else { + radeon_suspend(rdev); + } /* evict remaining vram memory */ radeon_object_evict_vram(rdev); @@ -746,33 +784,37 @@ int radeon_resume_kms(struct drm_device *dev) if (radeon_gpu_reset(rdev)) { /* FIXME: what do we want to do here ? */ } - /* post card */ - if (rdev->is_atom_bios) { - atom_asic_init(rdev->mode_info.atom_context); + if (!rdev->new_init_path) { + /* post card */ + if (rdev->is_atom_bios) { + atom_asic_init(rdev->mode_info.atom_context); + } else { + radeon_combios_asic_init(rdev->ddev); + } + /* Initialize clocks */ + r = radeon_clocks_init(rdev); + if (r) { + release_console_sem(); + return r; + } + /* Enable IRQ */ + rdev->irq.sw_int = true; + radeon_irq_set(rdev); + /* Initialize GPU Memory Controller */ + r = radeon_mc_init(rdev); + if (r) { + goto out; + } + r = radeon_gart_enable(rdev); + if (r) { + goto out; + } + r = radeon_cp_init(rdev, rdev->cp.ring_size); + if (r) { + goto out; + } } else { - radeon_combios_asic_init(rdev->ddev); - } - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - release_console_sem(); - return r; - } - /* Enable IRQ */ - rdev->irq.sw_int = true; - radeon_irq_set(rdev); - /* Initialize GPU Memory Controller */ - r = radeon_mc_init(rdev); - if (r) { - goto out; - } - r = radeon_gart_enable(rdev); - if (r) { - goto out; - } - r = radeon_cp_init(rdev, rdev->cp.ring_size); - if (r) { - goto out; + radeon_resume(rdev); } out: fb_set_suspend(rdev->fbdev_info, 0); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 40294a07976f..c7b185924f6c 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -356,6 +356,12 @@ typedef struct drm_radeon_private { int r700_sc_hiz_tile_fifo_size; int r700_sc_earlyz_tile_fifo_fize; + struct mutex cs_mutex; + u32 cs_id_scnt; + u32 cs_id_wcnt; + /* r6xx/r7xx drm blit vertex buffer */ + struct drm_buf *blit_vb; + /* firmware */ const struct firmware *me_fw, *pfp_fw; } drm_radeon_private_t; @@ -396,6 +402,9 @@ static __inline__ int radeon_check_offset(drm_radeon_private_t *dev_priv, (off >= gart_start && off <= gart_end)); } +/* radeon_state.c */ +extern void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf); + /* radeon_cp.c */ extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -487,6 +496,22 @@ extern int r600_cp_dispatch_indirect(struct drm_device *dev, struct drm_buf *buf, int start, int end); extern int r600_page_table_init(struct drm_device *dev); extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info); +extern int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv); +extern void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv); +extern int r600_cp_dispatch_texture(struct drm_device *dev, + struct drm_file *file_priv, + drm_radeon_texture_t *tex, + drm_radeon_tex_image_t *image); +/* r600_blit.c */ +extern int r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv); +extern void r600_done_blit_copy(struct drm_device *dev); +extern void r600_blit_copy(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int size_bytes); +extern void r600_blit_swap(struct drm_device *dev, + uint64_t src_gpu_addr, uint64_t dst_gpu_addr, + int sx, int sy, int dx, int dy, + int w, int h, int src_pitch, int dst_pitch, int cpp); /* Flags for stats.boxes */ @@ -1114,13 +1139,71 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index); # define RADEON_CNTL_BITBLT_MULTI 0x00009B00 # define RADEON_CNTL_SET_SCISSORS 0xC0001E00 -# define R600_IT_INDIRECT_BUFFER 0x00003200 -# define R600_IT_ME_INITIALIZE 0x00004400 +# define R600_IT_INDIRECT_BUFFER_END 0x00001700 +# define R600_IT_SET_PREDICATION 0x00002000 +# define R600_IT_REG_RMW 0x00002100 +# define R600_IT_COND_EXEC 0x00002200 +# define R600_IT_PRED_EXEC 0x00002300 +# define R600_IT_START_3D_CMDBUF 0x00002400 +# define R600_IT_DRAW_INDEX_2 0x00002700 +# define R600_IT_CONTEXT_CONTROL 0x00002800 +# define R600_IT_DRAW_INDEX_IMMD_BE 0x00002900 +# define R600_IT_INDEX_TYPE 0x00002A00 +# define R600_IT_DRAW_INDEX 0x00002B00 +# define R600_IT_DRAW_INDEX_AUTO 0x00002D00 +# define R600_IT_DRAW_INDEX_IMMD 0x00002E00 +# define R600_IT_NUM_INSTANCES 0x00002F00 +# define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400 +# define R600_IT_INDIRECT_BUFFER_MP 0x00003800 +# define R600_IT_MEM_SEMAPHORE 0x00003900 +# define R600_IT_MPEG_INDEX 0x00003A00 +# define R600_IT_WAIT_REG_MEM 0x00003C00 +# define R600_IT_MEM_WRITE 0x00003D00 +# define R600_IT_INDIRECT_BUFFER 0x00003200 +# define R600_IT_CP_INTERRUPT 0x00004000 +# define R600_IT_SURFACE_SYNC 0x00004300 +# define R600_CB0_DEST_BASE_ENA (1 << 6) +# define R600_TC_ACTION_ENA (1 << 23) +# define R600_VC_ACTION_ENA (1 << 24) +# define R600_CB_ACTION_ENA (1 << 25) +# define R600_DB_ACTION_ENA (1 << 26) +# define R600_SH_ACTION_ENA (1 << 27) +# define R600_SMX_ACTION_ENA (1 << 28) +# define R600_IT_ME_INITIALIZE 0x00004400 # define R600_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) -# define R600_IT_EVENT_WRITE 0x00004600 -# define R600_IT_SET_CONFIG_REG 0x00006800 -# define R600_SET_CONFIG_REG_OFFSET 0x00008000 -# define R600_SET_CONFIG_REG_END 0x0000ac00 +# define R600_IT_COND_WRITE 0x00004500 +# define R600_IT_EVENT_WRITE 0x00004600 +# define R600_IT_EVENT_WRITE_EOP 0x00004700 +# define R600_IT_ONE_REG_WRITE 0x00005700 +# define R600_IT_SET_CONFIG_REG 0x00006800 +# define R600_SET_CONFIG_REG_OFFSET 0x00008000 +# define R600_SET_CONFIG_REG_END 0x0000ac00 +# define R600_IT_SET_CONTEXT_REG 0x00006900 +# define R600_SET_CONTEXT_REG_OFFSET 0x00028000 +# define R600_SET_CONTEXT_REG_END 0x00029000 +# define R600_IT_SET_ALU_CONST 0x00006A00 +# define R600_SET_ALU_CONST_OFFSET 0x00030000 +# define R600_SET_ALU_CONST_END 0x00032000 +# define R600_IT_SET_BOOL_CONST 0x00006B00 +# define R600_SET_BOOL_CONST_OFFSET 0x0003e380 +# define R600_SET_BOOL_CONST_END 0x00040000 +# define R600_IT_SET_LOOP_CONST 0x00006C00 +# define R600_SET_LOOP_CONST_OFFSET 0x0003e200 +# define R600_SET_LOOP_CONST_END 0x0003e380 +# define R600_IT_SET_RESOURCE 0x00006D00 +# define R600_SET_RESOURCE_OFFSET 0x00038000 +# define R600_SET_RESOURCE_END 0x0003c000 +# define R600_SQ_TEX_VTX_INVALID_TEXTURE 0x0 +# define R600_SQ_TEX_VTX_INVALID_BUFFER 0x1 +# define R600_SQ_TEX_VTX_VALID_TEXTURE 0x2 +# define R600_SQ_TEX_VTX_VALID_BUFFER 0x3 +# define R600_IT_SET_SAMPLER 0x00006E00 +# define R600_SET_SAMPLER_OFFSET 0x0003c000 +# define R600_SET_SAMPLER_END 0x0003cff0 +# define R600_IT_SET_CTL_CONST 0x00006F00 +# define R600_SET_CTL_CONST_OFFSET 0x0003cff0 +# define R600_SET_CTL_CONST_END 0x0003e200 +# define R600_IT_SURFACE_BASE_UPDATE 0x00007300 #define RADEON_CP_PACKET_MASK 0xC0000000 #define RADEON_CP_PACKET_COUNT_MASK 0x3fff0000 @@ -1598,6 +1681,52 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index); #define R600_CB_COLOR7_BASE 0x2805c #define R600_CB_COLOR7_FRAG 0x280fc +#define R600_CB_COLOR0_SIZE 0x28060 +#define R600_CB_COLOR0_VIEW 0x28080 +#define R600_CB_COLOR0_INFO 0x280a0 +#define R600_CB_COLOR0_TILE 0x280c0 +#define R600_CB_COLOR0_FRAG 0x280e0 +#define R600_CB_COLOR0_MASK 0x28100 + +#define AVIVO_D1MODE_VLINE_START_END 0x6538 +#define AVIVO_D2MODE_VLINE_START_END 0x6d38 +#define R600_CP_COHER_BASE 0x85f8 +#define R600_DB_DEPTH_BASE 0x2800c +#define R600_SQ_PGM_START_FS 0x28894 +#define R600_SQ_PGM_START_ES 0x28880 +#define R600_SQ_PGM_START_VS 0x28858 +#define R600_SQ_PGM_RESOURCES_VS 0x28868 +#define R600_SQ_PGM_CF_OFFSET_VS 0x288d0 +#define R600_SQ_PGM_START_GS 0x2886c +#define R600_SQ_PGM_START_PS 0x28840 +#define R600_SQ_PGM_RESOURCES_PS 0x28850 +#define R600_SQ_PGM_EXPORTS_PS 0x28854 +#define R600_SQ_PGM_CF_OFFSET_PS 0x288cc +#define R600_VGT_DMA_BASE 0x287e8 +#define R600_VGT_DMA_BASE_HI 0x287e4 +#define R600_VGT_STRMOUT_BASE_OFFSET_0 0x28b10 +#define R600_VGT_STRMOUT_BASE_OFFSET_1 0x28b14 +#define R600_VGT_STRMOUT_BASE_OFFSET_2 0x28b18 +#define R600_VGT_STRMOUT_BASE_OFFSET_3 0x28b1c +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_0 0x28b44 +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_1 0x28b48 +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_2 0x28b4c +#define R600_VGT_STRMOUT_BASE_OFFSET_HI_3 0x28b50 +#define R600_VGT_STRMOUT_BUFFER_BASE_0 0x28ad8 +#define R600_VGT_STRMOUT_BUFFER_BASE_1 0x28ae8 +#define R600_VGT_STRMOUT_BUFFER_BASE_2 0x28af8 +#define R600_VGT_STRMOUT_BUFFER_BASE_3 0x28b08 +#define R600_VGT_STRMOUT_BUFFER_OFFSET_0 0x28adc +#define R600_VGT_STRMOUT_BUFFER_OFFSET_1 0x28aec +#define R600_VGT_STRMOUT_BUFFER_OFFSET_2 0x28afc +#define R600_VGT_STRMOUT_BUFFER_OFFSET_3 0x28b0c + +#define R600_VGT_PRIMITIVE_TYPE 0x8958 + +#define R600_PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define R600_PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define R600_PA_SC_WINDOW_SCISSOR_TL 0x28204 + #define R600_TC_CNTL 0x9608 # define R600_TC_L2_SIZE(x) ((x) << 5) # define R600_L2_DISABLE_LATE_HIT (1 << 9) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index b4e48dd2e859..506dd4dd3a24 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -53,9 +53,9 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) * away */ WREG32(rdev->fence_drv.scratch_reg, fence->seq); - } else { + } else radeon_fence_ring_emit(rdev, fence); - } + fence->emited = true; fence->timeout = jiffies + ((2000 * HZ) / 1000); list_del(&fence->list); @@ -168,7 +168,47 @@ bool radeon_fence_signaled(struct radeon_fence *fence) return signaled; } -int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) +int r600_fence_wait(struct radeon_fence *fence, bool intr, bool lazy) +{ + struct radeon_device *rdev; + unsigned long cur_jiffies; + unsigned long timeout; + int ret = 0; + + cur_jiffies = jiffies; + timeout = HZ / 100; + + if (time_after(fence->timeout, cur_jiffies)) { + timeout = fence->timeout - cur_jiffies; + } + + rdev = fence->rdev; + + __set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + + while (1) { + if (radeon_fence_signaled(fence)) + break; + + if (time_after_eq(jiffies, timeout)) { + ret = -EBUSY; + break; + } + + if (lazy) + schedule_timeout(1); + + if (intr && signal_pending(current)) { + ret = -ERESTART; + break; + } + } + __set_current_state(TASK_RUNNING); + return ret; +} + + +int radeon_fence_wait(struct radeon_fence *fence, bool intr) { struct radeon_device *rdev; unsigned long cur_jiffies; @@ -176,7 +216,6 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) bool expired = false; int r; - if (fence == NULL) { WARN(1, "Querying an invalid fence : %p !\n", fence); return 0; @@ -185,13 +224,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible) if (radeon_fence_signaled(fence)) { return 0; } + + if (rdev->family >= CHIP_R600) + return r600_fence_wait(fence, intr, 0); + retry: cur_jiffies = jiffies; timeout = HZ / 100; if (time_after(fence->timeout, cur_jiffies)) { timeout = fence->timeout - cur_jiffies; } - if (interruptible) { + + if (intr) { r = wait_event_interruptible_timeout(rdev->fence_drv.queue, radeon_fence_signaled(fence), timeout); if (unlikely(r == -ERESTARTSYS)) { diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 28be2f1165cb..21da871a793c 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -3255,6 +3255,24 @@ #define RADEON_CP_RB_WPTR 0x0714 #define RADEON_CP_RB_RPTR_WR 0x071c +#define RADEON_SCRATCH_UMSK 0x0770 +#define RADEON_SCRATCH_ADDR 0x0774 + +#define R600_CP_RB_BASE 0xc100 +#define R600_CP_RB_CNTL 0xc104 +# define R600_RB_BUFSZ(x) ((x) << 0) +# define R600_RB_BLKSZ(x) ((x) << 8) +# define R600_RB_NO_UPDATE (1 << 27) +# define R600_RB_RPTR_WR_ENA (1 << 31) +#define R600_CP_RB_RPTR_WR 0xc108 +#define R600_CP_RB_RPTR_ADDR 0xc10c +#define R600_CP_RB_RPTR_ADDR_HI 0xc110 +#define R600_CP_RB_WPTR 0xc114 +#define R600_CP_RB_WPTR_ADDR 0xc118 +#define R600_CP_RB_WPTR_ADDR_HI 0xc11c +#define R600_CP_RB_RPTR 0x8700 +#define R600_CP_RB_WPTR_DELAY 0x8704 + #define RADEON_CP_IB_BASE 0x0738 #define RADEON_CP_IB_BUFSZ 0x073c diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 60d159308b88..aa9837a6aa75 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -110,7 +110,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) return; } list_del(&tmp->list); - INIT_LIST_HEAD(&tmp->list); if (tmp->fence) { radeon_fence_unref(&tmp->fence); } @@ -119,19 +118,11 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) mutex_unlock(&rdev->ib_pool.mutex); } -static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib) -{ - while ((ib->length_dw & rdev->cp.align_mask)) { - ib->ptr[ib->length_dw++] = PACKET2(0); - } -} - int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) { int r = 0; mutex_lock(&rdev->ib_pool.mutex); - radeon_ib_align(rdev, ib); if (!ib->length_dw || !rdev->cp.ready) { /* TODO: Nothings in the ib we should report. */ mutex_unlock(&rdev->ib_pool.mutex); @@ -145,9 +136,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) mutex_unlock(&rdev->ib_pool.mutex); return r; } - radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1)); - radeon_ring_write(rdev, ib->gpu_addr); - radeon_ring_write(rdev, ib->length_dw); + radeon_ring_ib_execute(rdev, ib); radeon_fence_emit(rdev, ib->fence); radeon_ring_unlock_commit(rdev); list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs); @@ -215,69 +204,16 @@ void radeon_ib_pool_fini(struct radeon_device *rdev) mutex_unlock(&rdev->ib_pool.mutex); } -int radeon_ib_test(struct radeon_device *rdev) -{ - struct radeon_ib *ib; - uint32_t scratch; - uint32_t tmp = 0; - unsigned i; - int r; - - r = radeon_scratch_get(rdev, &scratch); - if (r) { - DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, &ib); - if (r) { - return r; - } - ib->ptr[0] = PACKET0(scratch, 0); - ib->ptr[1] = 0xDEADBEEF; - ib->ptr[2] = PACKET2(0); - ib->ptr[3] = PACKET2(0); - ib->ptr[4] = PACKET2(0); - ib->ptr[5] = PACKET2(0); - ib->ptr[6] = PACKET2(0); - ib->ptr[7] = PACKET2(0); - ib->length_dw = 8; - r = radeon_ib_schedule(rdev, ib); - if (r) { - radeon_scratch_free(rdev, scratch); - radeon_ib_free(rdev, &ib); - return r; - } - r = radeon_fence_wait(ib->fence, false); - if (r) { - return r; - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - break; - } - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ib test succeeded in %u usecs\n", i); - } else { - DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - radeon_scratch_free(rdev, scratch); - radeon_ib_free(rdev, &ib); - return r; -} - /* * Ring. */ void radeon_ring_free_size(struct radeon_device *rdev) { - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); + if (rdev->family >= CHIP_R600) + rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); + else + rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); /* This works because ring_size is a power of 2 */ rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4)); rdev->cp.ring_free_dw -= rdev->cp.wptr; @@ -320,11 +256,10 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev) count_dw_pad = (rdev->cp.align_mask + 1) - (rdev->cp.wptr & rdev->cp.align_mask); for (i = 0; i < count_dw_pad; i++) { - radeon_ring_write(rdev, PACKET2(0)); + radeon_ring_write(rdev, 2 << 30); } DRM_MEMORYBARRIER(); - WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); - (void)RREG32(RADEON_CP_RB_WPTR); + radeon_cp_commit(rdev); mutex_unlock(&rdev->cp.mutex); } @@ -334,46 +269,6 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev) mutex_unlock(&rdev->cp.mutex); } -int radeon_ring_test(struct radeon_device *rdev) -{ - uint32_t scratch; - uint32_t tmp = 0; - unsigned i; - int r; - - r = radeon_scratch_get(rdev, &scratch); - if (r) { - DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - r = radeon_ring_lock(rdev, 2); - if (r) { - DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); - radeon_scratch_free(rdev, scratch); - return r; - } - radeon_ring_write(rdev, PACKET0(scratch, 0)); - radeon_ring_write(rdev, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - break; - } - DRM_UDELAY(1); - } - if (i < rdev->usec_timeout) { - DRM_INFO("ring test succeeded in %d usecs\n", i); - } else { - DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - radeon_scratch_free(rdev, scratch); - return r; -} - int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size) { int r; diff --git a/drivers/gpu/drm/radeon/radeon_share.h b/drivers/gpu/drm/radeon/radeon_share.h index 63a773578f17..5f9e358ab506 100644 --- a/drivers/gpu/drm/radeon/radeon_share.h +++ b/drivers/gpu/drm/radeon/radeon_share.h @@ -28,12 +28,89 @@ #ifndef __RADEON_SHARE_H__ #define __RADEON_SHARE_H__ +/* Common */ +struct radeon_device; +struct radeon_cs_parser; +int radeon_clocks_init(struct radeon_device *rdev); +void radeon_clocks_fini(struct radeon_device *rdev); +void radeon_scratch_init(struct radeon_device *rdev); +void radeon_surface_init(struct radeon_device *rdev); +int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); + + +/* R100, RV100, RS100, RV200, RS200, R200, RV250, RS300, RV280 */ void r100_vram_init_sizes(struct radeon_device *rdev); + +/* R300, R350, RV350, RV380 */ +struct r300_asic { + const unsigned *reg_safe_bm; + unsigned reg_safe_bm_size; +}; + + +/* RS690, RS740 */ void rs690_line_buffer_adjust(struct radeon_device *rdev, struct drm_display_mode *mode1, struct drm_display_mode *mode2); + +/* RV515 */ void rv515_bandwidth_avivo_update(struct radeon_device *rdev); + +/* R600, RV610, RV630, RV620, RV635, RV670, RS780, RS880 */ +bool r600_card_posted(struct radeon_device *rdev); +void r600_cp_stop(struct radeon_device *rdev); +void r600_ring_init(struct radeon_device *rdev, unsigned ring_size); +int r600_cp_resume(struct radeon_device *rdev); +int r600_count_pipe_bits(uint32_t val); +int r600_gart_clear_page(struct radeon_device *rdev, int i); +int r600_mc_wait_for_idle(struct radeon_device *rdev); +void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); +int r600_ib_test(struct radeon_device *rdev); +int r600_ring_test(struct radeon_device *rdev); +int r600_wb_init(struct radeon_device *rdev); +void r600_wb_fini(struct radeon_device *rdev); +void r600_scratch_init(struct radeon_device *rdev); +int r600_blit_init(struct radeon_device *rdev); +void r600_blit_fini(struct radeon_device *rdev); +int r600_cp_init_microcode(struct radeon_device *rdev); +struct r600_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; +}; + +/* RV770, RV7300, RV710 */ +struct rv770_asic { + unsigned max_pipes; + unsigned max_tile_pipes; + unsigned max_simds; + unsigned max_backends; + unsigned max_gprs; + unsigned max_threads; + unsigned max_stack_entries; + unsigned max_hw_contexts; + unsigned max_gs_threads; + unsigned sx_max_export_size; + unsigned sx_max_export_pos_size; + unsigned sx_max_export_smx_size; + unsigned sq_num_cf_insts; + unsigned sx_num_of_sets; + unsigned sc_prim_fifo_size; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_fize; +}; + #endif diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 2882f40d5ec5..aad0c6fafcf4 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1546,7 +1546,7 @@ static void radeon_cp_dispatch_vertex(struct drm_device * dev, } while (i < nbox); } -static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf) +void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf) { drm_radeon_private_t *dev_priv = dev->dev_private; struct drm_radeon_master_private *master_priv = master->driver_priv; @@ -2213,7 +2213,10 @@ static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *f if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS) sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS; - radeon_cp_dispatch_swap(dev, file_priv->master); + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) + r600_cp_dispatch_swap(dev, file_priv); + else + radeon_cp_dispatch_swap(dev, file_priv->master); sarea_priv->ctx_owner = 0; COMMIT_RING(); @@ -2412,7 +2415,10 @@ static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file RING_SPACE_TEST_WITH_RETURN(dev_priv); VB_AGE_TEST_WITH_RETURN(dev_priv); - ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image); + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) + ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image); + else + ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image); return ret; } @@ -2495,8 +2501,9 @@ static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_fil radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end); } - if (indirect->discard) + if (indirect->discard) { radeon_cp_discard_buffer(dev, file_priv->master, buf); + } COMMIT_RING(); return 0; @@ -3227,7 +3234,8 @@ struct drm_ioctl_desc radeon_ioctls[] = { DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH) + DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) }; int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index dc7a44274ea8..acd889c94549 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -376,9 +376,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, radeon_move_null(bo, new_mem); return 0; } - if (!rdev->cp.ready) { + if (!rdev->cp.ready || rdev->asic->copy == NULL) { /* use memcpy */ - DRM_ERROR("CP is not ready use memcpy.\n"); goto memcpy; } @@ -495,7 +494,7 @@ int radeon_ttm_init(struct radeon_device *rdev) return r; } DRM_INFO("radeon: %uM of VRAM memory ready\n", - rdev->mc.real_vram_size / (1024 * 1024)); + (unsigned)rdev->mc.real_vram_size / (1024 * 1024)); r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0, ((rdev->mc.gtt_size) >> PAGE_SHIFT)); if (r) { @@ -503,7 +502,7 @@ int radeon_ttm_init(struct radeon_device *rdev) return r; } DRM_INFO("radeon: %uM of GTT memory ready.\n", - rdev->mc.gtt_size / (1024 * 1024)); + (unsigned)(rdev->mc.gtt_size / (1024 * 1024))); if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; } diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index b29affd9c5d8..8c3ea7e36060 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -63,7 +63,7 @@ void rs400_gart_adjust_size(struct radeon_device *rdev) break; default: DRM_ERROR("Unable to use IGP GART size %uM\n", - rdev->mc.gtt_size >> 20); + (unsigned)(rdev->mc.gtt_size >> 20)); DRM_ERROR("Valid GART size for IGP are 32M,64M,128M,256M,512M,1G,2G\n"); DRM_ERROR("Forcing to 32M GART size\n"); rdev->mc.gtt_size = 32 * 1024 * 1024; diff --git a/drivers/gpu/drm/radeon/rs780.c b/drivers/gpu/drm/radeon/rs780.c deleted file mode 100644 index 0affcff81825..000000000000 --- a/drivers/gpu/drm/radeon/rs780.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2008 Advanced Micro Devices, Inc. - * Copyright 2008 Red Hat Inc. - * Copyright 2009 Jerome Glisse. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Dave Airlie - * Alex Deucher - * Jerome Glisse - */ -#include "drmP.h" -#include "radeon_reg.h" -#include "radeon.h" - -/* rs780 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); - -/* This files gather functions specifics to: - * rs780 - * - * Some of these functions might be used by newer ASICs. - */ -int rs780_mc_wait_for_idle(struct radeon_device *rdev); -void rs780_gpu_init(struct radeon_device *rdev); - - -/* - * MC - */ -int rs780_mc_init(struct radeon_device *rdev) -{ - rs780_gpu_init(rdev); - /* FIXME: implement */ - - rs600_mc_disable_clients(rdev); - if (rs780_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); - } - return 0; -} - -void rs780_mc_fini(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - - -/* - * Global GPU functions - */ -void rs780_errata(struct radeon_device *rdev) -{ - rdev->pll_errata = 0; -} - -int rs780_mc_wait_for_idle(struct radeon_device *rdev) -{ - /* FIXME: implement */ - return 0; -} - -void rs780_gpu_init(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - - -/* - * VRAM info - */ -void rs780_vram_get_type(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - -void rs780_vram_info(struct radeon_device *rdev) -{ - rs780_vram_get_type(rdev); - - /* FIXME: implement */ - /* Could aper size report 0 ? */ - rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); - rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); -} diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 97965c430c1f..99e397f16384 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -27,7 +27,7 @@ */ #include #include "drmP.h" -#include "rv515r.h" +#include "rv515d.h" #include "radeon.h" #include "radeon_share.h" diff --git a/drivers/gpu/drm/radeon/rv515r.h b/drivers/gpu/drm/radeon/rv515d.h similarity index 78% rename from drivers/gpu/drm/radeon/rv515r.h rename to drivers/gpu/drm/radeon/rv515d.h index f3cf84039906..a65e17ec1c08 100644 --- a/drivers/gpu/drm/radeon/rv515r.h +++ b/drivers/gpu/drm/radeon/rv515d.h @@ -25,10 +25,12 @@ * Alex Deucher * Jerome Glisse */ -#ifndef RV515R_H -#define RV515R_H +#ifndef __RV515D_H__ +#define __RV515D_H__ -/* RV515 registers */ +/* + * RV515 registers + */ #define PCIE_INDEX 0x0030 #define PCIE_DATA 0x0034 #define MC_IND_INDEX 0x0070 @@ -166,5 +168,53 @@ #define MC_GLOBW_INIT_LAT_MASK 0xF0000000 +/* + * PM4 packet + */ +#define CP_PACKET0 0x00000000 +#define PACKET0_BASE_INDEX_SHIFT 0 +#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0) +#define PACKET0_COUNT_SHIFT 16 +#define PACKET0_COUNT_MASK (0x3fff << 16) +#define CP_PACKET1 0x40000000 +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) +#define CP_PACKET3 0xC0000000 +#define PACKET3_IT_OPCODE_SHIFT 8 +#define PACKET3_IT_OPCODE_MASK (0xff << 8) +#define PACKET3_COUNT_SHIFT 16 +#define PACKET3_COUNT_MASK (0x3fff << 16) +/* PACKET3 op code */ +#define PACKET3_NOP 0x10 +#define PACKET3_3D_DRAW_VBUF 0x28 +#define PACKET3_3D_DRAW_IMMD 0x29 +#define PACKET3_3D_DRAW_INDX 0x2A +#define PACKET3_3D_LOAD_VBPNTR 0x2F +#define PACKET3_INDX_BUFFER 0x33 +#define PACKET3_3D_DRAW_VBUF_2 0x34 +#define PACKET3_3D_DRAW_IMMD_2 0x35 +#define PACKET3_3D_DRAW_INDX_2 0x36 +#define PACKET3_BITBLT_MULTI 0x9B + +#define PACKET0(reg, n) (CP_PACKET0 | \ + REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \ + REG_SET(PACKET0_COUNT, (n))) +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) +#define PACKET3(op, n) (CP_PACKET3 | \ + REG_SET(PACKET3_IT_OPCODE, (op)) | \ + REG_SET(PACKET3_COUNT, (n))) + +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2) +#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) + #endif diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 21d8ffd57308..57765f6d5b20 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -25,100 +25,975 @@ * Alex Deucher * Jerome Glisse */ +#include +#include #include "drmP.h" -#include "radeon_reg.h" #include "radeon.h" +#include "radeon_share.h" +#include "rv770d.h" +#include "avivod.h" +#include "atom.h" -/* rv770,rv730,rv710 depends on : */ -void rs600_mc_disable_clients(struct radeon_device *rdev); +#define R700_PFP_UCODE_SIZE 848 +#define R700_PM4_UCODE_SIZE 1360 -/* This files gather functions specifics to: - * rv770,rv730,rv710 - * - * Some of these functions might be used by newer ASICs. +static void rv770_gpu_init(struct radeon_device *rdev); +void rv770_fini(struct radeon_device *rdev); + + +/* + * GART */ -int rv770_mc_wait_for_idle(struct radeon_device *rdev); -void rv770_gpu_init(struct radeon_device *rdev); +int rv770_pcie_gart_enable(struct radeon_device *rdev) +{ + u32 tmp; + int r, i; + + /* Initialize common gart structure */ + r = radeon_gart_init(rdev); + if (r) { + return r; + } + rdev->gart.table_size = rdev->gart.num_gpu_pages * 8; + r = radeon_gart_table_vram_alloc(rdev); + if (r) { + return r; + } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | + EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + for (i = 1; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + r600_pcie_gart_tlb_flush(rdev); + rdev->gart.ready = true; + return 0; +} + +void rv770_pcie_gart_disable(struct radeon_device *rdev) +{ + u32 tmp; + int i; + + /* Clear ptes*/ + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + r600_gart_clear_page(rdev, i); + r600_pcie_gart_tlb_flush(rdev); + /* Disable all tables */ + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); +} /* * MC */ -int rv770_mc_init(struct radeon_device *rdev) +static void rv770_mc_resume(struct radeon_device *rdev) { - uint32_t tmp; + u32 d1vga_control, d2vga_control; + u32 vga_render_control, vga_hdp_control; + u32 d1crtc_control, d2crtc_control; + u32 new_d1grph_primary, new_d1grph_secondary; + u32 new_d2grph_primary, new_d2grph_secondary; + u64 old_vram_start; + u32 tmp; + int i, j; - rv770_gpu_init(rdev); + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); - /* setup the gart before changing location so we can ask to - * discard unmapped mc request - */ - /* FIXME: disable out of gart access */ - tmp = rdev->mc.gtt_location / 4096; - tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp); - tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096; - tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp); - WREG32(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp); + d1vga_control = RREG32(D1VGA_CONTROL); + d2vga_control = RREG32(D2VGA_CONTROL); + vga_render_control = RREG32(VGA_RENDER_CONTROL); + vga_hdp_control = RREG32(VGA_HDP_CONTROL); + d1crtc_control = RREG32(D1CRTC_CONTROL); + d2crtc_control = RREG32(D2CRTC_CONTROL); + old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24; + new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS); + new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS); + new_d1grph_primary += rdev->mc.vram_start - old_vram_start; + new_d1grph_secondary += rdev->mc.vram_start - old_vram_start; + new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS); + new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS); + new_d2grph_primary += rdev->mc.vram_start - old_vram_start; + new_d2grph_secondary += rdev->mc.vram_start - old_vram_start; - rs600_mc_disable_clients(rdev); - if (rv770_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); + /* Stop all video */ + WREG32(D1VGA_CONTROL, 0); + WREG32(D2VGA_CONTROL, 0); + WREG32(VGA_RENDER_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, 0); + WREG32(D2CRTC_CONTROL, 0); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); } - tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; - tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24); - tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24); - WREG32(R700_MC_VM_FB_LOCATION, tmp); - tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; - tmp = REG_SET(R700_MC_AGP_TOP, tmp >> 22); - WREG32(R700_MC_VM_AGP_TOP, tmp); - tmp = REG_SET(R700_MC_AGP_BOT, rdev->mc.gtt_location >> 22); - WREG32(R700_MC_VM_AGP_BOT, tmp); - return 0; -} + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); -void rv770_mc_fini(struct radeon_device *rdev) -{ - /* FIXME: implement */ + /* Update configuration */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); + tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7)); + WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16); + WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); + WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); + } else { + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + } + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary); + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary); + WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary); + WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary); + WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); + + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, vga_hdp_control); + + mdelay(1); + if (r600_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "[drm] MC not idle !\n"); + } + + /* Restore video state */ + WREG32(D1CRTC_UPDATE_LOCK, 1); + WREG32(D2CRTC_UPDATE_LOCK, 1); + WREG32(D1CRTC_CONTROL, d1crtc_control); + WREG32(D2CRTC_CONTROL, d2crtc_control); + WREG32(D1CRTC_UPDATE_LOCK, 0); + WREG32(D2CRTC_UPDATE_LOCK, 0); + WREG32(D1VGA_CONTROL, d1vga_control); + WREG32(D2VGA_CONTROL, d2vga_control); + WREG32(VGA_RENDER_CONTROL, vga_render_control); } /* - * Global GPU functions + * CP. */ -void rv770_errata(struct radeon_device *rdev) +void r700_cp_stop(struct radeon_device *rdev) { - rdev->pll_errata = 0; + WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); } -int rv770_mc_wait_for_idle(struct radeon_device *rdev) + +static int rv770_cp_load_microcode(struct radeon_device *rdev) { - /* FIXME: implement */ + const __be32 *fw_data; + int i; + + if (!rdev->me_fw || !rdev->pfp_fw) + return -EINVAL; + + r700_cp_stop(rdev); + WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0)); + + /* Reset cp */ + WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); + RREG32(GRBM_SOFT_RESET); + mdelay(15); + WREG32(GRBM_SOFT_RESET, 0); + + fw_data = (const __be32 *)rdev->pfp_fw->data; + WREG32(CP_PFP_UCODE_ADDR, 0); + for (i = 0; i < R700_PFP_UCODE_SIZE; i++) + WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); + WREG32(CP_PFP_UCODE_ADDR, 0); + + fw_data = (const __be32 *)rdev->me_fw->data; + WREG32(CP_ME_RAM_WADDR, 0); + for (i = 0; i < R700_PM4_UCODE_SIZE; i++) + WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); + + WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_RADDR, 0); return 0; } -void rv770_gpu_init(struct radeon_device *rdev) -{ - /* FIXME: implement */ -} - /* - * VRAM info + * Core functions */ -void rv770_vram_get_type(struct radeon_device *rdev) +static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, + u32 num_backends, + u32 backend_disable_mask) { - /* FIXME: implement */ + u32 backend_map = 0; + u32 enabled_backends_mask; + u32 enabled_backends_count; + u32 cur_pipe; + u32 swizzle_pipe[R7XX_MAX_PIPES]; + u32 cur_backend; + u32 i; + + if (num_tile_pipes > R7XX_MAX_PIPES) + num_tile_pipes = R7XX_MAX_PIPES; + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_backends > R7XX_MAX_BACKENDS) + num_backends = R7XX_MAX_BACKENDS; + if (num_backends < 1) + num_backends = 1; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { + if (((backend_disable_mask >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + ++enabled_backends_count; + } + if (enabled_backends_count == num_backends) + break; + } + + if (enabled_backends_count == 0) { + enabled_backends_mask = 1; + enabled_backends_count = 1; + } + + if (enabled_backends_count != num_backends) + num_backends = enabled_backends_count; + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); + switch (num_tile_pipes) { + case 1: + swizzle_pipe[0] = 0; + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 3: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 1; + break; + case 4: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 3; + swizzle_pipe[3] = 1; + break; + case 5: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 1; + swizzle_pipe[4] = 3; + break; + case 6: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 5; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + break; + case 7: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + swizzle_pipe[6] = 5; + break; + case 8: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 1; + swizzle_pipe[6] = 7; + swizzle_pipe[7] = 5; + break; + } + + cur_backend = 0; + for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { + while (((1 << cur_backend) & enabled_backends_mask) == 0) + cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; + + backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); + + cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; + } + + return backend_map; } -void rv770_vram_info(struct radeon_device *rdev) +static void rv770_gpu_init(struct radeon_device *rdev) { - rv770_vram_get_type(rdev); + int i, j, num_qd_pipes; + u32 sx_debug_1; + u32 smx_dc_ctl0; + u32 num_gs_verts_per_thread; + u32 vgt_gs_per_es; + u32 gs_prim_buffer_depth = 0; + u32 sq_ms_fifo_sizes; + u32 sq_config; + u32 sq_thread_resource_mgmt; + u32 hdp_host_path_cntl; + u32 sq_dyn_gpr_size_simd_ab_0; + u32 backend_map; + u32 gb_tiling_config = 0; + u32 cc_rb_backend_disable = 0; + u32 cc_gc_shader_pipe_config = 0; + u32 mc_arb_ramcfg; + u32 db_debug4; - /* FIXME: implement */ + /* setup chip specs */ + switch (rdev->family) { + case CHIP_RV770: + rdev->config.rv770.max_pipes = 4; + rdev->config.rv770.max_tile_pipes = 8; + rdev->config.rv770.max_simds = 10; + rdev->config.rv770.max_backends = 4; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 512; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 128; + rdev->config.rv770.sx_max_export_pos_size = 16; + rdev->config.rv770.sx_max_export_smx_size = 112; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0xF9; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + break; + case CHIP_RV730: + rdev->config.rv770.max_pipes = 2; + rdev->config.rv770.max_tile_pipes = 4; + rdev->config.rv770.max_simds = 8; + rdev->config.rv770.max_backends = 2; + rdev->config.rv770.max_gprs = 128; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 256; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 256; + rdev->config.rv770.sx_max_export_pos_size = 32; + rdev->config.rv770.sx_max_export_smx_size = 224; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0xf9; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + if (rdev->config.rv770.sx_max_export_pos_size > 16) { + rdev->config.rv770.sx_max_export_pos_size -= 16; + rdev->config.rv770.sx_max_export_smx_size += 16; + } + break; + case CHIP_RV710: + rdev->config.rv770.max_pipes = 2; + rdev->config.rv770.max_tile_pipes = 2; + rdev->config.rv770.max_simds = 2; + rdev->config.rv770.max_backends = 1; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 192; + rdev->config.rv770.max_stack_entries = 256; + rdev->config.rv770.max_hw_contexts = 4; + rdev->config.rv770.max_gs_threads = 8 * 2; + rdev->config.rv770.sx_max_export_size = 128; + rdev->config.rv770.sx_max_export_pos_size = 16; + rdev->config.rv770.sx_max_export_smx_size = 112; + rdev->config.rv770.sq_num_cf_insts = 1; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0x40; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + break; + case CHIP_RV740: + rdev->config.rv770.max_pipes = 4; + rdev->config.rv770.max_tile_pipes = 4; + rdev->config.rv770.max_simds = 8; + rdev->config.rv770.max_backends = 4; + rdev->config.rv770.max_gprs = 256; + rdev->config.rv770.max_threads = 248; + rdev->config.rv770.max_stack_entries = 512; + rdev->config.rv770.max_hw_contexts = 8; + rdev->config.rv770.max_gs_threads = 16 * 2; + rdev->config.rv770.sx_max_export_size = 256; + rdev->config.rv770.sx_max_export_pos_size = 32; + rdev->config.rv770.sx_max_export_smx_size = 224; + rdev->config.rv770.sq_num_cf_insts = 2; + + rdev->config.rv770.sx_num_of_sets = 7; + rdev->config.rv770.sc_prim_fifo_size = 0x100; + rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; + rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; + + if (rdev->config.rv770.sx_max_export_pos_size > 16) { + rdev->config.rv770.sx_max_export_pos_size -= 16; + rdev->config.rv770.sx_max_export_smx_size += 16; + } + break; + default: + break; + } + + /* Initialize HDP */ + j = 0; + for (i = 0; i < 32; i++) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + j += 0x18; + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + /* setup tiling, simd, pipe config */ + mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + + switch (rdev->config.rv770.max_tile_pipes) { + case 1: + gb_tiling_config |= PIPE_TILING(0); + break; + case 2: + gb_tiling_config |= PIPE_TILING(1); + break; + case 4: + gb_tiling_config |= PIPE_TILING(2); + break; + case 8: + gb_tiling_config |= PIPE_TILING(3); + break; + default: + break; + } + + if (rdev->family == CHIP_RV770) + gb_tiling_config |= BANK_TILING(1); + else + gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_SHIFT) >> NOOFBANK_MASK); + + gb_tiling_config |= GROUP_SIZE(0); + + if (((mc_arb_ramcfg & NOOFROWS_MASK) & NOOFROWS_SHIFT) > 3) { + gb_tiling_config |= ROW_TILING(3); + gb_tiling_config |= SAMPLE_SPLIT(3); + } else { + gb_tiling_config |= + ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); + gb_tiling_config |= + SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); + } + + gb_tiling_config |= BANK_SWAPS(1); + + backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes, + rdev->config.rv770.max_backends, + (0xff << rdev->config.rv770.max_backends) & 0xff); + gb_tiling_config |= BACKEND_MAP(backend_map); + + cc_gc_shader_pipe_config = + INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK); + cc_gc_shader_pipe_config |= + INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK); + + cc_rb_backend_disable = + BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK); + + WREG32(GB_TILING_CONFIG, gb_tiling_config); + WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); + WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); + + WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); + + WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CGTS_SYS_TCC_DISABLE, 0); + WREG32(CGTS_TCC_DISABLE, 0); + WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); + WREG32(CGTS_USER_TCC_DISABLE, 0); + + num_qd_pipes = + R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK); + WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); + WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); + + /* set HW defaults for 3D engine */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | + ROQ_IB2_START(0x2b))); + + WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30)); + + WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | + SYNC_GRADIENT | + SYNC_WALKER | + SYNC_ALIGNER)); + + sx_debug_1 = RREG32(SX_DEBUG_1); + sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; + WREG32(SX_DEBUG_1, sx_debug_1); + + smx_dc_ctl0 = RREG32(SMX_DC_CTL0); + smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff); + smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1); + WREG32(SMX_DC_CTL0, smx_dc_ctl0); + + WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | + GS_FLUSH_CTL(4) | + ACK_FLUSH_CTL(3) | + SYNC_FLUSH_CTL)); + + if (rdev->family == CHIP_RV770) + WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f)); + else { + db_debug4 = RREG32(DB_DEBUG4); + db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER; + WREG32(DB_DEBUG4, db_debug4); + } + + WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) | + POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) | + SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1))); + + WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) | + SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize))); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + + WREG32(VGT_NUM_INSTANCES, 1); + + WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); + + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); + + WREG32(CP_PERFMON_CNTL, 0); + + sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) | + DONE_FIFO_HIWATER(0xe0) | + ALU_UPDATE_FIFO_HIWATER(0x8)); + switch (rdev->family) { + case CHIP_RV770: + sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1); + break; + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + default: + sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4); + break; + } + WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); + + /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT + * should be adjusted as needed by the 2D/3D drivers. This just sets default values + */ + sq_config = RREG32(SQ_CONFIG); + sq_config &= ~(PS_PRIO(3) | + VS_PRIO(3) | + GS_PRIO(3) | + ES_PRIO(3)); + sq_config |= (DX9_CONSTS | + VC_ENABLE | + EXPORT_SRC_C | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + if (rdev->family == CHIP_RV710) + /* no vertex cache */ + sq_config &= ~VC_ENABLE; + + WREG32(SQ_CONFIG, sq_config); + + WREG32(SQ_GPR_RESOURCE_MGMT_1, (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | + NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | + NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2))); + + WREG32(SQ_GPR_RESOURCE_MGMT_2, (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) | + NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64))); + + sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) | + NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) | + NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8)); + if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads) + sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads); + else + sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8); + WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); + + WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | + NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); + + WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | + NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); + + sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) | + SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) | + SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) | + SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64)); + + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0); + WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0); + + WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | + FORCE_EOV_MAX_REZ_CNT(255))); + + if (rdev->family == CHIP_RV710) + WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) | + AUTO_INVLD_EN(ES_AND_GS_AUTO))); + else + WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) | + AUTO_INVLD_EN(ES_AND_GS_AUTO))); + + switch (rdev->family) { + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV740: + gs_prim_buffer_depth = 384; + break; + case CHIP_RV710: + gs_prim_buffer_depth = 128; + break; + default: + break; + } + + num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16; + vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; + /* Max value for this is 256 */ + if (vgt_gs_per_es > 256) + vgt_gs_per_es = 256; + + WREG32(VGT_ES_PER_GS, 128); + WREG32(VGT_GS_PER_ES, vgt_gs_per_es); + WREG32(VGT_GS_PER_VS, 2); + + /* more default values. 2D/3D driver should adjust as needed */ + WREG32(VGT_GS_VERTEX_REUSE, 16); + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + WREG32(VGT_STRMOUT_EN, 0); + WREG32(SX_MISC, 0); + WREG32(PA_SC_MODE_CNTL, 0); + WREG32(PA_SC_EDGERULE, 0xaaaaaaaa); + WREG32(PA_SC_AA_CONFIG, 0); + WREG32(PA_SC_CLIPRECT_RULE, 0xffff); + WREG32(PA_SC_LINE_STIPPLE, 0); + WREG32(SPI_INPUT_Z, 0); + WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); + WREG32(CB_COLOR7_FRAG, 0); + + /* clear render buffer base addresses */ + WREG32(CB_COLOR0_BASE, 0); + WREG32(CB_COLOR1_BASE, 0); + WREG32(CB_COLOR2_BASE, 0); + WREG32(CB_COLOR3_BASE, 0); + WREG32(CB_COLOR4_BASE, 0); + WREG32(CB_COLOR5_BASE, 0); + WREG32(CB_COLOR6_BASE, 0); + WREG32(CB_COLOR7_BASE, 0); + + WREG32(TCP_CNTL, 0); + + hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); + + WREG32(PA_SC_MULTI_CHIP_CNTL, 0); + + WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | + NUM_CLIP_SEQ(3))); + +} + +int rv770_mc_init(struct radeon_device *rdev) +{ + fixed20_12 a; + u32 tmp; + int r; + + /* Get VRAM informations */ + /* FIXME: Don't know how to determine vram width, need to check + * vram_width usage + */ + rdev->mc.vram_width = 128; + rdev->mc.vram_is_ddr = true; /* Could aper size report 0 ? */ rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Setup GPU memory space */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + return r; + /* gtt_size is setup by radeon_agp_init */ + rdev->mc.gtt_location = rdev->mc.agp_base; + tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size; + /* Try to put vram before or after AGP because we + * we want SYSTEM_APERTURE to cover both VRAM and + * AGP so that GPU can catch out of VRAM/AGP access + */ + if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { + /* Enought place before */ + rdev->mc.vram_location = rdev->mc.gtt_location - + rdev->mc.mc_vram_size; + } else if (tmp > rdev->mc.mc_vram_size) { + /* Enought place after */ + rdev->mc.vram_location = rdev->mc.gtt_location + + rdev->mc.gtt_size; + } else { + /* Try to setup VRAM then AGP might not + * not work on some card + */ + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } + } else { + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size; + /* FIXME: we should enforce default clock in case GPU is not in + * default setup + */ + a.full = rfixed_const(100); + rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk); + rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a); + return 0; +} +int rv770_gpu_reset(struct radeon_device *rdev) +{ + /* FIXME: implement */ + return 0; +} + +int rv770_resume(struct radeon_device *rdev) +{ + int r; + + rv770_mc_resume(rdev); + r = rv770_pcie_gart_enable(rdev); + if (r) + return r; + rv770_gpu_init(rdev); + r = radeon_ring_init(rdev, rdev->cp.ring_size); + if (r) + return r; + r = rv770_cp_load_microcode(rdev); + if (r) + return r; + r = r600_cp_resume(rdev); + if (r) + return r; + r = r600_wb_init(rdev); + if (r) + return r; + return 0; +} + +int rv770_suspend(struct radeon_device *rdev) +{ + /* FIXME: we should wait for ring to be empty */ + r700_cp_stop(rdev); + return 0; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int rv770_init(struct radeon_device *rdev) +{ + int r; + + rdev->new_init_path = true; + r = radeon_dummy_page_init(rdev); + if (r) + return r; + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) + return -EINVAL; + r = radeon_atombios_init(rdev); + if (r) + return r; + /* Post card if necessary */ + if (!r600_card_posted(rdev) && rdev->bios) { + DRM_INFO("GPU not posted. posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + r600_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + r = radeon_clocks_init(rdev); + if (r) + return r; + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + r = rv770_mc_init(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + rv770_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return rv770_init(rdev); + } + return r; + } + /* Memory manager */ + r = radeon_object_init(rdev); + if (r) + return r; + rdev->cp.ring_obj = NULL; + r600_ring_init(rdev, 1024 * 1024); + + if (!rdev->me_fw || !rdev->pfp_fw) { + r = r600_cp_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } + + r = rv770_resume(rdev); + if (r) { + if (rdev->flags & RADEON_IS_AGP) { + /* Retry with disabling AGP */ + rv770_fini(rdev); + rdev->flags &= ~RADEON_IS_AGP; + return rv770_init(rdev); + } + return r; + } + r = r600_blit_init(rdev); + if (r) { + DRM_ERROR("radeon: failled blitter (%d).\n", r); + return r; + } + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r); + return r; + } + r = radeon_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } + return 0; +} + +void rv770_fini(struct radeon_device *rdev) +{ + r600_blit_fini(rdev); + radeon_ring_fini(rdev); + rv770_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); + radeon_gem_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); +#if __OS_HAS_AGP + if (rdev->flags & RADEON_IS_AGP) + radeon_agp_fini(rdev); +#endif + radeon_object_fini(rdev); + if (rdev->is_atom_bios) { + radeon_atombios_fini(rdev); + } else { + radeon_combios_fini(rdev); + } + kfree(rdev->bios); + rdev->bios = NULL; + radeon_dummy_page_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h new file mode 100644 index 000000000000..4b9c3d6396ff --- /dev/null +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -0,0 +1,341 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#ifndef RV770_H +#define RV770_H + +#define R7XX_MAX_SH_GPRS 256 +#define R7XX_MAX_TEMP_GPRS 16 +#define R7XX_MAX_SH_THREADS 256 +#define R7XX_MAX_SH_STACK_ENTRIES 4096 +#define R7XX_MAX_BACKENDS 8 +#define R7XX_MAX_BACKENDS_MASK 0xff +#define R7XX_MAX_SIMDS 16 +#define R7XX_MAX_SIMDS_MASK 0xffff +#define R7XX_MAX_PIPES 8 +#define R7XX_MAX_PIPES_MASK 0xff + +/* Registers */ +#define CB_COLOR0_BASE 0x28040 +#define CB_COLOR1_BASE 0x28044 +#define CB_COLOR2_BASE 0x28048 +#define CB_COLOR3_BASE 0x2804C +#define CB_COLOR4_BASE 0x28050 +#define CB_COLOR5_BASE 0x28054 +#define CB_COLOR6_BASE 0x28058 +#define CB_COLOR7_BASE 0x2805C +#define CB_COLOR7_FRAG 0x280FC + +#define CC_GC_SHADER_PIPE_CONFIG 0x8950 +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) +#define CC_SYS_RB_BACKEND_DISABLE 0x3F88 + +#define CGTS_SYS_TCC_DISABLE 0x3F90 +#define CGTS_TCC_DISABLE 0x9148 +#define CGTS_USER_SYS_TCC_DISABLE 0x3F94 +#define CGTS_USER_TCC_DISABLE 0x914C + +#define CONFIG_MEMSIZE 0x5428 + +#define CP_ME_CNTL 0x86D8 +#define CP_ME_HALT (1<<28) +#define CP_PFP_HALT (1<<26) +#define CP_ME_RAM_DATA 0xC160 +#define CP_ME_RAM_RADDR 0xC158 +#define CP_ME_RAM_WADDR 0xC15C +#define CP_MEQ_THRESHOLDS 0x8764 +#define STQ_SPLIT(x) ((x) << 0) +#define CP_PERFMON_CNTL 0x87FC +#define CP_PFP_UCODE_ADDR 0xC150 +#define CP_PFP_UCODE_DATA 0xC154 +#define CP_QUEUE_THRESHOLDS 0x8760 +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_RB_CNTL 0xC104 +#define RB_BUFSZ(x) ((x)<<0) +#define RB_BLKSZ(x) ((x)<<8) +#define RB_NO_UPDATE (1<<27) +#define RB_RPTR_WR_ENA (1<<31) +#define BUF_SWAP_32BIT (2 << 16) +#define CP_RB_RPTR 0x8700 +#define CP_RB_RPTR_ADDR 0xC10C +#define CP_RB_RPTR_ADDR_HI 0xC110 +#define CP_RB_RPTR_WR 0xC108 +#define CP_RB_WPTR 0xC114 +#define CP_RB_WPTR_ADDR 0xC118 +#define CP_RB_WPTR_ADDR_HI 0xC11C +#define CP_RB_WPTR_DELAY 0x8704 +#define CP_SEM_WAIT_TIMER 0x85BC + +#define DB_DEBUG3 0x98B0 +#define DB_CLK_OFF_DELAY(x) ((x) << 11) +#define DB_DEBUG4 0x9B8C +#define DISABLE_TILE_COVERED_FOR_PS_ITER (1 << 6) + +#define DCP_TILING_CONFIG 0x6CA0 +#define PIPE_TILING(x) ((x) << 1) +#define BANK_TILING(x) ((x) << 4) +#define GROUP_SIZE(x) ((x) << 6) +#define ROW_TILING(x) ((x) << 8) +#define BANK_SWAPS(x) ((x) << 11) +#define SAMPLE_SPLIT(x) ((x) << 14) +#define BACKEND_MAP(x) ((x) << 16) + +#define GB_TILING_CONFIG 0x98F0 + +#define GC_USER_SHADER_PIPE_CONFIG 0x8954 +#define INACTIVE_QD_PIPES(x) ((x) << 8) +#define INACTIVE_QD_PIPES_MASK 0x0000FF00 +#define INACTIVE_SIMDS(x) ((x) << 16) +#define INACTIVE_SIMDS_MASK 0x00FF0000 + +#define GRBM_CNTL 0x8000 +#define GRBM_READ_TIMEOUT(x) ((x) << 0) +#define GRBM_SOFT_RESET 0x8020 +#define SOFT_RESET_CP (1<<0) +#define GRBM_STATUS 0x8010 +#define CMDFIFO_AVAIL_MASK 0x0000000F +#define GUI_ACTIVE (1<<31) +#define GRBM_STATUS2 0x8014 + +#define HDP_HOST_PATH_CNTL 0x2C00 +#define HDP_NONSURFACE_BASE 0x2C04 +#define HDP_NONSURFACE_INFO 0x2C08 +#define HDP_NONSURFACE_SIZE 0x2C0C +#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 +#define HDP_TILING_CONFIG 0x2F3C + +#define MC_ARB_RAMCFG 0x2760 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000003 +#define NOOFRANK_SHIFT 2 +#define NOOFRANK_MASK 0x00000004 +#define NOOFROWS_SHIFT 3 +#define NOOFROWS_MASK 0x00000038 +#define NOOFCOLS_SHIFT 6 +#define NOOFCOLS_MASK 0x000000C0 +#define CHANSIZE_SHIFT 8 +#define CHANSIZE_MASK 0x00000100 +#define BURSTLENGTH_SHIFT 9 +#define BURSTLENGTH_MASK 0x00000200 +#define MC_VM_AGP_TOP 0x2028 +#define MC_VM_AGP_BOT 0x202C +#define MC_VM_AGP_BASE 0x2030 +#define MC_VM_FB_LOCATION 0x2024 +#define MC_VM_MB_L1_TLB0_CNTL 0x2234 +#define MC_VM_MB_L1_TLB1_CNTL 0x2238 +#define MC_VM_MB_L1_TLB2_CNTL 0x223C +#define MC_VM_MB_L1_TLB3_CNTL 0x2240 +#define ENABLE_L1_TLB (1 << 0) +#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) +#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 3) +#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 3) +#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 3) +#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3) +#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5) +#define EFFECTIVE_L1_TLB_SIZE(x) ((x)<<15) +#define EFFECTIVE_L1_QUEUE_SIZE(x) ((x)<<18) +#define MC_VM_MD_L1_TLB0_CNTL 0x2654 +#define MC_VM_MD_L1_TLB1_CNTL 0x2658 +#define MC_VM_MD_L1_TLB2_CNTL 0x265C +#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C +#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038 +#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034 + +#define PA_CL_ENHANCE 0x8A14 +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) +#define PA_SC_AA_CONFIG 0x28C04 +#define PA_SC_CLIPRECT_RULE 0x2820C +#define PA_SC_EDGERULE 0x28230 +#define PA_SC_FIFO_SIZE 0x8BCC +#define SC_PRIM_FIFO_SIZE(x) ((x) << 0) +#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 12) +#define PA_SC_FORCE_EOV_MAX_CNTS 0x8B24 +#define FORCE_EOV_MAX_CLK_CNT(x) ((x)<<0) +#define FORCE_EOV_MAX_REZ_CNT(x) ((x)<<16) +#define PA_SC_LINE_STIPPLE 0x28A0C +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 +#define PA_SC_MODE_CNTL 0x28A4C +#define PA_SC_MULTI_CHIP_CNTL 0x8B20 +#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 20) + +#define SCRATCH_REG0 0x8500 +#define SCRATCH_REG1 0x8504 +#define SCRATCH_REG2 0x8508 +#define SCRATCH_REG3 0x850C +#define SCRATCH_REG4 0x8510 +#define SCRATCH_REG5 0x8514 +#define SCRATCH_REG6 0x8518 +#define SCRATCH_REG7 0x851C +#define SCRATCH_UMSK 0x8540 +#define SCRATCH_ADDR 0x8544 + +#define SMX_DC_CTL0 0xA020 +#define USE_HASH_FUNCTION (1 << 0) +#define CACHE_DEPTH(x) ((x) << 1) +#define FLUSH_ALL_ON_EVENT (1 << 10) +#define STALL_ON_EVENT (1 << 11) +#define SMX_EVENT_CTL 0xA02C +#define ES_FLUSH_CTL(x) ((x) << 0) +#define GS_FLUSH_CTL(x) ((x) << 3) +#define ACK_FLUSH_CTL(x) ((x) << 6) +#define SYNC_FLUSH_CTL (1 << 8) + +#define SPI_CONFIG_CNTL 0x9100 +#define GPR_WRITE_PRIORITY(x) ((x) << 0) +#define DISABLE_INTERP_1 (1 << 5) +#define SPI_CONFIG_CNTL_1 0x913C +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 << 4) +#define SPI_INPUT_Z 0x286D8 +#define SPI_PS_IN_CONTROL_0 0x286CC +#define NUM_INTERP(x) ((x)<<0) +#define POSITION_ENA (1<<8) +#define POSITION_CENTROID (1<<9) +#define POSITION_ADDR(x) ((x)<<10) +#define PARAM_GEN(x) ((x)<<15) +#define PARAM_GEN_ADDR(x) ((x)<<19) +#define BARYC_SAMPLE_CNTL(x) ((x)<<26) +#define PERSP_GRADIENT_ENA (1<<28) +#define LINEAR_GRADIENT_ENA (1<<29) +#define POSITION_SAMPLE (1<<30) +#define BARYC_AT_SAMPLE_ENA (1<<31) + +#define SQ_CONFIG 0x8C00 +#define VC_ENABLE (1 << 0) +#define EXPORT_SRC_C (1 << 1) +#define DX9_CONSTS (1 << 2) +#define ALU_INST_PREFER_VECTOR (1 << 3) +#define DX10_CLAMP (1 << 4) +#define CLAUSE_SEQ_PRIO(x) ((x) << 8) +#define PS_PRIO(x) ((x) << 24) +#define VS_PRIO(x) ((x) << 26) +#define GS_PRIO(x) ((x) << 28) +#define SQ_DYN_GPR_SIZE_SIMD_AB_0 0x8DB0 +#define SIMDA_RING0(x) ((x)<<0) +#define SIMDA_RING1(x) ((x)<<8) +#define SIMDB_RING0(x) ((x)<<16) +#define SIMDB_RING1(x) ((x)<<24) +#define SQ_DYN_GPR_SIZE_SIMD_AB_1 0x8DB4 +#define SQ_DYN_GPR_SIZE_SIMD_AB_2 0x8DB8 +#define SQ_DYN_GPR_SIZE_SIMD_AB_3 0x8DBC +#define SQ_DYN_GPR_SIZE_SIMD_AB_4 0x8DC0 +#define SQ_DYN_GPR_SIZE_SIMD_AB_5 0x8DC4 +#define SQ_DYN_GPR_SIZE_SIMD_AB_6 0x8DC8 +#define SQ_DYN_GPR_SIZE_SIMD_AB_7 0x8DCC +#define ES_PRIO(x) ((x) << 30) +#define SQ_GPR_RESOURCE_MGMT_1 0x8C04 +#define NUM_PS_GPRS(x) ((x) << 0) +#define NUM_VS_GPRS(x) ((x) << 16) +#define DYN_GPR_ENABLE (1 << 27) +#define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28) +#define SQ_GPR_RESOURCE_MGMT_2 0x8C08 +#define NUM_GS_GPRS(x) ((x) << 0) +#define NUM_ES_GPRS(x) ((x) << 16) +#define SQ_MS_FIFO_SIZES 0x8CF0 +#define CACHE_FIFO_SIZE(x) ((x) << 0) +#define FETCH_FIFO_HIWATER(x) ((x) << 8) +#define DONE_FIFO_HIWATER(x) ((x) << 16) +#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24) +#define SQ_STACK_RESOURCE_MGMT_1 0x8C10 +#define NUM_PS_STACK_ENTRIES(x) ((x) << 0) +#define NUM_VS_STACK_ENTRIES(x) ((x) << 16) +#define SQ_STACK_RESOURCE_MGMT_2 0x8C14 +#define NUM_GS_STACK_ENTRIES(x) ((x) << 0) +#define NUM_ES_STACK_ENTRIES(x) ((x) << 16) +#define SQ_THREAD_RESOURCE_MGMT 0x8C0C +#define NUM_PS_THREADS(x) ((x) << 0) +#define NUM_VS_THREADS(x) ((x) << 8) +#define NUM_GS_THREADS(x) ((x) << 16) +#define NUM_ES_THREADS(x) ((x) << 24) + +#define SX_DEBUG_1 0x9058 +#define ENABLE_NEW_SMX_ADDRESS (1 << 16) +#define SX_EXPORT_BUFFER_SIZES 0x900C +#define COLOR_BUFFER_SIZE(x) ((x) << 0) +#define POSITION_BUFFER_SIZE(x) ((x) << 8) +#define SMX_BUFFER_SIZE(x) ((x) << 16) +#define SX_MISC 0x28350 + +#define TA_CNTL_AUX 0x9508 +#define DISABLE_CUBE_WRAP (1 << 0) +#define DISABLE_CUBE_ANISO (1 << 1) +#define SYNC_GRADIENT (1 << 24) +#define SYNC_WALKER (1 << 25) +#define SYNC_ALIGNER (1 << 26) +#define BILINEAR_PRECISION_6_BIT (0 << 31) +#define BILINEAR_PRECISION_8_BIT (1 << 31) + +#define TCP_CNTL 0x9610 + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x)<<0) +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define AUTO_INVLD_EN(x) ((x) << 6) +#define NO_AUTO 0 +#define ES_AUTO 1 +#define GS_AUTO 2 +#define ES_AND_GS_AUTO 3 +#define VGT_ES_PER_GS 0x88CC +#define VGT_GS_PER_ES 0x88C8 +#define VGT_GS_PER_VS 0x88E8 +#define VGT_GS_VERTEX_REUSE 0x88D4 +#define VGT_NUM_INSTANCES 0x8974 +#define VGT_OUT_DEALLOC_CNTL 0x28C5C +#define DEALLOC_DIST_MASK 0x0000007F +#define VGT_STRMOUT_EN 0x28AB0 +#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 +#define VTX_REUSE_DEPTH_MASK 0x000000FF + +#define VM_CONTEXT0_CNTL 0x1410 +#define ENABLE_CONTEXT (1 << 0) +#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C +#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C +#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1518 +#define VM_L2_CNTL 0x1400 +#define ENABLE_L2_CACHE (1 << 0) +#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) +#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) +#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 14) +#define VM_L2_CNTL2 0x1404 +#define INVALIDATE_ALL_L1_TLBS (1 << 0) +#define INVALIDATE_L2_CACHE (1 << 1) +#define VM_L2_CNTL3 0x1408 +#define BANK_SELECT(x) ((x) << 0) +#define CACHE_UPDATE_MODE(x) ((x) << 6) +#define VM_L2_STATUS 0x140C +#define L2_BUSY (1 << 0) + +#define WAIT_UNTIL 0x8040 + +#endif