Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next

This is a new pull for drm-next, on top of last week's pull, with the following
changes:
- Fix 64-bit divide
- Fix VRAM type on Vega20
- Misc Vega20 fixes
- Misc DC fixes
- Fix GDS/GWS/OA domain handling

Previous changes from last week:
amdgpu/kfd:
- Picasso (new APU) support
- Raven2 (new APU) support
- Vega20 enablement
- ACP powergating improvements
- Add ABGR/XBGR display support
- VCN JPEG engine support
- Initial xGMI support
- Use load balancing for engine scheduling
- Lots of new documentation
- Rework and clean up i2c and aux handling in DC
- Add DP YCbCr 4:2:0 support in DC
- Add DMCU firmware loading for Raven (used for ABM and PSR)
- New debugfs features in DC
- LVDS support in DC
- Implement wave kill for gfx/compute (lightweight reset for shaders)
- Use the AGP aperture to avoid GART mappings when possible
- GPUVM performance improvements
- Bulk moves for more efficient GPUVM LRU handling
- Merge amdgpu and amdkfd into one module
- Enable gfxoff and stutter mode on Raven
- Misc cleanups

Scheduler:
- Load balancing support
- Bug fixes

ttm:
- Bulk move functionality
- Bug fixes

radeon:
- Misc cleanups

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920150438.12693-1-alexander.deucher@amd.com
Committed by Dave Airlie on 2018-09-21 09:52:34 +10:00
commit 36c9c3c911
280 changed files with 81925 additions and 8278 deletions

@ -505,7 +505,7 @@ GPU Scheduler
Overview Overview
-------- --------
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview :doc: Overview
Scheduler Function References Scheduler Function References
@ -514,5 +514,5 @@ Scheduler Function References
.. kernel-doc:: include/drm/gpu_scheduler.h .. kernel-doc:: include/drm/gpu_scheduler.h
:internal: :internal:
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:export: :export:

@ -285,8 +285,6 @@ source "drivers/gpu/drm/bridge/Kconfig"
source "drivers/gpu/drm/sti/Kconfig" source "drivers/gpu/drm/sti/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"
source "drivers/gpu/drm/imx/Kconfig" source "drivers/gpu/drm/imx/Kconfig"
source "drivers/gpu/drm/v3d/Kconfig" source "drivers/gpu/drm/v3d/Kconfig"

@ -42,3 +42,4 @@ config DRM_AMDGPU_GART_DEBUGFS
source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"

@ -35,7 +35,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH) \ -I$(FULL_AMD_DISPLAY_PATH) \
-I$(FULL_AMD_DISPLAY_PATH)/include \ -I$(FULL_AMD_DISPLAY_PATH)/include \
-I$(FULL_AMD_DISPLAY_PATH)/dc \ -I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
amdgpu-y := amdgpu_drv.o amdgpu-y := amdgpu_drv.o
@ -51,8 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_ids.o amdgpu_gmc.o amdgpu_xgmi.o
# add asic specific block # add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \ amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o vega20_reg_init.o nbio_v7_4.o
# add DF block # add DF block
amdgpu-y += \ amdgpu-y += \
@ -73,7 +74,7 @@ amdgpu-y += \
amdgpu-y += \ amdgpu-y += \
gmc_v7_0.o \ gmc_v7_0.o \
gmc_v8_0.o \ gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o
# add IH block # add IH block
amdgpu-y += \ amdgpu-y += \
@ -88,7 +89,8 @@ amdgpu-y += \
amdgpu-y += \ amdgpu-y += \
amdgpu_psp.o \ amdgpu_psp.o \
psp_v3_1.o \ psp_v3_1.o \
psp_v10_0.o psp_v10_0.o \
psp_v11_0.o
# add SMC block # add SMC block
amdgpu-y += \ amdgpu-y += \
@ -108,6 +110,7 @@ amdgpu-y += \
# add async DMA block # add async DMA block
amdgpu-y += \ amdgpu-y += \
amdgpu_sdma.o \
sdma_v2_4.o \ sdma_v2_4.o \
sdma_v3_0.o \ sdma_v3_0.o \
sdma_v4_0.o sdma_v4_0.o
@ -134,6 +137,9 @@ amdgpu-y += \
amdgpu-y += amdgpu_amdkfd.o amdgpu-y += amdgpu_amdkfd.o
ifneq ($(CONFIG_HSA_AMD),) ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
include $(FULL_AMD_PATH)/amdkfd/Makefile
amdgpu-y += $(AMDKFD_FILES)
amdgpu-y += \ amdgpu-y += \
amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gpuvm.o \

@ -28,6 +28,8 @@
#ifndef __AMDGPU_H__ #ifndef __AMDGPU_H__
#define __AMDGPU_H__ #define __AMDGPU_H__
#include "amdgpu_ctx.h"
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/list.h> #include <linux/list.h>
@ -69,12 +71,15 @@
#include "amdgpu_vcn.h" #include "amdgpu_vcn.h"
#include "amdgpu_mn.h" #include "amdgpu_mn.h"
#include "amdgpu_gmc.h" #include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
#include "amdgpu_dm.h" #include "amdgpu_dm.h"
#include "amdgpu_virt.h" #include "amdgpu_virt.h"
#include "amdgpu_gart.h" #include "amdgpu_gart.h"
#include "amdgpu_debugfs.h" #include "amdgpu_debugfs.h"
#include "amdgpu_job.h" #include "amdgpu_job.h"
#include "amdgpu_bo_list.h" #include "amdgpu_bo_list.h"
#include "amdgpu_gem.h"
/* /*
* Modules parameters. * Modules parameters.
@ -141,16 +146,12 @@ extern int amdgpu_cik_support;
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
/* AMDGPU_IB_POOL_SIZE must be a power of 2 */ /* AMDGPU_IB_POOL_SIZE must be a power of 2 */
#define AMDGPU_IB_POOL_SIZE 16 #define AMDGPU_IB_POOL_SIZE 16
#define AMDGPU_DEBUGFS_MAX_COMPONENTS 32 #define AMDGPU_DEBUGFS_MAX_COMPONENTS 32
#define AMDGPUFB_CONN_LIMIT 4 #define AMDGPUFB_CONN_LIMIT 4
#define AMDGPU_BIOS_NUM_SCRATCH 16 #define AMDGPU_BIOS_NUM_SCRATCH 16
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
/* hard reset data */ /* hard reset data */
#define AMDGPU_ASIC_RESET_DATA 0x39d5e86b #define AMDGPU_ASIC_RESET_DATA 0x39d5e86b
@ -171,13 +172,6 @@ extern int amdgpu_cik_support;
#define AMDGPU_RESET_VCE (1 << 13) #define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14) #define AMDGPU_RESET_VCE1 (1 << 14)
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
#define AMDGPU_GFX_SAFE_MODE 0x00000001L
#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
/* max cursor sizes (in pixels) */ /* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128 #define CIK_CURSOR_HEIGHT 128
@ -205,13 +199,6 @@ enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_LAST AMDGPU_CP_IRQ_LAST
}; };
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_TRAP0 = 0,
AMDGPU_SDMA_IRQ_TRAP1,
AMDGPU_SDMA_IRQ_LAST
};
enum amdgpu_thermal_irq { enum amdgpu_thermal_irq {
AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0, AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0,
AMDGPU_THERMAL_IRQ_HIGH_TO_LOW, AMDGPU_THERMAL_IRQ_HIGH_TO_LOW,
@ -224,6 +211,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST AMDGPU_CP_KIQ_IRQ_LAST
}; };
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 20
int amdgpu_device_ip_set_clockgating_state(void *dev, int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type, enum amd_ip_block_type block_type,
enum amd_clockgating_state state); enum amd_clockgating_state state);
@ -271,70 +262,6 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
int amdgpu_device_ip_block_add(struct amdgpu_device *adev, int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
const struct amdgpu_ip_block_version *ip_block_version); const struct amdgpu_ip_block_version *ip_block_version);
/* provided by hw blocks that can move/clear data. e.g., gfx or sdma */
struct amdgpu_buffer_funcs {
/* maximum bytes in a single operation */
uint32_t copy_max_bytes;
/* number of dw to reserve per operation */
unsigned copy_num_dw;
/* used for buffer migration */
void (*emit_copy_buffer)(struct amdgpu_ib *ib,
/* src addr in bytes */
uint64_t src_offset,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
/* number of dw to reserve per operation */
unsigned fill_num_dw;
/* used for buffer clearing */
void (*emit_fill_buffer)(struct amdgpu_ib *ib,
/* value to write to memory */
uint32_t src_data,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to fill */
uint32_t byte_count);
};
/* provided by hw blocks that can write ptes, e.g., sdma */
struct amdgpu_vm_pte_funcs {
/* number of dw to reserve per operation */
unsigned copy_pte_num_dw;
/* copy pte entries from GART */
void (*copy_pte)(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count);
/* write pte one entry at a time with addr mapping */
void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
uint64_t value, unsigned count,
uint32_t incr);
/* for linear pte/pde updates without addr mapping */
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint64_t flags);
};
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev);
bool (*prescreen_iv)(struct amdgpu_device *adev);
void (*decode_iv)(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
void (*set_rptr)(struct amdgpu_device *adev);
};
/* /*
* BIOS. * BIOS.
*/ */
@ -360,34 +287,6 @@ struct amdgpu_clock {
uint32_t max_pixel_clock; uint32_t max_pixel_clock;
}; };
/*
* GEM.
*/
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
void amdgpu_gem_object_free(struct drm_gem_object *obj);
int amdgpu_gem_object_open(struct drm_gem_object *obj,
struct drm_file *file_priv);
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
/* sub-allocation manager, it has to be protected by another lock. /* sub-allocation manager, it has to be protected by another lock.
* By conception this is an helper for other part of the driver * By conception this is an helper for other part of the driver
* like the indirect buffer or semaphore, which both have their * like the indirect buffer or semaphore, which both have their
@ -437,22 +336,6 @@ struct amdgpu_sa_bo {
struct dma_fence *fence; struct dma_fence *fence;
}; };
/*
* GEM objects.
*/
void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
struct drm_gem_object **obj);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct drm_device *dev,
uint32_t handle, uint64_t *offset_p);
int amdgpu_fence_slab_init(void); int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void); void amdgpu_fence_slab_fini(void);
@ -599,84 +482,6 @@ struct amdgpu_ib {
extern const struct drm_sched_backend_ops amdgpu_sched_ops; extern const struct drm_sched_backend_ops amdgpu_sched_ops;
/*
* Queue manager
*/
struct amdgpu_queue_mapper {
int hw_ip;
struct mutex lock;
/* protected by lock */
struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
};
struct amdgpu_queue_mgr {
struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
};
int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr);
int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr);
int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr,
u32 hw_ip, u32 instance, u32 ring,
struct amdgpu_ring **out_ring);
/*
* context related structures
*/
struct amdgpu_ctx_ring {
uint64_t sequence;
struct dma_fence **fences;
struct drm_sched_entity entity;
};
struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device *adev;
struct amdgpu_queue_mgr queue_mgr;
unsigned reset_counter;
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
struct dma_fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
struct mutex lock;
atomic_t guilty;
};
struct amdgpu_ctx_mgr {
struct amdgpu_device *adev;
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
};
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct dma_fence *fence, uint64_t *seq);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq);
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
/* /*
* file private structure * file private structure
*/ */
@ -690,271 +495,6 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr ctx_mgr; struct amdgpu_ctx_mgr ctx_mgr;
}; };
/*
* GFX stuff
*/
#include "clearstate_defs.h"
struct amdgpu_rlc_funcs {
void (*enter_safe_mode)(struct amdgpu_device *adev);
void (*exit_safe_mode)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
spinlock_t ring_lock;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
};
/*
* GPU scratch registers structures, functions & helpers
*/
struct amdgpu_scratch {
unsigned num_reg;
uint32_t reg_base;
uint32_t free_mask;
};
/*
* GFX configurations
*/
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
struct amdgpu_rb_config {
uint32_t rb_backend_disable;
uint32_t user_rb_backend_disable;
uint32_t raster_config;
uint32_t raster_config_1;
};
struct gb_addr_config {
uint16_t pipe_interleave_size;
uint8_t num_pipes;
uint8_t max_compress_frags;
uint8_t num_banks;
uint8_t num_se;
uint8_t num_rb_per_se;
};
struct amdgpu_gfx_config {
unsigned max_shader_engines;
unsigned max_tile_pipes;
unsigned max_cu_per_sh;
unsigned max_sh_per_se;
unsigned max_backends_per_se;
unsigned max_texture_channel_caches;
unsigned max_gprs;
unsigned max_gs_threads;
unsigned max_hw_contexts;
unsigned sc_prim_fifo_size_frontend;
unsigned sc_prim_fifo_size_backend;
unsigned sc_hiz_tile_fifo_size;
unsigned sc_earlyz_tile_fifo_size;
unsigned num_tile_pipes;
unsigned backend_enable_mask;
unsigned mem_max_burst_length_bytes;
unsigned mem_row_size_in_kb;
unsigned shader_engine_tile_size;
unsigned num_gpus;
unsigned multi_gpu_tile_size;
unsigned mc_arb_ramcfg;
unsigned gb_addr_config;
unsigned num_rbs;
unsigned gs_vgt_table_depth;
unsigned gs_prim_buffer_depth;
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
/* cached value of DB_DEBUG2 */
uint32_t db_debug2;
};
struct amdgpu_cu_info {
uint32_t simd_per_cu;
uint32_t max_waves_per_simd;
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
/* total active CU number */
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
uint32_t bitmap[4][4];
};
struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields);
void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
};
struct amdgpu_ngg_buf {
struct amdgpu_bo *bo;
uint64_t gpu_addr;
uint32_t size;
uint32_t bo_size;
};
enum {
NGG_PRIM = 0,
NGG_POS,
NGG_CNTL,
NGG_PARAM,
NGG_BUF_MAX
};
struct amdgpu_ngg {
struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
uint32_t gds_reserve_addr;
uint32_t gds_reserve_size;
bool init;
};
struct sq_work {
struct work_struct work;
unsigned ih_data;
};
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
struct amdgpu_scratch scratch;
const struct firmware *me_fw; /* ME firmware */
uint32_t me_fw_version;
const struct firmware *pfp_fw; /* PFP firmware */
uint32_t pfp_fw_version;
const struct firmware *ce_fw; /* CE firmware */
uint32_t ce_fw_version;
const struct firmware *rlc_fw; /* RLC firmware */
uint32_t rlc_fw_version;
const struct firmware *mec_fw; /* MEC firmware */
uint32_t mec_fw_version;
const struct firmware *mec2_fw; /* MEC2 firmware */
uint32_t mec2_fw_version;
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
uint32_t rlc_srlc_fw_version;
uint32_t rlc_srlc_feature_version;
uint32_t rlc_srlg_fw_version;
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
struct sq_work sq_work;
/* gfx status */
uint32_t gfx_current_status;
/* ce ram size*/
unsigned ce_ram_size;
struct amdgpu_cu_info cu_info;
const struct amdgpu_gfx_funcs *funcs;
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
/* s3/s4 mask */
bool in_suspend;
/* NGG */
struct amdgpu_ngg ngg;
/* pipe reservation */
struct mutex pipe_reserve_mutex;
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib); unsigned size, struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
@ -986,7 +526,7 @@ struct amdgpu_cs_parser {
/* scheduler job object */ /* scheduler job object */
struct amdgpu_job *job; struct amdgpu_job *job;
struct amdgpu_ring *ring; struct drm_sched_entity *entity;
/* buffer objects */ /* buffer objects */
struct ww_acquire_ctx ticket; struct ww_acquire_ctx ticket;
@ -1037,58 +577,6 @@ struct amdgpu_wb {
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb); void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
/*
* SDMA
*/
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
uint32_t fw_version;
uint32_t feature_version;
struct amdgpu_ring ring;
bool burst_nop;
};
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
#ifdef CONFIG_DRM_AMDGPU_SI
//SI DMA has a difference trap irq number for the second engine
struct amdgpu_irq_src trap_irq_1;
#endif
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
uint32_t srbm_soft_reset;
};
/*
* Firmware
*/
enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_DIRECT = 0,
AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP,
};
struct amdgpu_firmware {
struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
enum amdgpu_firmware_load_type load_type;
struct amdgpu_bo *fw_buf;
unsigned int fw_size;
unsigned int max_ucodes;
/* firmwares are loaded by psp instead of smu from vega10 */
const struct amdgpu_psp_funcs *funcs;
struct amdgpu_bo *rbuf;
struct mutex mutex;
/* gpu info firmware data pointer */
const struct firmware *gpu_info_fw;
void *fw_buf_ptr;
uint64_t fw_buf_mc;
};
/* /*
* Benchmarking * Benchmarking
*/ */
@ -1166,23 +654,9 @@ struct amdgpu_asic_funcs {
/* /*
* IOCTL. * IOCTL.
*/ */
int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp); struct drm_file *filp);
int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp); struct drm_file *filp);
@ -1190,9 +664,6 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp); struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
/* VRAM scratch page for HDP bug, default vram page */ /* VRAM scratch page for HDP bug, default vram page */
struct amdgpu_vram_scratch { struct amdgpu_vram_scratch {
struct amdgpu_bo *robj; struct amdgpu_bo *robj;
@ -1666,22 +1137,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8)) #define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
#define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16)) #define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
static inline struct amdgpu_sdma_instance *
amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
if (&adev->sdma.instance[i].ring == ring)
break;
if (i < AMDGPU_MAX_SDMA_INSTANCES)
return &adev->sdma.instance[i];
else
return NULL;
}
/* /*
* ASICs macro. * ASICs macro.
*/ */
@ -1700,74 +1155,16 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
/* Common functions */ /* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job* job, bool force); struct amdgpu_job* job);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes); u64 num_vis_bytes);
void amdgpu_device_vram_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc, u64 base);
void amdgpu_device_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers, const u32 *registers,
@ -1818,6 +1215,12 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg); unsigned long arg);
/*
* functions used by amdgpu_xgmi.c
*/
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
/* /*
* functions used by amdgpu_encoder.c * functions used by amdgpu_encoder.c
*/ */

@ -116,136 +116,47 @@ static int acp_sw_fini(void *handle)
return 0; return 0;
} }
/* power off a tile/block within ACP */
static int acp_suspend_tile(void *cgs_dev, int tile)
{
u32 val = 0;
u32 count = 0;
if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
pr_err("Invalid ACP tile : %d to suspend\n", tile);
return -1;
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
val &= ACP_TILE_ON_MASK;
if (val == 0x0) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
val = val | (1 << tile);
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
0x500 + tile);
count = ACP_TIMEOUT_LOOP;
while (true) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ tile);
val = val & ACP_TILE_ON_MASK;
if (val == ACP_TILE_OFF_MASK)
break;
if (--count == 0) {
pr_err("Timeout reading ACP PGFSM status\n");
return -ETIMEDOUT;
}
udelay(100);
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
val |= ACP_TILE_OFF_RETAIN_REG_MASK;
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
}
return 0;
}
/* power on a tile/block within ACP */
static int acp_resume_tile(void *cgs_dev, int tile)
{
u32 val = 0;
u32 count = 0;
if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
pr_err("Invalid ACP tile to resume\n");
return -1;
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
val = val & ACP_TILE_ON_MASK;
if (val != 0x0) {
cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
0x600 + tile);
count = ACP_TIMEOUT_LOOP;
while (true) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ tile);
val = val & ACP_TILE_ON_MASK;
if (val == 0x0)
break;
if (--count == 0) {
pr_err("Timeout reading ACP PGFSM status\n");
return -ETIMEDOUT;
}
udelay(100);
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
if (tile == ACP_TILE_P1)
val = val & (ACP_TILE_P1_MASK);
else if (tile == ACP_TILE_P2)
val = val & (ACP_TILE_P2_MASK);
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
}
return 0;
}
struct acp_pm_domain { struct acp_pm_domain {
void *cgs_dev; void *adev;
struct generic_pm_domain gpd; struct generic_pm_domain gpd;
}; };
static int acp_poweroff(struct generic_pm_domain *genpd) static int acp_poweroff(struct generic_pm_domain *genpd)
{ {
int i, ret;
struct acp_pm_domain *apd; struct acp_pm_domain *apd;
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd); apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) { if (apd != NULL) {
/* Donot return abruptly if any of power tile fails to suspend. adev = apd->adev;
* Log it and continue powering off other tile /* call smu to POWER GATE ACP block
*/ * smu will
for (i = 4; i >= 0 ; i--) { * 1. turn off the acp clock
ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); * 2. power off the acp tiles
if (ret) * 3. check and enter ulv state
pr_err("ACP tile %d tile suspend failed\n", i); */
} if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
} }
return 0; return 0;
} }
static int acp_poweron(struct generic_pm_domain *genpd) static int acp_poweron(struct generic_pm_domain *genpd)
{ {
int i, ret;
struct acp_pm_domain *apd; struct acp_pm_domain *apd;
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd); apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) { if (apd != NULL) {
for (i = 0; i < 2; i++) { adev = apd->adev;
ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i); /* call smu to UNGATE ACP block
if (ret) { * smu will
pr_err("ACP tile %d resume failed\n", i); * 1. exit ulv
break; * 2. turn on acp clock
} * 3. power on acp tiles
} */
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
/* Disable DSPs which are not going to be used */ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
for (i = 0; i < 3; i++) {
ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
/* Continue suspending other DSP, even if one fails */
if (ret)
pr_err("ACP DSP %d suspend failed\n", i);
}
} }
return 0; return 0;
} }
@ -289,30 +200,31 @@ static int acp_hw_init(void *handle)
r = amd_acp_hw_init(adev->acp.cgs_device, r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor); ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */ /* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV) if (r == -ENODEV) {
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0; return 0;
else if (r) } else if (r) {
return r; return r;
}
if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
return -EINVAL; return -EINVAL;
acp_base = adev->rmmio_base; acp_base = adev->rmmio_base;
if (adev->asic_type != CHIP_STONEY) {
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
adev->acp.acp_genpd->gpd.power_off = acp_poweroff; if (adev->acp.acp_genpd == NULL)
adev->acp.acp_genpd->gpd.power_on = acp_poweron; return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; adev->acp.acp_genpd->adev = adev;
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
}
adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL); GFP_KERNEL);
@ -429,17 +341,16 @@ static int acp_hw_init(void *handle)
if (r) if (r)
return r; return r;
if (adev->asic_type != CHIP_STONEY) { for (i = 0; i < ACP_DEVS ; i++) {
for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); if (r) {
if (r) { dev_err(dev, "Failed to add dev to genpd\n");
dev_err(dev, "Failed to add dev to genpd\n"); return r;
return r;
}
} }
} }
/* Assert Soft reset of ACP */ /* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@ -497,8 +408,10 @@ static int acp_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* return early if no ACP */ /* return early if no ACP */
if (!adev->acp.acp_cell) if (!adev->acp.acp_genpd) {
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0; return 0;
}
/* Assert Soft reset of ACP */ /* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@ -536,19 +449,17 @@ static int acp_hw_fini(void *handle)
udelay(100); udelay(100);
} }
if (adev->acp.acp_genpd) { for (i = 0; i < ACP_DEVS ; i++) {
for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); ret = pm_genpd_remove_device(dev);
ret = pm_genpd_remove_device(dev); /* If removal fails, dont giveup and try rest */
/* If removal fails, dont giveup and try rest */ if (ret)
if (ret) dev_err(dev, "remove dev from genpd failed\n");
dev_err(dev, "remove dev from genpd failed\n");
}
kfree(adev->acp.acp_genpd);
} }
mfd_remove_devices(adev->acp.parent); mfd_remove_devices(adev->acp.parent);
kfree(adev->acp.acp_res); kfree(adev->acp.acp_res);
kfree(adev->acp.acp_genpd);
kfree(adev->acp.acp_cell); kfree(adev->acp.acp_cell);
return 0; return 0;
@ -556,11 +467,21 @@ static int acp_hw_fini(void *handle)
static int acp_suspend(void *handle) static int acp_suspend(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* power up on suspend */
if (!adev->acp.acp_cell)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0; return 0;
} }
static int acp_resume(void *handle) static int acp_resume(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* power down again on resume */
if (!adev->acp.acp_cell)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0; return 0;
} }
@ -593,6 +514,12 @@ static int acp_set_clockgating_state(void *handle,
static int acp_set_powergating_state(void *handle, static int acp_set_powergating_state(void *handle,
enum amd_powergating_state state) enum amd_powergating_state state)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = state == AMD_PG_STATE_GATE ? true : false;
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
return 0; return 0;
} }

@ -31,6 +31,7 @@
#include <drm/drm_crtc_helper.h> #include <drm/drm_crtc_helper.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_pm.h" #include "amdgpu_pm.h"
#include "amdgpu_display.h"
#include "amd_acpi.h" #include "amd_acpi.h"
#include "atom.h" #include "atom.h"

@ -28,7 +28,6 @@
#include <linux/module.h> #include <linux/module.h>
const struct kgd2kfd_calls *kgd2kfd; const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
static const unsigned int compute_vmid_bitmap = 0xFF00; static const unsigned int compute_vmid_bitmap = 0xFF00;
@ -36,45 +35,23 @@ int amdgpu_amdkfd_init(void)
{ {
int ret; int ret;
#if defined(CONFIG_HSA_AMD_MODULE) #ifdef CONFIG_HSA_AMD
int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
if (kgd2kfd_init_p == NULL)
return -ENOENT;
ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret) {
symbol_put(kgd2kfd_init);
kgd2kfd = NULL;
}
#elif defined(CONFIG_HSA_AMD)
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret) if (ret)
kgd2kfd = NULL; kgd2kfd = NULL;
amdgpu_amdkfd_gpuvm_init_mem_limits();
#else #else
kgd2kfd = NULL; kgd2kfd = NULL;
ret = -ENOENT; ret = -ENOENT;
#endif #endif
#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
amdgpu_amdkfd_gpuvm_init_mem_limits();
#endif
return ret; return ret;
} }
void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_fini(void)
{ {
if (kgd2kfd) { if (kgd2kfd)
kgd2kfd->exit(); kgd2kfd->exit();
symbol_put(kgd2kfd_init);
}
} }
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
@ -155,7 +132,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn .gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT, << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_VA_HOLE_START), AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev->ddev->render->index .drm_render_minor = adev->ddev->render->index
}; };
@ -267,7 +244,8 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
amdgpu_device_gpu_recover(adev, NULL, false); if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
} }
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
@ -433,6 +411,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
} }
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->gmc.xgmi.hive_id;
}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr, uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len) uint32_t *ib_cmd, uint32_t ib_len)
@ -506,7 +491,7 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false; return false;
} }
#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD) #ifndef CONFIG_HSA_AMD
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{ {
return false; return false;

@ -145,6 +145,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
#define read_user_wptr(mmptr, wptr, dst) \ #define read_user_wptr(mmptr, wptr, dst) \
({ \ ({ \
@ -162,16 +163,17 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
}) })
/* GPUVM API */ /* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
void **process_info, void **vm, void **process_info,
struct dma_fence **ef); struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, struct file *filp, unsigned int pasid,
void **vm, void **process_info, void **vm, void **process_info,
struct dma_fence **ef); struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm); struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size, struct kgd_dev *kgd, uint64_t va, uint64_t size,

@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base, .set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,

@ -164,6 +164,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base, .set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,

@ -201,6 +201,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base, .set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@ -214,7 +215,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib, .submit_ib = amdgpu_amdkfd_submit_ib,
.gpu_recover = amdgpu_amdkfd_gpu_reset, .gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle .set_compute_idle = amdgpu_amdkfd_set_compute_idle,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
}; };
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)

@ -364,7 +364,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
struct amdgpu_bo *pd = vm->root.base.bo; struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
struct amdgpu_vm_parser param; struct amdgpu_vm_parser param;
uint64_t addr, flags = AMDGPU_PTE_VALID;
int ret; int ret;
param.domain = AMDGPU_GEM_DOMAIN_VRAM; param.domain = AMDGPU_GEM_DOMAIN_VRAM;
@ -383,9 +382,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret; return ret;
} }
addr = amdgpu_bo_gpu_offset(vm->root.base.bo); vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
vm->pd_phys_addr = addr;
if (vm->use_cpu_for_update) { if (vm->use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL); ret = amdgpu_bo_kmap(pd, NULL);
@ -678,7 +675,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
if (!ctx->vm_pd) if (!ctx->vm_pd)
return -ENOMEM; return -ENOMEM;
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0; ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true; ctx->kfd_bo.tv.shared = true;
@ -743,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
return -ENOMEM; return -ENOMEM;
} }
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0; ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true; ctx->kfd_bo.tv.shared = true;
@ -1003,8 +998,8 @@ create_evict_fence_fail:
return ret; return ret;
} }
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
void **process_info, void **vm, void **process_info,
struct dma_fence **ef) struct dma_fence **ef)
{ {
struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_device *adev = get_amdgpu_device(kgd);
@ -1016,7 +1011,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
return -ENOMEM; return -ENOMEM;
/* Initialize AMDGPU part of the VM */ /* Initialize AMDGPU part of the VM */
ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0); ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
if (ret) { if (ret) {
pr_err("Failed init vm ret %d\n", ret); pr_err("Failed init vm ret %d\n", ret);
goto amdgpu_vm_init_fail; goto amdgpu_vm_init_fail;
@ -1039,7 +1034,7 @@ amdgpu_vm_init_fail:
} }
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, struct file *filp, unsigned int pasid,
void **vm, void **process_info, void **vm, void **process_info,
struct dma_fence **ef) struct dma_fence **ef)
{ {
@ -1054,7 +1049,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
return -EINVAL; return -EINVAL;
/* Convert VM into a compute VM */ /* Convert VM into a compute VM */
ret = amdgpu_vm_make_compute(adev, avm); ret = amdgpu_vm_make_compute(adev, avm, pasid);
if (ret) if (ret)
return ret; return ret;
@ -1117,6 +1112,25 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
kfree(vm); kfree(vm);
} }
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
if (WARN_ON(!kgd || !vm))
return;
pr_debug("Releasing process vm %p\n", vm);
/* The original pasid of amdgpu vm has already been
* released during making a amdgpu vm to a compute vm
* The current pasid is managed by kfd and will be
* released on kfd process destroy. Set amdgpu pasid
* to 0 to avoid duplicate release.
*/
amdgpu_vm_release_compute(adev, avm);
}
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{ {
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;

@ -29,6 +29,7 @@
#include "amdgpu_atombios.h" #include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h" #include "amdgpu_atomfirmware.h"
#include "amdgpu_i2c.h" #include "amdgpu_i2c.h"
#include "amdgpu_display.h"
#include "atom.h" #include "atom.h"
#include "atom-bits.h" #include "atom-bits.h"

@ -117,6 +117,10 @@ union igp_info {
union umc_info { union umc_info {
struct atom_umc_info_v3_1 v31; struct atom_umc_info_v3_1 v31;
}; };
union vram_info {
struct atom_vram_info_header_v2_3 v23;
};
/* /*
* Return vram width from integrated system info table, if available, * Return vram width from integrated system info table, if available,
* or 0 if not. * or 0 if not.
@ -174,7 +178,7 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_GDDR5: case ATOM_DGPU_VRAM_TYPE_GDDR5:
vram_type = AMDGPU_VRAM_TYPE_GDDR5; vram_type = AMDGPU_VRAM_TYPE_GDDR5;
break; break;
case ATOM_DGPU_VRAM_TYPE_HBM: case ATOM_DGPU_VRAM_TYPE_HBM2:
vram_type = AMDGPU_VRAM_TYPE_HBM; vram_type = AMDGPU_VRAM_TYPE_HBM;
break; break;
default: default:
@ -195,7 +199,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
int index; int index;
u16 data_offset, size; u16 data_offset, size;
union igp_info *igp_info; union igp_info *igp_info;
union umc_info *umc_info; union vram_info *vram_info;
u8 frev, crev; u8 frev, crev;
u8 mem_type; u8 mem_type;
@ -204,7 +208,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
integratedsysteminfo); integratedsysteminfo);
else else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info); vram_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context, if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size, index, &size,
&frev, &crev, &data_offset)) { &frev, &crev, &data_offset)) {
@ -219,11 +223,11 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
return 0; return 0;
} }
} else { } else {
umc_info = (union umc_info *) vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset); (mode_info->atom_context->bios + data_offset);
switch (crev) { switch (crev) {
case 1: case 3:
mem_type = umc_info->v31.vram_type; mem_type = vram_info->v23.vram_module[0].memory_type;
return convert_atom_mem_type_to_vram_type(adev, mem_type); return convert_atom_mem_type_to_vram_type(adev, mem_type);
default: default:
return 0; return 0;

View File

@ -49,8 +49,11 @@ static void amdgpu_bo_list_free(struct kref *ref)
refcount); refcount);
struct amdgpu_bo_list_entry *e; struct amdgpu_bo_list_entry *e;
amdgpu_bo_list_for_each_entry(e, list) amdgpu_bo_list_for_each_entry(e, list) {
amdgpu_bo_unref(&e->robj); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
amdgpu_bo_unref(&bo);
}
call_rcu(&list->rhead, amdgpu_bo_list_free_rcu); call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
} }
@ -67,7 +70,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
unsigned i; unsigned i;
int r; int r;
if (num_entries > SIZE_MAX / sizeof(struct amdgpu_bo_list_entry)) if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
/ sizeof(struct amdgpu_bo_list_entry))
return -EINVAL; return -EINVAL;
size = sizeof(struct amdgpu_bo_list); size = sizeof(struct amdgpu_bo_list);
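
The new bound in this hunk rejects num_entries values for which the header-plus-array allocation would overflow SIZE_MAX. A minimal userspace sketch of the same guard, using hypothetical header and entry types in place of the real amdgpu_bo_list structures:

#include <stdint.h>
#include <stdio.h>

struct list_header { uint64_t flags; };              /* stands in for struct amdgpu_bo_list */
struct list_entry  { void *bo; uint32_t priority; }; /* stands in for struct amdgpu_bo_list_entry */

/* Size of one header followed by num entries, or 0 if that would
 * overflow size_t (the same bound the hunk above checks). */
static size_t list_alloc_size(size_t num)
{
        if (num > (SIZE_MAX - sizeof(struct list_header)) / sizeof(struct list_entry))
                return 0;
        return sizeof(struct list_header) + num * sizeof(struct list_entry);
}

int main(void)
{
        printf("4 entries        -> %zu bytes\n", list_alloc_size(4));
        printf("SIZE_MAX entries -> %zu (0 means rejected)\n", list_alloc_size(SIZE_MAX));
        return 0;
}
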
@ -111,21 +115,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
entry = &array[last_entry++]; entry = &array[last_entry++];
} }
entry->robj = bo;
entry->priority = min(info[i].bo_priority, entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY); AMDGPU_BO_LIST_MAX_PRIORITY);
entry->tv.bo = &entry->robj->tbo; entry->tv.bo = &bo->tbo;
entry->tv.shared = !entry->robj->prime_shared_count; entry->tv.shared = !bo->prime_shared_count;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = entry->robj; list->gds_obj = bo;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
list->gws_obj = entry->robj; list->gws_obj = bo;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
list->oa_obj = entry->robj; list->oa_obj = bo;
total_size += amdgpu_bo_size(entry->robj); total_size += amdgpu_bo_size(bo);
trace_amdgpu_bo_list_set(list, entry->robj); trace_amdgpu_bo_list_set(list, bo);
} }
list->first_userptr = first_userptr; list->first_userptr = first_userptr;
@ -137,8 +140,11 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return 0; return 0;
error_free: error_free:
while (i--) while (i--) {
amdgpu_bo_unref(&array[i].robj); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
amdgpu_bo_unref(&bo);
}
kvfree(list); kvfree(list);
return r; return r;
@ -190,9 +196,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
* with the same priority, i.e. it must be stable. * with the same priority, i.e. it must be stable.
*/ */
amdgpu_bo_list_for_each_entry(e, list) { amdgpu_bo_list_for_each_entry(e, list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
unsigned priority = e->priority; unsigned priority = e->priority;
if (!e->robj->parent) if (!bo->parent)
list_add_tail(&e->tv.head, &bucket[priority]); list_add_tail(&e->tv.head, &bucket[priority]);
e->user_pages = NULL; e->user_pages = NULL;

View File

@ -32,7 +32,6 @@ struct amdgpu_bo_va;
struct amdgpu_fpriv; struct amdgpu_fpriv;
struct amdgpu_bo_list_entry { struct amdgpu_bo_list_entry {
struct amdgpu_bo *robj;
struct ttm_validate_buffer tv; struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va; struct amdgpu_bo_va *bo_va;
uint32_t priority; uint32_t priority;

View File

@ -34,6 +34,7 @@
#include "atombios_dp.h" #include "atombios_dp.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_i2c.h" #include "amdgpu_i2c.h"
#include "amdgpu_display.h"
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>

View File

@ -32,38 +32,47 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_trace.h" #include "amdgpu_trace.h"
#include "amdgpu_gmc.h" #include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data, struct drm_amdgpu_cs_chunk_fence *data,
uint32_t *offset) uint32_t *offset)
{ {
struct drm_gem_object *gobj; struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
unsigned long size; unsigned long size;
int r;
gobj = drm_gem_object_lookup(p->filp, data->handle); gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL) if (gobj == NULL)
return -EINVAL; return -EINVAL;
p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0; p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.bo = &bo->tbo;
p->uf_entry.tv.shared = true; p->uf_entry.tv.shared = true;
p->uf_entry.user_pages = NULL; p->uf_entry.user_pages = NULL;
size = amdgpu_bo_size(p->uf_entry.robj); drm_gem_object_put_unlocked(gobj);
if (size != PAGE_SIZE || (data->offset + 8) > size)
return -EINVAL; size = amdgpu_bo_size(bo);
if (size != PAGE_SIZE || (data->offset + 8) > size) {
r = -EINVAL;
goto error_unref;
}
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
r = -EINVAL;
goto error_unref;
}
*offset = data->offset; *offset = data->offset;
drm_gem_object_put_unlocked(gobj);
if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
amdgpu_bo_unref(&p->uf_entry.robj);
return -EINVAL;
}
return 0; return 0;
error_unref:
amdgpu_bo_unref(&bo);
return r;
} }
static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
@ -221,7 +230,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
goto free_all_kdata; goto free_all_kdata;
} }
if (p->uf_entry.robj) if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset; p->job->uf_addr = uf_offset;
kfree(chunk_array); kfree(chunk_array);
@ -450,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
p->evictable = list_prev_entry(p->evictable, tv.head)) { p->evictable = list_prev_entry(p->evictable, tv.head)) {
struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj; struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
bool update_bytes_moved_vis; bool update_bytes_moved_vis;
uint32_t other; uint32_t other;
/* If we reached our current BO we can forget it */ /* If we reached our current BO we can forget it */
if (candidate->robj == validated) if (bo == validated)
break; break;
/* We can't move pinned BOs here */ /* We can't move pinned BOs here */
@ -521,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
int r; int r;
list_for_each_entry(lobj, validated, tv.head) { list_for_each_entry(lobj, validated, tv.head) {
struct amdgpu_bo *bo = lobj->robj; struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
bool binding_userptr = false; bool binding_userptr = false;
struct mm_struct *usermm; struct mm_struct *usermm;
@ -596,7 +605,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
INIT_LIST_HEAD(&duplicates); INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
if (p->uf_entry.robj && !p->uf_entry.robj->parent) if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated); list_add(&p->uf_entry.tv.head, &p->validated);
while (1) { while (1) {
@ -612,7 +621,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
INIT_LIST_HEAD(&need_pages); INIT_LIST_HEAD(&need_pages);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->robj; struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) { &e->user_invalidated) && e->user_pages) {
@ -631,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
list_del(&e->tv.head); list_del(&e->tv.head);
list_add(&e->tv.head, &need_pages); list_add(&e->tv.head, &need_pages);
amdgpu_bo_unreserve(e->robj); amdgpu_bo_unreserve(bo);
} }
} }
@ -650,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
/* Fill the page arrays for all userptrs. */ /* Fill the page arrays for all userptrs. */
list_for_each_entry(e, &need_pages, tv.head) { list_for_each_entry(e, &need_pages, tv.head) {
struct ttm_tt *ttm = e->robj->tbo.ttm; struct ttm_tt *ttm = e->tv.bo->ttm;
e->user_pages = kvmalloc_array(ttm->num_pages, e->user_pages = kvmalloc_array(ttm->num_pages,
sizeof(struct page*), sizeof(struct page*),
@ -709,23 +718,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
oa = p->bo_list->oa_obj; oa = p->bo_list->oa_obj;
amdgpu_bo_list_for_each_entry(e, p->bo_list) amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->bo_va = amdgpu_vm_bo_find(vm, e->robj); e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
if (gds) { if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds); p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
p->job->gds_size = amdgpu_bo_size(gds); p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
} }
if (gws) { if (gws) {
p->job->gws_base = amdgpu_bo_gpu_offset(gws); p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
p->job->gws_size = amdgpu_bo_size(gws); p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
} }
if (oa) { if (oa) {
p->job->oa_base = amdgpu_bo_gpu_offset(oa); p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
p->job->oa_size = amdgpu_bo_size(oa); p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
} }
if (!r && p->uf_entry.robj) { if (!r && p->uf_entry.tv.bo) {
struct amdgpu_bo *uf = p->uf_entry.robj; struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
r = amdgpu_ttm_alloc_gart(&uf->tbo); r = amdgpu_ttm_alloc_gart(&uf->tbo);
p->job->uf_addr += amdgpu_bo_gpu_offset(uf); p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
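
The GDS/GWS/OA hunk above now shifts the buffer offsets and sizes right by PAGE_SHIFT before storing them in the job, that is, it expresses them in page units instead of bytes. A tiny sketch of that unit conversion, assuming 4 KiB pages purely for the example:

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12  /* assume 4 KiB pages; the kernel uses PAGE_SHIFT */

int main(void)
{
        uint64_t base_bytes = 0x10000;  /* hypothetical BO GPU offset in bytes */
        uint64_t size_bytes = 0x8000;   /* hypothetical BO size in bytes */

        /* Same conversion the hunk applies to the gds/gws/oa base and size. */
        uint64_t base_pages = base_bytes >> EXAMPLE_PAGE_SHIFT;
        uint64_t size_pages = size_bytes >> EXAMPLE_PAGE_SHIFT;

        printf("base: %llu pages, size: %llu pages\n",
               (unsigned long long)base_pages,
               (unsigned long long)size_pages);
        return 0;
}
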
@ -741,8 +750,7 @@ error_free_pages:
if (!e->user_pages) if (!e->user_pages)
continue; continue;
release_pages(e->user_pages, release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
e->robj->tbo.ttm->num_pages);
kvfree(e->user_pages); kvfree(e->user_pages);
} }
@ -755,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
int r; int r;
list_for_each_entry(e, &p->validated, tv.head) { list_for_each_entry(e, &p->validated, tv.head) {
struct reservation_object *resv = e->robj->tbo.resv; struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
struct reservation_object *resv = bo->tbo.resv;
r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
amdgpu_bo_explicit_sync(e->robj)); amdgpu_bo_explicit_sync(bo));
if (r) if (r)
return r; return r;
@ -800,11 +810,16 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
kfree(parser->chunks); kfree(parser->chunks);
if (parser->job) if (parser->job)
amdgpu_job_free(parser->job); amdgpu_job_free(parser->job);
amdgpu_bo_unref(&parser->uf_entry.robj); if (parser->uf_entry.tv.bo) {
struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
amdgpu_bo_unref(&uf);
}
} }
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{ {
struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_device *adev = p->adev; struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_vm *vm = &fpriv->vm;
@ -813,6 +828,71 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
int r; int r;
/* Only for UVD/VCE VM emulation */
if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_bo_va_mapping *m;
struct amdgpu_bo *aobj = NULL;
struct amdgpu_cs_chunk *chunk;
uint64_t offset, va_start;
struct amdgpu_ib *ib;
uint8_t *kptr;
chunk = &p->chunks[i];
ib = &p->job->ibs[j];
chunk_ib = chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
if (r) {
DRM_ERROR("IB va_start is invalid\n");
return r;
}
if ((va_start + chunk_ib->ib_bytes) >
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("IB va_start+ib_bytes is invalid\n");
return -EINVAL;
}
/* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
if (r) {
return r;
}
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += va_start - offset;
if (ring->funcs->parse_cs) {
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
r = amdgpu_ring_parse_cs(ring, p, j);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, j);
amdgpu_bo_kunmap(aobj);
if (r)
return r;
}
j++;
}
}
if (!p->job->vm)
return amdgpu_cs_sync_rings(p);
r = amdgpu_vm_clear_freed(adev, vm, NULL); r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r) if (r)
return r; return r;
@ -845,7 +925,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
struct dma_fence *f; struct dma_fence *f;
/* ignore duplicates */ /* ignore duplicates */
bo = e->robj; bo = ttm_to_amdgpu_bo(e->tv.bo);
if (!bo) if (!bo)
continue; continue;
@ -875,101 +955,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
if (r) if (r)
return r; return r;
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
if (r)
return r;
p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
if (amdgpu_vm_debug) { if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */ /* Invalidate all BOs to test for userspace bugs */
amdgpu_bo_list_for_each_entry(e, p->bo_list) { amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
/* ignore duplicates */ /* ignore duplicates */
if (!e->robj) if (!bo)
continue; continue;
amdgpu_vm_bo_invalidate(adev, e->robj, false); amdgpu_vm_bo_invalidate(adev, bo, false);
} }
} }
return r;
}
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_ring *ring = p->ring;
int r;
/* Only for UVD/VCE VM emulation */
if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_bo_va_mapping *m;
struct amdgpu_bo *aobj = NULL;
struct amdgpu_cs_chunk *chunk;
uint64_t offset, va_start;
struct amdgpu_ib *ib;
uint8_t *kptr;
chunk = &p->chunks[i];
ib = &p->job->ibs[j];
chunk_ib = chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
if (r) {
DRM_ERROR("IB va_start is invalid\n");
return r;
}
if ((va_start + chunk_ib->ib_bytes) >
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("IB va_start+ib_bytes is invalid\n");
return -EINVAL;
}
/* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
if (r) {
return r;
}
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += va_start - offset;
if (p->ring->funcs->parse_cs) {
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
r = amdgpu_ring_parse_cs(ring, p, j);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, j);
amdgpu_bo_kunmap(aobj);
if (r)
return r;
}
j++;
}
}
if (p->job->vm) {
p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
r = amdgpu_bo_vm_update_pte(p);
if (r)
return r;
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
if (r)
return r;
}
return amdgpu_cs_sync_rings(p); return amdgpu_cs_sync_rings(p);
} }
@ -978,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
{ {
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_vm *vm = &fpriv->vm;
int i, j;
int r, ce_preempt = 0, de_preempt = 0; int r, ce_preempt = 0, de_preempt = 0;
struct amdgpu_ring *ring;
int i, j;
for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk; struct amdgpu_cs_chunk *chunk;
struct amdgpu_ib *ib; struct amdgpu_ib *ib;
struct drm_amdgpu_cs_chunk_ib *chunk_ib; struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_ring *ring; struct drm_sched_entity *entity;
chunk = &parser->chunks[i]; chunk = &parser->chunks[i];
ib = &parser->job->ibs[j]; ib = &parser->job->ibs[j];
@ -1007,8 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return -EINVAL; return -EINVAL;
} }
r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type, r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring, &ring); chunk_ib->ip_instance, chunk_ib->ring,
&entity);
if (r) if (r)
return r; return r;
@ -1016,14 +1022,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
parser->job->preamble_status |= parser->job->preamble_status |=
AMDGPU_PREAMBLE_IB_PRESENT; AMDGPU_PREAMBLE_IB_PRESENT;
if (parser->ring && parser->ring != ring) if (parser->entity && parser->entity != entity)
return -EINVAL; return -EINVAL;
parser->ring = ring; parser->entity = entity;
r = amdgpu_ib_get(adev, vm, ring = to_amdgpu_ring(entity->rq->sched);
ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
ib); chunk_ib->ib_bytes : 0, ib);
if (r) { if (r) {
DRM_ERROR("Failed to get ib !\n"); DRM_ERROR("Failed to get ib !\n");
return r; return r;
@ -1037,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
} }
/* UVD & VCE fw doesn't support user fences */ /* UVD & VCE fw doesn't support user fences */
ring = to_amdgpu_ring(parser->entity->rq->sched);
if (parser->job->uf_addr && ( if (parser->job->uf_addr && (
parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD || ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL; return -EINVAL;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx); return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
} }
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@ -1058,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
sizeof(struct drm_amdgpu_cs_chunk_dep); sizeof(struct drm_amdgpu_cs_chunk_dep);
for (i = 0; i < num_deps; ++i) { for (i = 0; i < num_deps; ++i) {
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct drm_sched_entity *entity;
struct dma_fence *fence; struct dma_fence *fence;
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
if (ctx == NULL) if (ctx == NULL)
return -EINVAL; return -EINVAL;
r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr, r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
deps[i].ip_type, deps[i].ip_instance,
deps[i].ip_instance, deps[i].ring, &entity);
deps[i].ring, &ring);
if (r) { if (r) {
amdgpu_ctx_put(ctx); amdgpu_ctx_put(ctx);
return r; return r;
} }
fence = amdgpu_ctx_get_fence(ctx, ring, fence = amdgpu_ctx_get_fence(ctx, entity,
deps[i].handle); deps[i].handle);
if (IS_ERR(fence)) { if (IS_ERR(fence)) {
r = PTR_ERR(fence); r = PTR_ERR(fence);
@ -1194,9 +1200,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs) union drm_amdgpu_cs *cs)
{ {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_ring *ring = p->ring; struct drm_sched_entity *entity = p->entity;
struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
enum drm_sched_priority priority; enum drm_sched_priority priority;
struct amdgpu_ring *ring;
struct amdgpu_bo_list_entry *e; struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job; struct amdgpu_job *job;
uint64_t seq; uint64_t seq;
@ -1213,7 +1219,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* No memory allocation is allowed while holding the mn lock */ /* No memory allocation is allowed while holding the mn lock */
amdgpu_mn_lock(p->mn); amdgpu_mn_lock(p->mn);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->robj; struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
r = -ERESTARTSYS; r = -ERESTARTSYS;
@ -1224,15 +1230,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->owner = p->filp; job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished); p->fence = dma_fence_get(&job->base.s_fence->finished);
r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
if (r) {
dma_fence_put(p->fence);
dma_fence_put(&job->base.s_fence->finished);
amdgpu_job_free(job);
amdgpu_mn_unlock(p->mn);
return r;
}
amdgpu_cs_post_dependencies(p); amdgpu_cs_post_dependencies(p);
if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@ -1254,6 +1252,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
ring = to_amdgpu_ring(entity->rq->sched); ring = to_amdgpu_ring(entity->rq->sched);
amdgpu_ring_priority_get(ring, priority); amdgpu_ring_priority_get(ring, priority);
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn); amdgpu_mn_unlock(p->mn);
@ -1293,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r) if (r)
goto out; goto out;
r = amdgpu_cs_dependencies(adev, &parser);
if (r) {
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
goto out;
}
r = amdgpu_cs_parser_bos(&parser, data); r = amdgpu_cs_parser_bos(&parser, data);
if (r) { if (r) {
if (r == -ENOMEM) if (r == -ENOMEM)
@ -1304,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
reserved_buffers = true; reserved_buffers = true;
r = amdgpu_cs_dependencies(adev, &parser);
if (r) {
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
goto out;
}
for (i = 0; i < parser.job->num_ibs; i++) for (i = 0; i < parser.job->num_ibs; i++)
trace_amdgpu_cs(&parser, i); trace_amdgpu_cs(&parser, i);
r = amdgpu_cs_ib_vm_chunk(adev, &parser); r = amdgpu_cs_vm_handling(&parser);
if (r) if (r)
goto out; goto out;
@ -1337,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp) struct drm_file *filp)
{ {
union drm_amdgpu_wait_cs *wait = data; union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_ring *ring = NULL; struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct dma_fence *fence; struct dma_fence *fence;
long r; long r;
@ -1348,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
if (ctx == NULL) if (ctx == NULL)
return -EINVAL; return -EINVAL;
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
wait->in.ip_type, wait->in.ip_instance, wait->in.ring, &entity);
wait->in.ring, &ring);
if (r) { if (r) {
amdgpu_ctx_put(ctx); amdgpu_ctx_put(ctx);
return r; return r;
} }
fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
if (IS_ERR(fence)) if (IS_ERR(fence))
r = PTR_ERR(fence); r = PTR_ERR(fence);
else if (fence) { else if (fence) {
@ -1388,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
struct drm_file *filp, struct drm_file *filp,
struct drm_amdgpu_fence *user) struct drm_amdgpu_fence *user)
{ {
struct amdgpu_ring *ring; struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct dma_fence *fence; struct dma_fence *fence;
int r; int r;
@ -1397,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
if (ctx == NULL) if (ctx == NULL)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type, r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
user->ip_instance, user->ring, &ring); user->ring, &entity);
if (r) { if (r) {
amdgpu_ctx_put(ctx); amdgpu_ctx_put(ctx);
return ERR_PTR(r); return ERR_PTR(r);
} }
fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
amdgpu_ctx_put(ctx); amdgpu_ctx_put(ctx);
return fence; return fence;

View File

@ -27,6 +27,30 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_sched.h" #include "amdgpu_sched.h"
#define to_amdgpu_ctx_entity(e) \
container_of((e), struct amdgpu_ctx_entity, entity)
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_GFX] = 1,
[AMDGPU_HW_IP_COMPUTE] = 4,
[AMDGPU_HW_IP_DMA] = 2,
[AMDGPU_HW_IP_UVD] = 1,
[AMDGPU_HW_IP_VCE] = 1,
[AMDGPU_HW_IP_UVD_ENC] = 1,
[AMDGPU_HW_IP_VCN_DEC] = 1,
[AMDGPU_HW_IP_VCN_ENC] = 1,
};
static int amdgput_ctx_total_num_entities(void)
{
unsigned i, num_entities = 0;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
num_entities += amdgpu_ctx_num_entities[i];
return num_entities;
}
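
The new amdgpu_ctx_num_entities table fixes how many scheduler entities each hardware IP type gets per context, and amdgput_ctx_total_num_entities() simply sums the table. A self-contained sketch of the same table-plus-sum layout, with a shortened, made-up IP list:

#include <stdio.h>

enum { EX_HW_IP_GFX, EX_HW_IP_COMPUTE, EX_HW_IP_DMA, EX_HW_IP_NUM };

/* Per-IP entity counts, mirroring the shape of amdgpu_ctx_num_entities. */
static const unsigned int ex_num_entities[EX_HW_IP_NUM] = {
        [EX_HW_IP_GFX]     = 1,
        [EX_HW_IP_COMPUTE] = 4,
        [EX_HW_IP_DMA]     = 2,
};

static unsigned int ex_total_num_entities(void)
{
        unsigned int i, num_entities = 0;

        for (i = 0; i < EX_HW_IP_NUM; ++i)
                num_entities += ex_num_entities[i];
        return num_entities;
}

int main(void)
{
        /* One flat allocation of this many entities, sliced per IP, is the
         * layout the context init code below builds. */
        printf("entities per context: %u\n", ex_total_num_entities());
        return 0;
}
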
static int amdgpu_ctx_priority_permit(struct drm_file *filp, static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority) enum drm_sched_priority priority)
{ {
@ -48,6 +72,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct drm_file *filp, struct drm_file *filp,
struct amdgpu_ctx *ctx) struct amdgpu_ctx *ctx)
{ {
unsigned num_entities = amdgput_ctx_total_num_entities();
unsigned i, j; unsigned i, j;
int r; int r;
@ -60,51 +85,104 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
memset(ctx, 0, sizeof(*ctx)); memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev; ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock); ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
sizeof(struct dma_fence*), GFP_KERNEL); sizeof(struct dma_fence*), GFP_KERNEL);
if (!ctx->fences) if (!ctx->fences)
return -ENOMEM; return -ENOMEM;
mutex_init(&ctx->lock); ctx->entities[0] = kcalloc(num_entities,
sizeof(struct amdgpu_ctx_entity),
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { GFP_KERNEL);
ctx->rings[i].sequence = 1; if (!ctx->entities[0]) {
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; r = -ENOMEM;
goto error_free_fences;
} }
for (i = 0; i < num_entities; ++i) {
struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
entity->sequence = 1;
entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
}
for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
ctx->entities[i] = ctx->entities[i - 1] +
amdgpu_ctx_num_entities[i - 1];
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter; ctx->reset_counter_query = ctx->reset_counter;
ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
ctx->init_priority = priority; ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
/* create context entity for each ring */ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
struct amdgpu_ring *ring = adev->rings[i]; struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
struct drm_sched_rq *rq; unsigned num_rings;
rq = &ring->sched.sched_rq[priority]; switch (i) {
case AMDGPU_HW_IP_GFX:
rings[0] = &adev->gfx.gfx_ring[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_COMPUTE:
for (j = 0; j < adev->gfx.num_compute_rings; ++j)
rings[j] = &adev->gfx.compute_ring[j];
num_rings = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
for (j = 0; j < adev->sdma.num_instances; ++j)
rings[j] = &adev->sdma.instance[j].ring;
num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
rings[0] = &adev->uvd.inst[0].ring;
num_rings = 1;
break;
case AMDGPU_HW_IP_VCE:
rings[0] = &adev->vce.ring[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
rings[0] = &adev->uvd.inst[0].ring_enc[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_DEC:
rings[0] = &adev->vcn.ring_dec;
num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_ENC:
rings[0] = &adev->vcn.ring_enc[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
rings[0] = &adev->vcn.ring_jpeg;
num_rings = 1;
break;
}
if (ring == &adev->gfx.kiq.ring) for (j = 0; j < num_rings; ++j)
continue; rqs[j] = &rings[j]->sched.sched_rq[priority];
r = drm_sched_entity_init(&ctx->rings[i].entity, for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
&rq, 1, &ctx->guilty); r = drm_sched_entity_init(&ctx->entities[i][j].entity,
rqs, num_rings, &ctx->guilty);
if (r) if (r)
goto failed; goto error_cleanup_entities;
} }
r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
if (r)
goto failed;
return 0; return 0;
failed: error_cleanup_entities:
for (j = 0; j < i; j++) for (i = 0; i < num_entities; ++i)
drm_sched_entity_destroy(&ctx->rings[j].entity); drm_sched_entity_destroy(&ctx->entities[0][i].entity);
kfree(ctx->entities[0]);
error_free_fences:
kfree(ctx->fences); kfree(ctx->fences);
ctx->fences = NULL; ctx->fences = NULL;
return r; return r;
@ -113,25 +191,47 @@ failed:
static void amdgpu_ctx_fini(struct kref *ref) static void amdgpu_ctx_fini(struct kref *ref)
{ {
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev; struct amdgpu_device *adev = ctx->adev;
unsigned i, j; unsigned i, j;
if (!adev) if (!adev)
return; return;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) for (i = 0; i < num_entities; ++i)
for (j = 0; j < amdgpu_sched_jobs; ++j) for (j = 0; j < amdgpu_sched_jobs; ++j)
dma_fence_put(ctx->rings[i].fences[j]); dma_fence_put(ctx->entities[0][i].fences[j]);
kfree(ctx->fences); kfree(ctx->fences);
ctx->fences = NULL; kfree(ctx->entities[0]);
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
mutex_destroy(&ctx->lock); mutex_destroy(&ctx->lock);
kfree(ctx); kfree(ctx);
} }
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
}
/* Right now all IPs have only one instance - multiple rings. */
if (instance != 0) {
DRM_DEBUG("invalid ip instance: %d\n", instance);
return -EINVAL;
}
if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
return -EINVAL;
}
*entity = &ctx->entities[hw_ip][ring].entity;
return 0;
}
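
amdgpu_ctx_get_entity() replaces the old queue-manager mapping with a bounds-checked lookup: reject unknown IP types, non-zero instances and out-of-range rings, then hand back the per-IP entity. A userspace sketch of the same validation flow; the table and slot arithmetic are illustrative, not the driver's exact layout:

#include <stdio.h>

enum { EX_IP_GFX, EX_IP_COMPUTE, EX_IP_DMA, EX_IP_NUM };

static const unsigned int ex_rings_per_ip[EX_IP_NUM] = { 1, 4, 2 };

/* Same checks as the lookup above: unknown IP, non-zero instance and
 * out-of-range ring are rejected; otherwise report the flat entity slot. */
static int ex_get_entity_slot(unsigned int hw_ip, unsigned int instance,
                              unsigned int ring, unsigned int *slot)
{
        unsigned int i, base = 0;

        if (hw_ip >= EX_IP_NUM)
                return -1;
        if (instance != 0)
                return -1;
        if (ring >= ex_rings_per_ip[hw_ip])
                return -1;

        for (i = 0; i < hw_ip; ++i)
                base += ex_rings_per_ip[i];
        *slot = base + ring;
        return 0;
}

int main(void)
{
        unsigned int slot;

        if (ex_get_entity_slot(EX_IP_COMPUTE, 0, 2, &slot) == 0)
                printf("compute ring 2 -> entity slot %u\n", slot);
        if (ex_get_entity_slot(EX_IP_DMA, 1, 0, &slot) != 0)
                printf("instance 1 rejected, as in the kernel check\n");
        return 0;
}
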
static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv, struct amdgpu_fpriv *fpriv,
struct drm_file *filp, struct drm_file *filp,
@ -168,17 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref) static void amdgpu_ctx_do_release(struct kref *ref)
{ {
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
unsigned num_entities;
u32 i; u32 i;
ctx = container_of(ref, struct amdgpu_ctx, refcount); ctx = container_of(ref, struct amdgpu_ctx, refcount);
for (i = 0; i < ctx->adev->num_rings; i++) { num_entities = 0;
for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
num_entities += amdgpu_ctx_num_entities[i];
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) for (i = 0; i < num_entities; i++)
continue; drm_sched_entity_destroy(&ctx->entities[0][i].entity);
drm_sched_entity_destroy(&ctx->rings[i].entity);
}
amdgpu_ctx_fini(ref); amdgpu_ctx_fini(ref);
} }
@ -334,56 +434,56 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
return 0; return 0;
} }
int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct dma_fence *fence, uint64_t* handler) struct drm_sched_entity *entity,
struct dma_fence *fence, uint64_t* handle)
{ {
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
uint64_t seq = cring->sequence; uint64_t seq = centity->sequence;
unsigned idx = 0;
struct dma_fence *other = NULL; struct dma_fence *other = NULL;
unsigned idx = 0;
idx = seq & (amdgpu_sched_jobs - 1); idx = seq & (amdgpu_sched_jobs - 1);
other = cring->fences[idx]; other = centity->fences[idx];
if (other) if (other)
BUG_ON(!dma_fence_is_signaled(other)); BUG_ON(!dma_fence_is_signaled(other));
dma_fence_get(fence); dma_fence_get(fence);
spin_lock(&ctx->ring_lock); spin_lock(&ctx->ring_lock);
cring->fences[idx] = fence; centity->fences[idx] = fence;
cring->sequence++; centity->sequence++;
spin_unlock(&ctx->ring_lock); spin_unlock(&ctx->ring_lock);
dma_fence_put(other); dma_fence_put(other);
if (handler) if (handle)
*handler = seq; *handle = seq;
return 0;
} }
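
amdgpu_ctx_add_fence() stores each fence in a fixed-size per-entity ring indexed by seq & (amdgpu_sched_jobs - 1); the mask only behaves like a modulo when amdgpu_sched_jobs is a power of two. A minimal sketch of that ring indexing, with plain integers standing in for the dma_fence pointers and an assumed ring size of 8:

#include <stdint.h>
#include <stdio.h>

#define EX_SCHED_JOBS 8  /* assumed ring size; must be a power of two */

int main(void)
{
        int fences[EX_SCHED_JOBS] = { 0 }; /* stand-ins for struct dma_fence * */
        uint64_t sequence = 1;             /* like amdgpu_ctx_entity::sequence */
        int job;

        for (job = 100; job < 120; ++job) {
                uint64_t seq = sequence;
                unsigned int idx = seq & (EX_SCHED_JOBS - 1); /* wraps every 8 jobs */

                /* The slot being overwritten belongs to seq - EX_SCHED_JOBS,
                 * which the kernel asserts is already signaled. */
                fences[idx] = job;
                sequence++;
        }

        printf("after 20 submissions the ring keeps only the last %d:\n", EX_SCHED_JOBS);
        for (unsigned int i = 0; i < EX_SCHED_JOBS; ++i)
                printf("  slot %u -> job %d\n", i, fences[i]);
        return 0;
}
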
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq) struct drm_sched_entity *entity,
uint64_t seq)
{ {
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
struct dma_fence *fence; struct dma_fence *fence;
spin_lock(&ctx->ring_lock); spin_lock(&ctx->ring_lock);
if (seq == ~0ull) if (seq == ~0ull)
seq = ctx->rings[ring->idx].sequence - 1; seq = centity->sequence - 1;
if (seq >= cring->sequence) { if (seq >= centity->sequence) {
spin_unlock(&ctx->ring_lock); spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
if (seq + amdgpu_sched_jobs < cring->sequence) { if (seq + amdgpu_sched_jobs < centity->sequence) {
spin_unlock(&ctx->ring_lock); spin_unlock(&ctx->ring_lock);
return NULL; return NULL;
} }
fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]); fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
spin_unlock(&ctx->ring_lock); spin_unlock(&ctx->ring_lock);
return fence; return fence;
@ -392,35 +492,28 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority) enum drm_sched_priority priority)
{ {
int i; unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
struct drm_sched_rq *rq;
struct drm_sched_entity *entity;
struct amdgpu_ring *ring;
enum drm_sched_priority ctx_prio; enum drm_sched_priority ctx_prio;
unsigned i;
ctx->override_priority = priority; ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority; ctx->init_priority : ctx->override_priority;
for (i = 0; i < adev->num_rings; i++) { for (i = 0; i < num_entities; i++) {
ring = adev->rings[i]; struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
entity = &ctx->rings[i].entity;
rq = &ring->sched.sched_rq[ctx_prio];
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) drm_sched_entity_set_priority(entity, ctx_prio);
continue;
drm_sched_entity_set_rq(entity, rq);
} }
} }
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity)
{ {
struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id]; struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1); unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
struct dma_fence *other = cring->fences[idx]; struct dma_fence *other = centity->fences[idx];
if (other) { if (other) {
signed long r; signed long r;
@ -444,6 +537,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{ {
unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct idr *idp; struct idr *idp;
uint32_t id, i; uint32_t id, i;
@ -459,13 +553,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
return; return;
} }
for (i = 0; i < ctx->adev->num_rings; i++) { for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity;
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) entity = &ctx->entities[0][i].entity;
continue; max_wait = drm_sched_entity_flush(entity, max_wait);
max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
max_wait);
} }
} }
mutex_unlock(&mgr->lock); mutex_unlock(&mgr->lock);
@ -473,6 +565,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{ {
unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct idr *idp; struct idr *idp;
uint32_t id, i; uint32_t id, i;
@ -484,16 +577,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
if (!ctx->adev) if (!ctx->adev)
return; return;
for (i = 0; i < ctx->adev->num_rings; i++) { if (kref_read(&ctx->refcount) != 1) {
DRM_ERROR("ctx %p is still alive\n", ctx);
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) continue;
continue;
if (kref_read(&ctx->refcount) == 1)
drm_sched_entity_fini(&ctx->rings[i].entity);
else
DRM_ERROR("ctx %p is still alive\n", ctx);
} }
for (i = 0; i < num_entities; i++)
drm_sched_entity_fini(&ctx->entities[0][i].entity);
} }
} }

View File

@ -0,0 +1,88 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_CTX_H__
#define __AMDGPU_CTX_H__
#include "amdgpu_ring.h"
struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
struct amdgpu_ctx_entity {
uint64_t sequence;
struct dma_fence **fences;
struct drm_sched_entity entity;
};
struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device *adev;
unsigned reset_counter;
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
struct dma_fence **fences;
struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
struct mutex lock;
atomic_t guilty;
};
struct amdgpu_ctx_mgr {
struct amdgpu_device *adev;
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
};
extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity);
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence, uint64_t *seq);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
uint64_t seq);
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
#endif

View File

@ -62,6 +62,8 @@
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000 #define AMDGPU_RESUME_MS 2000
@ -651,71 +653,6 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
__clear_bit(wb, adev->wb.used); __clear_bit(wb, adev->wb.used);
} }
/**
* amdgpu_device_vram_location - try to find VRAM location
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
* @base: base address at which to put VRAM
*
* The function will try to place VRAM at the base address provided
* as a parameter.
*/
void amdgpu_device_vram_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc, u64 base)
{
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
mc->vram_start = base;
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
if (limit && limit < mc->real_vram_size)
mc->real_vram_size = limit;
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
}
/**
* amdgpu_device_gart_location - try to find GART location
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
*
* The function will try to place GART before or after VRAM.
*
* If the GART size is bigger than the space left, the GART size is adjusted.
* Thus the function never fails.
*/
void amdgpu_device_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 size_af, size_bf;
mc->gart_size += adev->pm.smu_prv_buffer_size;
size_af = adev->gmc.mc_mask - mc->vram_end;
size_bf = mc->vram_start;
if (size_bf > size_af) {
if (mc->gart_size > size_bf) {
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_bf;
}
mc->gart_start = 0;
} else {
if (mc->gart_size > size_af) {
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_af;
}
/* VCE doesn't like it when BOs cross a 4GB segment, so align
* the GART base on a 4GB boundary as well.
*/
mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);
}
mc->gart_end = mc->gart_start + mc->gart_size - 1;
dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
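
The helper removed above picked the larger of the two address-space gaps around VRAM for the GART window, clamped the requested size to that gap, and aligned an "after VRAM" start to a 4 GB boundary so buffers never straddle a 4 GB segment. A standalone sketch of that placement decision with made-up aperture numbers:

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
        uint64_t mc_mask    = 0xFFFFFFFFFFULL;  /* example 40-bit MC address space */
        uint64_t vram_start = 0x0ULL;           /* hypothetical VRAM placement */
        uint64_t vram_end   = 0x1FFFFFFFFULL;   /* 8 GB of VRAM */
        uint64_t gart_size  = 512ULL << 20;     /* requested 512 MB GART */
        uint64_t gart_start;

        uint64_t size_af = mc_mask - vram_end;  /* space after VRAM */
        uint64_t size_bf = vram_start;          /* space before VRAM */

        if (size_bf > size_af) {
                if (gart_size > size_bf)
                        gart_size = size_bf;    /* limit GART to the gap */
                gart_start = 0;
        } else {
                if (gart_size > size_af)
                        gart_size = size_af;
                /* 4 GB alignment so BOs never cross a 4 GB segment */
                gart_start = ALIGN_UP(vram_end + 1, 0x100000000ULL);
        }

        printf("GART: %lluM at 0x%016llX\n",
               (unsigned long long)(gart_size >> 20),
               (unsigned long long)gart_start);
        return 0;
}
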
/** /**
* amdgpu_device_resize_fb_bar - try to resize FB BAR * amdgpu_device_resize_fb_bar - try to resize FB BAR
* *
@ -1397,7 +1334,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "vega12"; chip_name = "vega12";
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
chip_name = "raven"; if (adev->rev_id >= 8)
chip_name = "raven2";
else if (adev->pdev->device == 0x15d8)
chip_name = "picasso";
else
chip_name = "raven";
break; break;
} }
@ -1551,6 +1493,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
} }
adev->powerplay.pp_feature = amdgpu_pp_feature_mask; adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev))
adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;
for (i = 0; i < adev->num_ip_blocks; i++) { for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@ -1651,6 +1595,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = true; adev->ip_blocks[i].status.hw = true;
} }
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev); amdgpu_amdkfd_device_init(adev);
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
@ -1690,24 +1635,27 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
} }
/** /**
* amdgpu_device_ip_late_set_cg_state - late init for clockgating * amdgpu_device_set_cg_state - set clockgating for amdgpu device
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* *
* Late initialization pass enabling clockgating for hardware IPs.
* The list of all the hardware IPs that make up the asic is walked and the * The list of all the hardware IPs that make up the asic is walked and the
* set_clockgating_state callbacks are run. This stage is run late * set_clockgating_state callbacks are run.
* in the init process. * Late initialization pass enabling clockgating for hardware IPs.
* Fini or suspend, pass disabling clockgating for hardware IPs.
* Returns 0 on success, negative error code on failure. * Returns 0 on success, negative error code on failure.
*/ */
static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{ {
int i = 0, r; int i, j, r;
if (amdgpu_emu_mode == 1) if (amdgpu_emu_mode == 1)
return 0; return 0;
for (i = 0; i < adev->num_ip_blocks; i++) { for (j = 0; j < adev->num_ip_blocks; j++) {
i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.valid) if (!adev->ip_blocks[i].status.valid)
continue; continue;
/* skip CG for VCE/UVD, it's handled specially */ /* skip CG for VCE/UVD, it's handled specially */
@ -1717,7 +1665,7 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->set_clockgating_state) { adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */ /* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_GATE); state);
if (r) { if (r) {
DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r); adev->ip_blocks[i].version->funcs->name, r);
@ -1729,14 +1677,15 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
return 0; return 0;
} }
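
The rewritten amdgpu_device_set_cg_state() serves both directions with one loop by computing i = (gate ? j : num_ip_blocks - j - 1), so blocks are gated front-to-back and ungated back-to-front. A tiny sketch of that index flip over an invented block list:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
        const char *blocks[] = { "COMMON", "GMC", "IH", "GFX", "SDMA" };
        const int num_ip_blocks = sizeof(blocks) / sizeof(blocks[0]);
        int pass, j;

        for (pass = 0; pass < 2; ++pass) {
                bool gate = (pass == 0);

                printf("%s order:", gate ? "gate  " : "ungate");
                for (j = 0; j < num_ip_blocks; ++j) {
                        /* The same index flip the rewritten helper uses. */
                        int i = gate ? j : num_ip_blocks - j - 1;

                        printf(" %s", blocks[i]);
                }
                printf("\n");
        }
        return 0;
}
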
static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev) static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{ {
int i = 0, r; int i, j, r;
if (amdgpu_emu_mode == 1) if (amdgpu_emu_mode == 1)
return 0; return 0;
for (i = 0; i < adev->num_ip_blocks; i++) { for (j = 0; j < adev->num_ip_blocks; j++) {
i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.valid) if (!adev->ip_blocks[i].status.valid)
continue; continue;
/* skip CG for VCE/UVD, it's handled specially */ /* skip CG for VCE/UVD, it's handled specially */
@ -1746,7 +1695,7 @@ static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->set_powergating_state) { adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */ /* enable powergating to save power */
r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
AMD_PG_STATE_GATE); state);
if (r) { if (r) {
DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r); adev->ip_blocks[i].version->funcs->name, r);
@ -1787,8 +1736,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
} }
} }
amdgpu_device_ip_late_set_cg_state(adev); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_ip_late_set_pg_state(adev); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
queue_delayed_work(system_wq, &adev->late_init_work, queue_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS)); msecs_to_jiffies(AMDGPU_RESUME_MS));
@ -1814,22 +1763,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
int i, r; int i, r;
amdgpu_amdkfd_device_fini(adev); amdgpu_amdkfd_device_fini(adev);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
/* need to disable SMC first */ /* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) { for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw) if (!adev->ip_blocks[i].status.hw)
continue; continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC && if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */ /* XXX handle errors */
if (r) { if (r) {
@ -1845,20 +1787,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw) if (!adev->ip_blocks[i].status.hw)
continue; continue;
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */ /* XXX handle errors */
if (r) { if (r) {
@ -1906,13 +1834,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
} }
/** /**
* amdgpu_device_ip_late_init_func_handler - work handler for clockgating * amdgpu_device_ip_late_init_func_handler - work handler for ib test
* *
* @work: work_struct * @work: work_struct.
*
* Work handler for amdgpu_device_ip_late_set_cg_state. We put the
* clockgating setup into a worker thread to speed up driver init and
* resume from suspend.
*/ */
static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
{ {
@ -1925,6 +1849,19 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
DRM_ERROR("ib ring test failed (%d).\n", r); DRM_ERROR("ib ring test failed (%d).\n", r);
} }
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
mutex_lock(&adev->gfx.gfx_off_mutex);
if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
adev->gfx.gfx_off_state = true;
}
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
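
The new delayed-work handler only flips gfx_off_state when gfx_off_req_count has dropped to zero, so any holder of a request keeps the GFX block powered. A reduced sketch of that decision; the real handler runs from delayed work under gfx_off_mutex, and the locking and work queue are omitted here:

#include <stdbool.h>
#include <stdio.h>

static bool gfx_off_state;              /* true once the GFX block is gated */
static unsigned int gfx_off_req_count;  /* holders that need GFX kept on */

/* Decision only; the kernel version takes gfx_off_mutex around the
 * same check before asking the SMU to powergate GFX. */
static void delayed_enable_gfx_off(void)
{
        if (!gfx_off_state && gfx_off_req_count == 0)
                gfx_off_state = true;
}

int main(void)
{
        gfx_off_req_count = 1;          /* someone still needs the GFX block */
        delayed_enable_gfx_off();
        printf("with a pending request: gfx_off_state=%d\n", gfx_off_state);

        gfx_off_req_count = 0;          /* last request dropped */
        delayed_enable_gfx_off();
        printf("with no requests:       gfx_off_state=%d\n", gfx_off_state);
        return 0;
}
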
/** /**
* amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
* *
@ -1940,23 +1877,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{ {
int i, r; int i, r;
if (amdgpu_sriov_vf(adev)) amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_virt_request_full_gpu(adev, false); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) { for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid) if (!adev->ip_blocks[i].status.valid)
continue; continue;
/* displays are handled separately */ /* displays are handled separately */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
/* ungate blocks so that suspend can properly shut them down */
if (adev->ip_blocks[i].version->funcs->set_clockgating_state) {
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
}
/* XXX handle errors */ /* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev); r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */ /* XXX handle errors */
@ -1967,9 +1895,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
} }
} }
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return 0; return 0;
} }
@ -1988,36 +1913,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{ {
int i, r; int i, r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
/* ungate SMC block first */
r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
}
/* call smu to disable gfx off feature first when suspend */
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) { for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid) if (!adev->ip_blocks[i].status.valid)
continue; continue;
/* displays are handled in phase1 */ /* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue; continue;
/* ungate blocks so that suspend can properly shut them down */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
}
/* XXX handle errors */ /* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev); r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */ /* XXX handle errors */
@ -2027,9 +1928,6 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
} }
} }
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return 0; return 0;
} }
@ -2048,11 +1946,17 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{ {
int r; int r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
r = amdgpu_device_ip_suspend_phase1(adev); r = amdgpu_device_ip_suspend_phase1(adev);
if (r) if (r)
return r; return r;
r = amdgpu_device_ip_suspend_phase2(adev); r = amdgpu_device_ip_suspend_phase2(adev);
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return r; return r;
} }
@ -2063,6 +1967,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
static enum amd_ip_block_type ip_order[] = { static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_COMMON, AMD_IP_BLOCK_TYPE_COMMON,
AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_IH, AMD_IP_BLOCK_TYPE_IH,
}; };
@ -2093,7 +1998,6 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
static enum amd_ip_block_type ip_order[] = { static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC, AMD_IP_BLOCK_TYPE_SMC,
AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_DCE, AMD_IP_BLOCK_TYPE_DCE,
AMD_IP_BLOCK_TYPE_GFX, AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA, AMD_IP_BLOCK_TYPE_SDMA,
@ -2335,7 +2239,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL; adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL; adev->vm_manager.vm_pte_funcs = NULL;
adev->vm_manager.vm_pte_num_rings = 0; adev->vm_manager.vm_pte_num_rqs = 0;
adev->gmc.gmc_funcs = NULL; adev->gmc.gmc_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@ -2367,6 +2271,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex); mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->gfx.gfx_off_mutex);
mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock); mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock); mutex_init(&adev->virt.vf_errors.lock);
@ -2393,7 +2298,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_DELAYED_WORK(&adev->late_init_work, INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler); amdgpu_device_ip_late_init_func_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
/* Registers mapping */ /* Registers mapping */
@ -2705,6 +2613,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (fbcon) if (fbcon)
amdgpu_fbdev_set_suspend(adev, 1); amdgpu_fbdev_set_suspend(adev, 1);
cancel_delayed_work_sync(&adev->late_init_work);
if (!amdgpu_device_has_dc_support(adev)) { if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */ /* turn off display hw */
drm_modeset_lock_all(dev); drm_modeset_lock_all(dev);
@ -3041,71 +2951,22 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
} }
/** /**
* amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers * amdgpu_device_recover_vram - Recover some VRAM contents
*
* @adev: amdgpu_device pointer
* @ring: amdgpu_ring for the engine handling the buffer operations
* @bo: amdgpu_bo buffer whose shadow is being restored
* @fence: dma_fence associated with the operation
*
* Restores the VRAM buffer contents from the shadow in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost.
* Returns 0 on success, negative error code on failure.
*/
static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct dma_fence **fence)
{
uint32_t domain;
int r;
if (!bo->shadow)
return 0;
r = amdgpu_bo_reserve(bo, true);
if (r)
return r;
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
/* if bo has been evicted, then no need to recover */
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
r = amdgpu_bo_validate(bo->shadow);
if (r) {
DRM_ERROR("bo validate failed!\n");
goto err;
}
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
NULL, fence, true);
if (r) {
DRM_ERROR("recover page table failed!\n");
goto err;
}
}
err:
amdgpu_bo_unreserve(bo);
return r;
}
/**
* amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* *
* Restores the contents of VRAM buffers from the shadows in GTT. Used to * Restores the contents of VRAM buffers from the shadows in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where * restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost. * the contents of VRAM might be lost.
* Returns 0 on success, 1 on failure. *
* Returns:
* 0 on success, negative error code on failure.
*/ */
static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev) static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_bo *bo, *tmp;
struct dma_fence *fence = NULL, *next = NULL; struct dma_fence *fence = NULL, *next = NULL;
long r = 1; struct amdgpu_bo *shadow;
int i = 0; long r = 1, tmo;
long tmo;
if (amdgpu_sriov_runtime(adev)) if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(8000); tmo = msecs_to_jiffies(8000);
@ -3114,44 +2975,40 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
DRM_INFO("recover vram bo from shadow start\n"); DRM_INFO("recover vram bo from shadow start\n");
mutex_lock(&adev->shadow_list_lock); mutex_lock(&adev->shadow_list_lock);
list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
next = NULL;
amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); /* No need to recover an evicted BO */
if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
continue;
r = amdgpu_bo_restore_shadow(shadow, &next);
if (r)
break;
if (fence) { if (fence) {
r = dma_fence_wait_timeout(fence, false, tmo); r = dma_fence_wait_timeout(fence, false, tmo);
if (r == 0) dma_fence_put(fence);
pr_err("wait fence %p[%d] timeout\n", fence, i); fence = next;
else if (r < 0) if (r <= 0)
pr_err("wait fence %p[%d] interrupted\n", fence, i);
if (r < 1) {
dma_fence_put(fence);
fence = next;
break; break;
} } else {
i++; fence = next;
} }
dma_fence_put(fence);
fence = next;
} }
mutex_unlock(&adev->shadow_list_lock); mutex_unlock(&adev->shadow_list_lock);
if (fence) { if (fence)
r = dma_fence_wait_timeout(fence, false, tmo); tmo = dma_fence_wait_timeout(fence, false, tmo);
if (r == 0)
pr_err("wait fence %p[%d] timeout\n", fence, i);
else if (r < 0)
pr_err("wait fence %p[%d] interrupted\n", fence, i);
}
dma_fence_put(fence); dma_fence_put(fence);
if (r > 0) if (r <= 0 || tmo <= 0) {
DRM_INFO("recover vram bo from shadow done\n");
else
DRM_ERROR("recover vram bo from shadow failed\n"); DRM_ERROR("recover vram bo from shadow failed\n");
return -EIO;
}
return (r > 0) ? 0 : 1; DRM_INFO("recover vram bo from shadow done\n");
return 0;
} }
/** /**
@ -3225,8 +3082,8 @@ out:
} }
} }
if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost)) if (!r)
r = amdgpu_device_handle_vram_lost(adev); r = amdgpu_device_recover_vram(adev);
return r; return r;
} }
@ -3272,38 +3129,50 @@ error:
amdgpu_virt_release_full_gpu(adev, true); amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
atomic_inc(&adev->vram_lost_counter); atomic_inc(&adev->vram_lost_counter);
r = amdgpu_device_handle_vram_lost(adev); r = amdgpu_device_recover_vram(adev);
} }
return r; return r;
} }
/**
* amdgpu_device_should_recover_gpu - check if we should try GPU recovery
*
* @adev: amdgpu device pointer
*
* Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
* a hung GPU.
*/
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
if (!amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("Timeout, but no hardware hang detected.\n");
return false;
}
if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
!amdgpu_sriov_vf(adev))) {
DRM_INFO("GPU recovery disabled.\n");
return false;
}
return true;
}
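
For reference, the gating added above can be exercised in isolation. The sketch below is a plain userspace model (the helper name and the main() harness are invented for illustration) that assumes the documented meaning of amdgpu_gpu_recovery: -1 auto, 0 disabled, 1 enabled.

#include <stdbool.h>
#include <stdio.h>

/* Model of the gating in amdgpu_device_should_recover_gpu(): recovery runs
 * only when a real hang was detected and either the module parameter enables
 * it explicitly (1) or it is left on auto (-1) on an SR-IOV virtual function.
 */
static bool should_recover(bool hang_detected, int gpu_recovery, bool is_sriov_vf)
{
	if (!hang_detected)
		return false;
	if (gpu_recovery == 0 || (gpu_recovery == -1 && !is_sriov_vf))
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", should_recover(true, -1, false)); /* 0: auto, bare metal */
	printf("%d\n", should_recover(true, -1, true));  /* 1: auto, SR-IOV VF */
	printf("%d\n", should_recover(true, 1, false));  /* 1: forced on */
	return 0;
}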
/** /**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler * amdgpu_device_gpu_recover - reset the asic and recover scheduler
* *
* @adev: amdgpu device pointer * @adev: amdgpu device pointer
* @job: which job trigger hang * @job: which job trigger hang
* @force: forces reset regardless of amdgpu_gpu_recovery
* *
* Attempt to reset the GPU if it has hung (all asics). * Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure. * Returns 0 for success or an error on failure.
*/ */
int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job, bool force) struct amdgpu_job *job)
{ {
int i, r, resched; int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
return 0;
}
if (!force && (amdgpu_gpu_recovery == 0 ||
(amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
DRM_INFO("GPU recovery disabled.\n");
return 0;
}
dev_info(adev->dev, "GPU reset begin!\n"); dev_info(adev->dev, "GPU reset begin!\n");
mutex_lock(&adev->lock_reset); mutex_lock(&adev->lock_reset);

View File

@ -23,6 +23,21 @@
#ifndef __AMDGPU_DISPLAY_H__ #ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
struct drm_framebuffer * struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev, amdgpu_display_user_framebuffer_create(struct drm_device *dev,

View File

@ -36,6 +36,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_irq.h" #include "amdgpu_irq.h"
#include "amdgpu_gem.h"
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
@ -113,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32; uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL; char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL; char *amdgpu_virtual_display = NULL;
/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/ /* OverDrive(bit 14) disabled by default*/
uint amdgpu_pp_feature_mask = 0xfffd3fff; uint amdgpu_pp_feature_mask = 0xffffbfff;
int amdgpu_ngg = 0; int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0; int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0;
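
As a sanity check on the pp_feature_mask default change a few lines above, a standalone sketch; the bit positions are taken from the source comment, not from the PP feature mask definitions:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t all = 0xffffffffu;
	uint32_t overdrive = 1u << 14, gfxoff = 1u << 15, stutter = 1u << 17;

	/* old default: OverDrive, gfxoff and stutter mode disabled */
	printf("0x%08x\n", all & ~(overdrive | gfxoff | stutter)); /* 0xfffd3fff */
	/* new default: only OverDrive disabled */
	printf("0x%08x\n", all & ~overdrive);                      /* 0xffffbfff */
	return 0;
}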
@ -531,6 +532,102 @@ MODULE_PARM_DESC(smu_memory_pool_size,
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
#ifdef CONFIG_HSA_AMD
/**
* DOC: sched_policy (int)
* Set scheduling policy. Default is HWS(hardware scheduling) with over-subscription.
* Setting 1 disables over-subscription. Setting 2 disables HWS and statically
* assigns queues to HQDs.
*/
int sched_policy = KFD_SCHED_POLICY_HWS;
module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
/**
* DOC: hws_max_conc_proc (int)
* Maximum number of processes that HWS can schedule concurrently. The maximum is the
* number of VMIDs assigned to the HWS, which is also the default.
*/
int hws_max_conc_proc = 8;
module_param(hws_max_conc_proc, int, 0444);
MODULE_PARM_DESC(hws_max_conc_proc,
"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
/**
* DOC: cwsr_enable (int)
* CWSR (compute wave store and resume) allows the GPU to preempt shader execution in
* the middle of a compute wave. Default is 1 to enable this feature. Setting 0
* disables it.
*/
int cwsr_enable = 1;
module_param(cwsr_enable, int, 0444);
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
/**
* DOC: max_num_of_queues_per_device (int)
* Maximum number of queues per device. Valid setting is between 1 and 4096. Default
* is 4096.
*/
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
/**
* DOC: send_sigterm (int)
* Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
* but just print errors on dmesg. Setting 1 enables sending sigterm.
*/
int send_sigterm;
module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
/**
* DOC: debug_largebar (int)
* Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
* system. This limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
* Default value is 0, disabled.
*/
int debug_largebar;
module_param(debug_largebar, int, 0444);
MODULE_PARM_DESC(debug_largebar,
"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
/**
* DOC: ignore_crat (int)
* Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
* table to get information about AMD APUs. This option can serve as a workaround on
* systems with a broken CRAT table.
*/
int ignore_crat;
module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
/**
* DOC: noretry (int)
* This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry.
* Setting 1 disables retry.
* Retry is needed for recoverable page faults.
*/
int noretry;
module_param(noretry, int, 0644);
MODULE_PARM_DESC(noretry,
"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
/**
* DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
* Setting 1 enables halt on hang.
*/
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
#endif
static const struct pci_device_id pciidlist[] = { static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI #ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@ -769,14 +866,15 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
/* Vega 20 */ /* Vega 20 */
{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
/* Raven */ /* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0, 0, 0} {0, 0, 0}
}; };
@ -803,14 +901,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV; return -ENODEV;
} }
/*
* Initialize amdkfd before starting radeon. If it was not loaded yet,
* defer radeon probing
*/
ret = amdgpu_amdkfd_init();
if (ret == -EPROBE_DEFER)
return ret;
/* Get rid of things like offb */ /* Get rid of things like offb */
ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb"); ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
if (ret) if (ret)
@ -855,8 +945,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{ {
struct drm_device *dev = pci_get_drvdata(pdev); struct drm_device *dev = pci_get_drvdata(pdev);
drm_dev_unregister(dev); DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
drm_dev_put(dev); drm_dev_unplug(dev);
pci_disable_device(pdev); pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
} }
@ -1151,6 +1241,10 @@ static int __init amdgpu_init(void)
pdriver = &amdgpu_kms_pci_driver; pdriver = &amdgpu_kms_pci_driver;
driver->num_ioctls = amdgpu_max_kms_ioctl; driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler(); amdgpu_register_atpx_handler();
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
/* let modprobe override vga console setting */ /* let modprobe override vga console setting */
return pci_register_driver(pdriver); return pci_register_driver(pdriver);

View File

@ -28,6 +28,7 @@
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "atom.h" #include "atom.h"
#include "atombios_encoders.h" #include "atombios_encoders.h"

View File

@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "cikd.h" #include "cikd.h"
#include "amdgpu_gem.h"
#include <drm/drm_fb_helper.h> #include <drm/drm_fb_helper.h>

View File

@ -195,19 +195,6 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
return 0; return 0;
} }
/**
* amdgpu_fence_schedule_fallback - schedule fallback check
*
* @ring: pointer to struct amdgpu_ring
*
* Start a timer as fallback to our interrupts.
*/
static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
mod_timer(&ring->fence_drv.fallback_timer,
jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}
/** /**
* amdgpu_fence_process - check for fence activity * amdgpu_fence_process - check for fence activity
* *
@ -229,9 +216,6 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
if (seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
if (unlikely(seq == last_seq)) if (unlikely(seq == last_seq))
return; return;
@ -262,21 +246,6 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
} while (last_seq != seq); } while (last_seq != seq);
} }
/**
* amdgpu_fence_fallback - fallback for hardware interrupts
*
* @work: delayed work item
*
* Checks for fence activity.
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
struct amdgpu_ring *ring = from_timer(ring, t,
fence_drv.fallback_timer);
amdgpu_fence_process(ring);
}
/** /**
* amdgpu_fence_wait_empty - wait for all fences to signal * amdgpu_fence_wait_empty - wait for all fences to signal
* *
@ -424,8 +393,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
atomic_set(&ring->fence_drv.last_seq, 0); atomic_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false; ring->fence_drv.initialized = false;
timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1; ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock); spin_lock_init(&ring->fence_drv.lock);
ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
@ -501,7 +468,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src, amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type); ring->fence_drv.irq_type);
drm_sched_fini(&ring->sched); drm_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
dma_fence_put(ring->fence_drv.fences[j]); dma_fence_put(ring->fence_drv.fences[j]);
kfree(ring->fence_drv.fences); kfree(ring->fence_drv.fences);
@ -594,27 +560,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
return (const char *)fence->ring->name; return (const char *)fence->ring->name;
} }
/**
* amdgpu_fence_enable_signaling - enable signalling on fence
* @fence: fence
*
* This function is called with fence_queue lock held, and adds a callback
* to fence_queue that checks if this fence is signaled, and if so it
* signals the fence and removes itself.
*/
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_ring *ring = fence->ring;
if (!timer_pending(&ring->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(ring);
DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
return true;
}
/** /**
* amdgpu_fence_free - free up the fence memory * amdgpu_fence_free - free up the fence memory
* *
@ -645,7 +590,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
static const struct dma_fence_ops amdgpu_fence_ops = { static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name, .get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name, .get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
.release = amdgpu_fence_release, .release = amdgpu_fence_release,
}; };
@ -701,7 +645,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
seq_printf(m, "gpu recover\n"); seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL, true); amdgpu_device_gpu_recover(adev, NULL);
return 0; return 0;
} }

View File

@ -112,7 +112,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
struct amdgpu_bo_param bp; struct amdgpu_bo_param bp;
memset(&bp, 0, sizeof(bp)); memset(&bp, 0, sizeof(bp));
@ -123,7 +123,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel; bp.type = ttm_bo_type_kernel;
bp.resv = NULL; bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &adev->gart.robj); r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
if (r) { if (r) {
return r; return r;
} }
@ -145,19 +145,18 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{ {
int r; int r;
r = amdgpu_bo_reserve(adev->gart.robj, false); r = amdgpu_bo_reserve(adev->gart.bo, false);
if (unlikely(r != 0)) if (unlikely(r != 0))
return r; return r;
r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM); r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
if (r) { if (r) {
amdgpu_bo_unreserve(adev->gart.robj); amdgpu_bo_unreserve(adev->gart.bo);
return r; return r;
} }
r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr); r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
if (r) if (r)
amdgpu_bo_unpin(adev->gart.robj); amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.robj); amdgpu_bo_unreserve(adev->gart.bo);
adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);
return r; return r;
} }
@ -173,14 +172,14 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
return; return;
} }
r = amdgpu_bo_reserve(adev->gart.robj, true); r = amdgpu_bo_reserve(adev->gart.bo, true);
if (likely(r == 0)) { if (likely(r == 0)) {
amdgpu_bo_kunmap(adev->gart.robj); amdgpu_bo_kunmap(adev->gart.bo);
amdgpu_bo_unpin(adev->gart.robj); amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.robj); amdgpu_bo_unreserve(adev->gart.bo);
adev->gart.ptr = NULL; adev->gart.ptr = NULL;
} }
} }
@ -196,10 +195,10 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/ */
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{ {
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
return; return;
} }
amdgpu_bo_unref(&adev->gart.robj); amdgpu_bo_unref(&adev->gart.bo);
} }
/* /*

View File

@ -40,8 +40,7 @@ struct amdgpu_bo;
#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE) #define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
struct amdgpu_gart { struct amdgpu_gart {
u64 table_addr; struct amdgpu_bo *bo;
struct amdgpu_bo *robj;
void *ptr; void *ptr;
unsigned num_gpu_pages; unsigned num_gpu_pages;
unsigned num_cpu_pages; unsigned num_cpu_pages;

View File

@ -24,13 +24,6 @@
#ifndef __AMDGPU_GDS_H__ #ifndef __AMDGPU_GDS_H__
#define __AMDGPU_GDS_H__ #define __AMDGPU_GDS_H__
/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned,
* we should report GDS/GWS/OA size as PAGE_SIZE aligned
* */
#define AMDGPU_GDS_SHIFT 2
#define AMDGPU_GWS_SHIFT PAGE_SHIFT
#define AMDGPU_OA_SHIFT PAGE_SHIFT
struct amdgpu_ring; struct amdgpu_ring;
struct amdgpu_bo; struct amdgpu_bo;

View File

@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL; return -EINVAL;
} }
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) /* GDS allocations must be DW aligned */
size = size << AMDGPU_GDS_SHIFT; if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) size = ALIGN(size, 4);
size = size << AMDGPU_GWS_SHIFT;
else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
size = size << AMDGPU_OA_SHIFT;
else
return -EINVAL;
} }
size = roundup(size, PAGE_SIZE);
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
r = amdgpu_bo_reserve(vm->root.base.bo, false); r = amdgpu_bo_reserve(vm->root.base.bo, false);
@ -572,16 +566,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL; return -EINVAL;
} }
if (args->va_address >= AMDGPU_VA_HOLE_START && if (args->va_address >= AMDGPU_GMC_HOLE_START &&
args->va_address < AMDGPU_VA_HOLE_END) { args->va_address < AMDGPU_GMC_HOLE_END) {
dev_dbg(&dev->pdev->dev, dev_dbg(&dev->pdev->dev,
"va_address 0x%LX is in VA hole 0x%LX-0x%LX\n", "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
args->va_address, AMDGPU_VA_HOLE_START, args->va_address, AMDGPU_GMC_HOLE_START,
AMDGPU_VA_HOLE_END); AMDGPU_GMC_HOLE_END);
return -EINVAL; return -EINVAL;
} }
args->va_address &= AMDGPU_VA_HOLE_MASK; args->va_address &= AMDGPU_GMC_HOLE_MASK;
if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n", dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n",

View File

@ -0,0 +1,92 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_GEM_H__
#define __AMDGPU_GEM_H__
#include <drm/amdgpu_drm.h>
#include <drm/drm_gem.h>
/*
* GEM.
*/
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
void amdgpu_gem_object_free(struct drm_gem_object *obj);
int amdgpu_gem_object_open(struct drm_gem_object *obj,
struct drm_file *file_priv);
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
/*
* GEM objects.
*/
void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
struct drm_gem_object **obj);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct drm_device *dev,
uint32_t handle, uint64_t *offset_p);
int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
#endif

View File

@ -26,9 +26,44 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_gfx.h" #include "amdgpu_gfx.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
/* /*
* GPU scratch registers helpers function. * GPU GFX IP block helpers function.
*/ */
int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue)
{
int bit = 0;
bit += mec * adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
bit += pipe * adev->gfx.mec.num_queue_per_pipe;
bit += queue;
return bit;
}
void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
}
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
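
The linear bit index used by the helpers above can be modelled outside the driver; the pipe/queue counts in this sketch are example values rather than anything read from adev->gfx.mec:

#include <stdio.h>

/* Example topology: 4 pipes per MEC, 8 queues per pipe. */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

static int queue_to_bit(int mec, int pipe, int queue)
{
	return mec * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE +
	       pipe * NUM_QUEUE_PER_PIPE + queue;
}

static void bit_to_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % NUM_QUEUE_PER_PIPE;
	*pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	*mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}

int main(void)
{
	int mec, pipe, queue;
	int bit = queue_to_bit(1, 2, 3);	/* MEC 1, pipe 2, queue 3 */

	bit_to_queue(bit, &mec, &pipe, &queue);
	printf("bit %d -> mec %d pipe %d queue %d\n", bit, mec, pipe, queue);
	return 0;	/* prints: bit 51 -> mec 1 pipe 2 queue 3 */
}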
/** /**
* amdgpu_gfx_scratch_get - Allocate a scratch register * amdgpu_gfx_scratch_get - Allocate a scratch register
* *
@ -340,3 +375,40 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
&ring->mqd_gpu_addr, &ring->mqd_gpu_addr,
&ring->mqd_ptr); &ring->mqd_ptr);
} }
/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
*
* @adev: amdgpu_device pointer
* @enable: true to enable the gfx off feature, false to disable it
*
* 1. The gfx off feature is enabled by the gfx IP block once gfx CG/PG has been enabled.
* 2. Other clients can request that gfx off be disabled; such requests must be honored.
* 3. Other clients can cancel their request to disable gfx off.
* 4. Other clients must not request that gfx off be enabled before cancelling a prior disable request.
*/
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
return;
if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
return;
mutex_lock(&adev->gfx.gfx_off_mutex);
if (!enable)
adev->gfx.gfx_off_req_count++;
else if (adev->gfx.gfx_off_req_count > 0)
adev->gfx.gfx_off_req_count--;
if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
} else if (!enable && adev->gfx.gfx_off_state) {
if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
adev->gfx.gfx_off_state = false;
}
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
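
A toy model of the request counting above, with the SMU call and the 100 ms delayed work replaced by plain flags; the struct name and the harness are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

/* gfxoff may only be engaged when no client holds a "disable" request.
 * The real driver defers the enable via delayed work and talks to the SMU;
 * both are elided here.
 */
struct gfx_off {
	bool state;             /* true: gfxoff currently engaged */
	unsigned int req_count; /* outstanding disable requests, starts at 1 */
};

static void gfx_off_ctrl(struct gfx_off *g, bool enable)
{
	if (!enable)
		g->req_count++;
	else if (g->req_count > 0)
		g->req_count--;

	if (enable && !g->state && g->req_count == 0)
		g->state = true;   /* real code: schedule the delayed enable */
	else if (!enable && g->state)
		g->state = false;  /* real code: ask the SMU to leave gfxoff */
}

int main(void)
{
	struct gfx_off g = { .state = false, .req_count = 1 };

	gfx_off_ctrl(&g, true);   /* initial enable from late init -> engaged */
	gfx_off_ctrl(&g, false);  /* a client needs gfx on -> disengaged */
	gfx_off_ctrl(&g, true);   /* client done -> engaged again */
	printf("state=%d req_count=%u\n", g.state, g.req_count);
	return 0;
}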

View File

@ -24,13 +24,317 @@
#ifndef __AMDGPU_GFX_H__ #ifndef __AMDGPU_GFX_H__
#define __AMDGPU_GFX_H__ #define __AMDGPU_GFX_H__
/*
* GFX stuff
*/
#include "clearstate_defs.h"
#include "amdgpu_ring.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
#define AMDGPU_GFX_SAFE_MODE 0x00000001L
#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
struct amdgpu_rlc_funcs {
void (*enter_safe_mode)(struct amdgpu_device *adev);
void (*exit_safe_mode)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
spinlock_t ring_lock;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
};
/*
* GPU scratch registers structures, functions & helpers
*/
struct amdgpu_scratch {
unsigned num_reg;
uint32_t reg_base;
uint32_t free_mask;
};
/*
* GFX configurations
*/
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
struct amdgpu_rb_config {
uint32_t rb_backend_disable;
uint32_t user_rb_backend_disable;
uint32_t raster_config;
uint32_t raster_config_1;
};
struct gb_addr_config {
uint16_t pipe_interleave_size;
uint8_t num_pipes;
uint8_t max_compress_frags;
uint8_t num_banks;
uint8_t num_se;
uint8_t num_rb_per_se;
};
struct amdgpu_gfx_config {
unsigned max_shader_engines;
unsigned max_tile_pipes;
unsigned max_cu_per_sh;
unsigned max_sh_per_se;
unsigned max_backends_per_se;
unsigned max_texture_channel_caches;
unsigned max_gprs;
unsigned max_gs_threads;
unsigned max_hw_contexts;
unsigned sc_prim_fifo_size_frontend;
unsigned sc_prim_fifo_size_backend;
unsigned sc_hiz_tile_fifo_size;
unsigned sc_earlyz_tile_fifo_size;
unsigned num_tile_pipes;
unsigned backend_enable_mask;
unsigned mem_max_burst_length_bytes;
unsigned mem_row_size_in_kb;
unsigned shader_engine_tile_size;
unsigned num_gpus;
unsigned multi_gpu_tile_size;
unsigned mc_arb_ramcfg;
unsigned gb_addr_config;
unsigned num_rbs;
unsigned gs_vgt_table_depth;
unsigned gs_prim_buffer_depth;
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
/* cached value of DB_DEBUG2 */
uint32_t db_debug2;
};
struct amdgpu_cu_info {
uint32_t simd_per_cu;
uint32_t max_waves_per_simd;
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
/* total active CU number */
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
uint32_t bitmap[4][4];
};
struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 instance);
void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
uint32_t wave, uint32_t *dst, int *no_fields);
void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
uint32_t wave, uint32_t thread, uint32_t start,
uint32_t size, uint32_t *dst);
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
uint32_t wave, uint32_t start, uint32_t size,
uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
u32 queue);
};
struct amdgpu_ngg_buf {
struct amdgpu_bo *bo;
uint64_t gpu_addr;
uint32_t size;
uint32_t bo_size;
};
enum {
NGG_PRIM = 0,
NGG_POS,
NGG_CNTL,
NGG_PARAM,
NGG_BUF_MAX
};
struct amdgpu_ngg {
struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
uint32_t gds_reserve_addr;
uint32_t gds_reserve_size;
bool init;
};
struct sq_work {
struct work_struct work;
unsigned ih_data;
};
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
struct amdgpu_scratch scratch;
const struct firmware *me_fw; /* ME firmware */
uint32_t me_fw_version;
const struct firmware *pfp_fw; /* PFP firmware */
uint32_t pfp_fw_version;
const struct firmware *ce_fw; /* CE firmware */
uint32_t ce_fw_version;
const struct firmware *rlc_fw; /* RLC firmware */
uint32_t rlc_fw_version;
const struct firmware *mec_fw; /* MEC firmware */
uint32_t mec_fw_version;
const struct firmware *mec2_fw; /* MEC2 firmware */
uint32_t mec2_fw_version;
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
uint32_t rlc_srlc_fw_version;
uint32_t rlc_srlc_feature_version;
uint32_t rlc_srlg_fw_version;
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
bool me_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
struct sq_work sq_work;
/* gfx status */
uint32_t gfx_current_status;
/* ce ram size*/
unsigned ce_ram_size;
struct amdgpu_cu_info cu_info;
const struct amdgpu_gfx_funcs *funcs;
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
/* s3/s4 mask */
bool in_suspend;
/* NGG */
struct amdgpu_ngg ngg;
/* gfx off */
bool gfx_off_state; /* true: enabled, false: disabled */
struct mutex gfx_off_mutex;
uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
struct delayed_work gfx_off_delay_work;
/* pipe reservation */
struct mutex pipe_reserve_mutex;
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
*
* @bit_width: length of the mask
*
* create a variable length bit mask.
* Returns the bitmask.
*/
static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
{
return (u32)((1ULL << bit_width) - 1);
}
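
A trivial standalone check of the inline above; the assertions only restate what the shift arithmetic guarantees:

#include <assert.h>
#include <stdint.h>

static inline uint32_t create_bitmask(uint32_t bit_width)
{
	return (uint32_t)((1ULL << bit_width) - 1);
}

int main(void)
{
	assert(create_bitmask(4) == 0xfu);
	assert(create_bitmask(32) == 0xffffffffu);	/* 1ULL avoids UB at width 32 */
	return 0;
}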
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg); int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg); void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh); unsigned max_sh);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring, struct amdgpu_ring *ring,
@ -47,47 +351,13 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size); unsigned mqd_size);
void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
/** void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
* amdgpu_gfx_create_bitmask - create a bitmask int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
* int pipe, int queue);
* @bit_width: length of the mask void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
* int *mec, int *pipe, int *queue);
* create a variable length bit mask. bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
* Returns the bitmask. int pipe, int queue);
*/ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
{
return (u32)((1ULL << bit_width) - 1);
}
static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
int bit = 0;
bit += mec * adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
bit += pipe * adev->gfx.mec.num_queue_per_pipe;
bit += queue;
return bit;
}
static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
}
static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
#endif #endif

View File

@ -0,0 +1,216 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
#include "amdgpu.h"
/**
* amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
*
* @bo: the BO to get the PDE for
* @level: the level in the PD hierarchy
* @addr: resulting addr
* @flags: resulting flags
*
* Get the address and flags to be used for a PDE (Page Directory Entry).
*/
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_dma_tt *ttm;
switch (bo->tbo.mem.mem_type) {
case TTM_PL_TT:
ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
*addr = ttm->dma_address[0];
break;
case TTM_PL_VRAM:
*addr = amdgpu_bo_gpu_offset(bo);
break;
default:
*addr = 0;
break;
}
*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}
/**
* amdgpu_gmc_pd_addr - return the address of the root directory
*
*/
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
uint64_t pd_addr;
/* TODO: move that into ASIC specific code */
if (adev->asic_type >= CHIP_VEGA10) {
uint64_t flags = AMDGPU_PTE_VALID;
amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
pd_addr |= flags;
} else {
pd_addr = amdgpu_bo_gpu_offset(bo);
}
return pd_addr;
}
/**
* amdgpu_gmc_agp_addr - return the address in the AGP address space
*
* @tbo: TTM BO which needs the address, must be in GTT domain
*
* Tries to figure out how to access the BO through the AGP aperture. Returns
* AMDGPU_BO_INVALID_OFFSET if that is not possible.
*/
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_dma_tt *ttm;
if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
return AMDGPU_BO_INVALID_OFFSET;
ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
return AMDGPU_BO_INVALID_OFFSET;
return adev->gmc.agp_start + ttm->dma_address[0];
}
/**
* amdgpu_gmc_vram_location - try to find VRAM location
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
* @base: base address at which to put VRAM
*
* Function will try to place VRAM at the base address provided
* as a parameter.
*/
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base)
{
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
mc->vram_start = base;
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
if (limit && limit < mc->real_vram_size)
mc->real_vram_size = limit;
if (mc->xgmi.num_physical_nodes == 0) {
mc->fb_start = mc->vram_start;
mc->fb_end = mc->vram_end;
}
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
}
/**
* amdgpu_gmc_gart_location - try to find GART location
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
*
* Function will try to place GART before or after VRAM.
*
* If the GART size is bigger than the space left, the GART size is adjusted.
* Thus the function never fails.
*/
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
const uint64_t four_gb = 0x100000000ULL;
u64 size_af, size_bf;
mc->gart_size += adev->pm.smu_prv_buffer_size;
/* VCE doesn't like it when BOs cross a 4GB segment, so align
* the GART base on a 4GB boundary as well.
*/
size_bf = mc->fb_start;
size_af = adev->gmc.mc_mask + 1 - ALIGN(mc->fb_end + 1, four_gb);
if (mc->gart_size > max(size_bf, size_af)) {
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = max(size_bf, size_af);
}
if ((size_bf >= mc->gart_size && size_bf < size_af) ||
(size_af < mc->gart_size))
mc->gart_start = 0;
else
mc->gart_start = mc->mc_mask - mc->gart_size + 1;
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
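
A standalone walk-through of the placement logic above with made-up numbers (48-bit MC space, 8 GB of VRAM at offset 0, 512 MB of GART); the variable names loosely mirror struct amdgpu_gmc but nothing here comes from real hardware:

#include <stdio.h>
#include <stdint.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	const uint64_t four_gb = 0x100000000ULL;
	uint64_t mc_mask = 0xffffffffffffULL;            /* 48-bit MC space */
	uint64_t fb_start = 0, fb_end = 0x1ffffffffULL;  /* 8 GB of VRAM at 0 */
	uint64_t gart_size = 512ULL << 20;
	uint64_t gart_start, gart_end;

	/* Space before and after the FB region. */
	uint64_t size_bf = fb_start;
	uint64_t size_af = mc_mask + 1 - ALIGN_UP(fb_end + 1, four_gb);

	if (gart_size > (size_bf > size_af ? size_bf : size_af))
		gart_size = size_bf > size_af ? size_bf : size_af;

	if ((size_bf >= gart_size && size_bf < size_af) || size_af < gart_size)
		gart_start = 0;
	else
		gart_start = mc_mask - gart_size + 1;

	gart_start &= ~(four_gb - 1);                    /* keep it 4 GB aligned */
	gart_end = gart_start + gart_size - 1;
	printf("GART 0x%llx - 0x%llx\n",
	       (unsigned long long)gart_start, (unsigned long long)gart_end);
	return 0;
}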
/**
* amdgpu_gmc_agp_location - try to find AGP location
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
*
* Function will try to find a place for the AGP BAR in the MC address
* space.
*
* The AGP BAR will be assigned the largest available hole in the address space.
* Should be called after the VRAM and GART locations are set up.
*/
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
const uint64_t sixteen_gb = 1ULL << 34;
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
if (mc->fb_start > mc->gart_start) {
size_bf = (mc->fb_start & sixteen_gb_mask) -
ALIGN(mc->gart_end + 1, sixteen_gb);
size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
} else {
size_bf = mc->fb_start & sixteen_gb_mask;
size_af = (mc->gart_start & sixteen_gb_mask) -
ALIGN(mc->fb_end + 1, sixteen_gb);
}
if (size_bf > size_af) {
mc->agp_start = mc->fb_start > mc->gart_start ?
mc->gart_end + 1 : 0;
mc->agp_size = size_bf;
} else {
mc->agp_start = (mc->fb_start > mc->gart_start ?
mc->fb_end : mc->gart_end) + 1,
mc->agp_size = size_af;
}
mc->agp_start = ALIGN(mc->agp_start, sixteen_gb);
mc->agp_end = mc->agp_start + mc->agp_size - 1;
dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}
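
The same exercise for the AGP hole selection above, again with invented numbers that continue the previous sketch (FB at the bottom of the MC space, GART at the top):

#include <stdio.h>
#include <stdint.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	const uint64_t sixteen_gb = 1ULL << 34;
	uint64_t mc_mask = 0xffffffffffffULL;
	uint64_t fb_start = 0, fb_end = 0x1ffffffffULL;
	uint64_t gart_start = 0xffff00000000ULL, gart_end = 0xffff1fffffffULL;
	uint64_t size_bf, size_af, agp_start, agp_size;

	if (fb_start > gart_start) {
		size_bf = (fb_start & ~(sixteen_gb - 1)) -
			  ALIGN_UP(gart_end + 1, sixteen_gb);
		size_af = mc_mask + 1 - ALIGN_UP(fb_end + 1, sixteen_gb);
	} else {
		size_bf = fb_start & ~(sixteen_gb - 1);
		size_af = (gart_start & ~(sixteen_gb - 1)) -
			  ALIGN_UP(fb_end + 1, sixteen_gb);
	}

	/* Pick the larger hole and align its start to 16 GB. */
	if (size_bf > size_af) {
		agp_start = fb_start > gart_start ? gart_end + 1 : 0;
		agp_size = size_bf;
	} else {
		agp_start = (fb_start > gart_start ? fb_end : gart_end) + 1;
		agp_size = size_af;
	}
	agp_start = ALIGN_UP(agp_start, sixteen_gb);
	printf("AGP %lluM at 0x%llx\n", (unsigned long long)(agp_size >> 20),
	       (unsigned long long)agp_start);
	return 0;
}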

View File

@ -30,6 +30,19 @@
#include "amdgpu_irq.h" #include "amdgpu_irq.h"
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL
/*
* Hardware is programmed as if the hole doesn't exist, using start and end
* address values.
*
* This mask is used to remove the upper 16 bits of the VA and so arrive at
* the linear address value.
*/
#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
struct firmware; struct firmware;
/* /*
@ -74,6 +87,20 @@ struct amdgpu_gmc_funcs {
u64 *dst, u64 *flags); u64 *dst, u64 *flags);
}; };
struct amdgpu_xgmi {
/* from psp */
u64 device_id;
u64 hive_id;
/* fixed per family */
u64 node_segment_size;
/* physical node (0-3) */
unsigned physical_node_id;
/* number of nodes (0-4) */
unsigned num_physical_nodes;
/* gpu list in the same hive */
struct list_head head;
};
struct amdgpu_gmc { struct amdgpu_gmc {
resource_size_t aper_size; resource_size_t aper_size;
resource_size_t aper_base; resource_size_t aper_base;
@ -81,11 +108,22 @@ struct amdgpu_gmc {
* about vram size near mc fb location */ * about vram size near mc fb location */
u64 mc_vram_size; u64 mc_vram_size;
u64 visible_vram_size; u64 visible_vram_size;
u64 agp_size;
u64 agp_start;
u64 agp_end;
u64 gart_size; u64 gart_size;
u64 gart_start; u64 gart_start;
u64 gart_end; u64 gart_end;
u64 vram_start; u64 vram_start;
u64 vram_end; u64 vram_end;
/* FB region: the same as the local VRAM region on a single GPU. In an XGMI
* configuration this region covers all GPUs in the same hive, and each GPU
* in the hive has the same view of it.
* GPU0's VRAM starts at offset (0 * segment size),
* GPU1 starts at offset (1 * segment size), etc.
*/
u64 fb_start;
u64 fb_end;
unsigned vram_width; unsigned vram_width;
u64 real_vram_size; u64 real_vram_size;
int vram_mtrr; int vram_mtrr;
@ -109,8 +147,17 @@ struct amdgpu_gmc {
atomic_t vm_fault_info_updated; atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs; const struct amdgpu_gmc_funcs *gmc_funcs;
struct amdgpu_xgmi xgmi;
}; };
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
/** /**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
* *
@ -126,4 +173,28 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
return (gmc->real_vram_size == gmc->visible_vram_size); return (gmc->real_vram_size == gmc->visible_vram_size);
} }
/**
* amdgpu_gmc_sign_extend - sign extend the given gmc address
*
* @addr: address to extend
*/
static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
{
if (addr >= AMDGPU_GMC_HOLE_START)
addr |= AMDGPU_GMC_HOLE_END;
return addr;
}
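A quick standalone illustration (hypothetical address, not driver code) of what the helper above does for a VA in the upper half of the 48-bit space, and how AMDGPU_GMC_HOLE_MASK recovers the linear form again:
#include <stdio.h>
#include <stdint.h>
#define HOLE_START 0x0000800000000000ULL	/* AMDGPU_GMC_HOLE_START */
#define HOLE_END   0xffff800000000000ULL	/* AMDGPU_GMC_HOLE_END */
#define HOLE_MASK  0x0000ffffffffffffULL	/* AMDGPU_GMC_HOLE_MASK */
int main(void)
{
	uint64_t va = 0x0000800012345000ULL;	/* upper-half 48-bit VA */
	uint64_t canonical = va >= HOLE_START ? (va | HOLE_END) : va;
	uint64_t linear = canonical & HOLE_MASK;
	/* prints canonical=0xffff800012345000, linear=0x0000800012345000 */
	printf("canonical=0x%016llx linear=0x%016llx\n",
	       (unsigned long long)canonical, (unsigned long long)linear);
	return 0;
}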
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags);
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base);
void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
#endif #endif

View File

@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "atom.h" #include "atom.h"
#include "amdgpu_trace.h"
#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
@ -170,6 +171,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
(amdgpu_sriov_vf(adev) && need_ctx_switch) || (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) { amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true; need_pipe_sync = true;
if (tmp)
trace_amdgpu_ib_pipe_sync(job, tmp);
dma_fence_put(tmp); dma_fence_put(tmp);
} }

View File

@ -197,78 +197,3 @@ restart_ih:
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/**
* amdgpu_ih_add_fault - Add a page fault record
*
* @adev: amdgpu device pointer
* @key: 64-bit encoding of PASID and address
*
* This should be called when a retry page fault interrupt is
* received. If this is a new page fault, it will be added to a hash
* table. The return value indicates whether this is a new fault, or
* a fault that was already known and is already being handled.
*
* If there are too many pending page faults, this will fail. Retry
* interrupts should be ignored in this case until there is enough
* free space.
*
* Returns 0 if the fault was added, 1 if the fault was already known,
* -ENOSPC if there are too many pending faults.
*/
int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
{
unsigned long flags;
int r = -ENOSPC;
if (WARN_ON_ONCE(!adev->irq.ih.faults))
/* Should be allocated in <IP>_ih_sw_init on GPUs that
* support retry faults and require retry filtering.
*/
return r;
spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
/* Only let the hash table fill up to 50% for best performance */
if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
goto unlock_out;
r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
if (!r)
adev->irq.ih.faults->count++;
/* chash_table_copy_in should never fail unless we're losing count */
WARN_ON_ONCE(r < 0);
unlock_out:
spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
return r;
}
/**
* amdgpu_ih_clear_fault - Remove a page fault record
*
* @adev: amdgpu device pointer
* @key: 64-bit encoding of PASID and address
*
* This should be called when a page fault has been handled. Any
* future interrupt with this key will be processed as a new
* page fault.
*/
void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
{
unsigned long flags;
int r;
if (!adev->irq.ih.faults)
return;
spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
if (!WARN_ON_ONCE(r < 0)) {
adev->irq.ih.faults->count--;
WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
}
spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
}

View File

@ -24,7 +24,6 @@
#ifndef __AMDGPU_IH_H__ #ifndef __AMDGPU_IH_H__
#define __AMDGPU_IH_H__ #define __AMDGPU_IH_H__
#include <linux/chash.h>
#include "soc15_ih_clientid.h" #include "soc15_ih_clientid.h"
struct amdgpu_device; struct amdgpu_device;
@ -32,13 +31,6 @@ struct amdgpu_device;
#define AMDGPU_IH_CLIENTID_LEGACY 0 #define AMDGPU_IH_CLIENTID_LEGACY 0
#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX #define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
#define AMDGPU_PAGEFAULT_HASH_BITS 8
struct amdgpu_retryfault_hashtable {
DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
spinlock_t lock;
int count;
};
/* /*
* R6xx+ IH ring * R6xx+ IH ring
*/ */
@ -57,7 +49,6 @@ struct amdgpu_ih_ring {
bool use_doorbell; bool use_doorbell;
bool use_bus_addr; bool use_bus_addr;
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
struct amdgpu_retryfault_hashtable *faults;
}; };
#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 #define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
@ -76,11 +67,24 @@ struct amdgpu_iv_entry {
const uint32_t *iv_entry; const uint32_t *iv_entry;
}; };
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev);
bool (*prescreen_iv)(struct amdgpu_device *adev);
void (*decode_iv)(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
void (*set_rptr)(struct amdgpu_device *adev);
};
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
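For reference, a minimal sketch of how an interrupt-handler IP block would fill in the new funcs table; the example_* names and stub bodies are placeholders, not code from any real block:
static u32 example_ih_get_wptr(struct amdgpu_device *adev)
{
	return 0;	/* a real block reads the ring's write pointer here */
}
static bool example_ih_prescreen_iv(struct amdgpu_device *adev)
{
	return true;	/* true = keep processing this IV, false = drop it */
}
static void example_ih_decode_iv(struct amdgpu_device *adev,
				 struct amdgpu_iv_entry *entry)
{
	/* a real block unpacks the ring entry into *entry here */
}
static void example_ih_set_rptr(struct amdgpu_device *adev)
{
	/* a real block writes the updated read pointer back to the HW */
}
static const struct amdgpu_ih_funcs example_ih_funcs = {
	.get_wptr = example_ih_get_wptr,
	.prescreen_iv = example_ih_prescreen_iv,
	.decode_iv = example_ih_decode_iv,
	.set_rptr = example_ih_set_rptr,
};
Assigning adev->irq.ih_funcs = &example_ih_funcs then lets the amdgpu_ih_* wrapper macros above dispatch into the block.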
int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
bool use_bus_addr); bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev); void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
int amdgpu_ih_process(struct amdgpu_device *adev); int amdgpu_ih_process(struct amdgpu_device *adev);
int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
#endif #endif

View File

@ -105,8 +105,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device, struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work); reset_work);
if (!amdgpu_sriov_vf(adev)) if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL, false); amdgpu_device_gpu_recover(adev, NULL);
} }
/** /**

View File

@ -33,11 +33,18 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job); struct amdgpu_job *job = to_amdgpu_job(s_job);
if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
DRM_ERROR("ring %s timeout, but soft recovered\n",
s_job->sched->name);
return;
}
DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq); ring->fence_drv.sync_seq);
amdgpu_device_gpu_recover(ring->adev, job, false); if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
} }
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@ -66,6 +73,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->sched_sync); amdgpu_sync_create(&(*job)->sched_sync);
(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
return 0; return 0;
} }
@ -82,8 +90,6 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]); r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
if (r) if (r)
kfree(*job); kfree(*job);
else
(*job)->vm_pd_addr = adev->gart.table_addr;
return r; return r;
} }

View File

@ -37,6 +37,8 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
/** /**
* amdgpu_driver_unload_kms - Main unload function for KMS. * amdgpu_driver_unload_kms - Main unload function for KMS.
@ -255,12 +257,133 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->psp.asd_fw_version; fw_info->ver = adev->psp.asd_fw_version;
fw_info->feature = adev->psp.asd_feature_version; fw_info->feature = adev->psp.asd_feature_version;
break; break;
case AMDGPU_INFO_FW_DMCU:
fw_info->ver = adev->dm.dmcu_fw_version;
fw_info->feature = 0;
break;
default: default:
return -EINVAL; return -EINVAL;
} }
return 0; return 0;
} }
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
{
uint32_t ib_start_alignment = 0;
uint32_t ib_size_alignment = 0;
enum amd_ip_block_type type;
unsigned int num_rings = 0;
unsigned int i, j;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
return -EINVAL;
switch (info->query_hw_ip.type) {
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
if (adev->gfx.gfx_ring[i].ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
if (adev->gfx.compute_ring[i].ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
if (adev->sdma.instance[i].ring.ready)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
if (adev->uvd.inst[i].ring.ready)
++num_rings;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
if (adev->vce.ring[i].ready)
++num_rings;
ib_start_alignment = 4;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
if (adev->uvd.inst[i].ring_enc[j].ready)
++num_rings;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_dec.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
if (adev->vcn.ring_enc[i].ready)
++num_rings;
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_jpeg.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
default:
return -EINVAL;
}
for (i = 0; i < adev->num_ip_blocks; i++)
if (adev->ip_blocks[i].version->type == type &&
adev->ip_blocks[i].status.valid)
break;
if (i == adev->num_ip_blocks)
return 0;
num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
num_rings);
result->hw_ip_version_major = adev->ip_blocks[i].version->major;
result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
result->ib_start_alignment = ib_start_alignment;
result->ib_size_alignment = ib_size_alignment;
return 0;
}
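On the user-space side this ioctl path is typically reached through libdrm's amdgpu wrapper; the sketch below (assumed render node path, built against libdrm_amdgpu) queries the GFX IP and prints the fields filled in above:
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
int main(void)
{
	uint32_t major, minor;
	amdgpu_device_handle dev;
	struct drm_amdgpu_info_hw_ip info = {0};
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* assumed node */
	if (fd < 0 || amdgpu_device_initialize(fd, &major, &minor, &dev))
		return 1;
	/* wraps DRM_AMDGPU_INFO with AMDGPU_INFO_HW_IP_INFO */
	if (!amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &info))
		printf("gfx %u.%u rings 0x%x ib align %u/%u\n",
		       info.hw_ip_version_major, info.hw_ip_version_minor,
		       info.available_rings, info.ib_start_alignment,
		       info.ib_size_alignment);
	amdgpu_device_deinitialize(dev);
	close(fd);
	return 0;
}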
/* /*
* Userspace get information ioctl * Userspace get information ioctl
*/ */
@ -286,7 +409,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_crtc *crtc; struct drm_crtc *crtc;
uint32_t ui32 = 0; uint32_t ui32 = 0;
uint64_t ui64 = 0; uint64_t ui64 = 0;
int i, j, found; int i, found;
int ui32_size = sizeof(ui32); int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer) if (!info->return_size || !info->return_pointer)
@ -316,101 +439,14 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
case AMDGPU_INFO_HW_IP_INFO: { case AMDGPU_INFO_HW_IP_INFO: {
struct drm_amdgpu_info_hw_ip ip = {}; struct drm_amdgpu_info_hw_ip ip = {};
enum amd_ip_block_type type; int ret;
uint32_t ring_mask = 0;
uint32_t ib_start_alignment = 0;
uint32_t ib_size_alignment = 0;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) ret = amdgpu_hw_ip_info(adev, info, &ip);
return -EINVAL; if (ret)
return ret;
switch (info->query_hw_ip.type) { ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip)));
case AMDGPU_HW_IP_GFX: return ret ? -EFAULT : 0;
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
ring_mask |= adev->gfx.gfx_ring[i].ready << i;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
ring_mask |= adev->gfx.compute_ring[i].ready << i;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
ring_mask |= adev->sdma.instance[i].ring.ready << i;
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
ring_mask |= adev->uvd.inst[i].ring.ready;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
ring_mask |= adev->vce.ring[i].ready << i;
ib_start_alignment = 4;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_dec.ready;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
ring_mask |= adev->vcn.ring_enc[i].ready << i;
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_jpeg.ready;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
default:
return -EINVAL;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
if (adev->ip_blocks[i].version->type == type &&
adev->ip_blocks[i].status.valid) {
ip.hw_ip_version_major = adev->ip_blocks[i].version->major;
ip.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
ip.capabilities_flags = 0;
ip.available_rings = ring_mask;
ip.ib_start_alignment = ib_start_alignment;
ip.ib_size_alignment = ib_size_alignment;
break;
}
}
return copy_to_user(out, &ip,
min((size_t)size, sizeof(ip))) ? -EFAULT : 0;
} }
case AMDGPU_INFO_HW_IP_COUNT: { case AMDGPU_INFO_HW_IP_COUNT: {
enum amd_ip_block_type type; enum amd_ip_block_type type;
@ -492,13 +528,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_gds gds_info; struct drm_amdgpu_info_gds gds_info;
memset(&gds_info, 0, sizeof(gds_info)); memset(&gds_info, 0, sizeof(gds_info));
gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT; gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT; gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT; gds_info.gds_total_size = adev->gds.mem.total_size;
gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT; gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT; gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT; gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT; gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
return copy_to_user(out, &gds_info, return copy_to_user(out, &gds_info,
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
} }
@ -617,16 +653,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
vm_size -= AMDGPU_VA_RESERVED_SIZE; vm_size -= AMDGPU_VA_RESERVED_SIZE;
/* Older VCE FW versions are buggy and can handle only 40bits */ /* Older VCE FW versions are buggy and can handle only 40bits */
if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45) if (adev->vce.fw_version &&
adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
vm_size = min(vm_size, 1ULL << 40); vm_size = min(vm_size, 1ULL << 40);
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max = dev_info.virtual_address_max =
min(vm_size, AMDGPU_VA_HOLE_START); min(vm_size, AMDGPU_GMC_HOLE_START);
if (vm_size > AMDGPU_VA_HOLE_START) { if (vm_size > AMDGPU_GMC_HOLE_START) {
dev_info.high_va_offset = AMDGPU_VA_HOLE_END; dev_info.high_va_offset = AMDGPU_GMC_HOLE_END;
dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size; dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
} }
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
@ -941,10 +978,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pm_runtime_get_sync(dev->dev); pm_runtime_get_sync(dev->dev);
if (adev->asic_type != CHIP_RAVEN) { if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
amdgpu_uvd_free_handles(adev, file_priv); amdgpu_uvd_free_handles(adev, file_priv);
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv); amdgpu_vce_free_handles(adev, file_priv);
}
amdgpu_vm_bo_rmv(adev, fpriv->prt_va); amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
@ -1262,6 +1299,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver); fw_info.feature, fw_info.ver);
/* DMCU */
query_fw.fw_type = AMDGPU_INFO_FW_DMCU;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);

View File

@ -51,18 +51,6 @@
* *
*/ */
static bool amdgpu_bo_need_backup(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return false;
if (amdgpu_gpu_recovery == 0 ||
(amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))
return false;
return true;
}
/** /**
* amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
* *
@ -163,10 +151,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
if (domain & AMDGPU_GEM_DOMAIN_GTT) { if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0; places[c].fpfn = 0;
if (flags & AMDGPU_GEM_CREATE_SHADOW) places[c].lpfn = 0;
places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
else
places[c].lpfn = 0;
places[c].flags = TTM_PL_FLAG_TT; places[c].flags = TTM_PL_FLAG_TT;
if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
places[c].flags |= TTM_PL_FLAG_WC | places[c].flags |= TTM_PL_FLAG_WC |
@ -253,6 +238,11 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
bool free = false; bool free = false;
int r; int r;
if (!size) {
amdgpu_bo_unref(bo_ptr);
return 0;
}
memset(&bp, 0, sizeof(bp)); memset(&bp, 0, sizeof(bp));
bp.size = size; bp.size = size;
bp.byte_align = align; bp.byte_align = align;
@ -346,7 +336,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
if (r) if (r)
return r; return r;
amdgpu_bo_unreserve(*bo_ptr); if (*bo_ptr)
amdgpu_bo_unreserve(*bo_ptr);
return 0; return 0;
} }
@ -436,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
int r; int r;
page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE); if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA))
size <<= PAGE_SHIFT;
else
size = ALIGN(size, PAGE_SIZE);
if (!amdgpu_bo_validate_size(adev, size, bp->domain)) if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM; return -ENOMEM;
@ -451,7 +446,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
return -ENOMEM; return -ENOMEM;
drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va); bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain; bp->domain;
bo->allowed_domains = bo->preferred_domains; bo->allowed_domains = bo->preferred_domains;
@ -541,7 +536,7 @@ fail_unreserve:
} }
static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size, int byte_align, unsigned long size,
struct amdgpu_bo *bo) struct amdgpu_bo *bo)
{ {
struct amdgpu_bo_param bp; struct amdgpu_bo_param bp;
@ -552,7 +547,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
memset(&bp, 0, sizeof(bp)); memset(&bp, 0, sizeof(bp));
bp.size = size; bp.size = size;
bp.byte_align = byte_align;
bp.domain = AMDGPU_GEM_DOMAIN_GTT; bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW; AMDGPU_GEM_CREATE_SHADOW;
@ -563,7 +557,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (!r) { if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo); bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock); mutex_lock(&adev->shadow_list_lock);
list_add_tail(&bo->shadow_list, &adev->shadow_list); list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
mutex_unlock(&adev->shadow_list_lock); mutex_unlock(&adev->shadow_list_lock);
} }
@ -596,12 +590,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (r) if (r)
return r; return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) { if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
if (!bp->resv) if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL)); NULL));
r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr)); r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
if (!bp->resv) if (!bp->resv)
reservation_object_unlock((*bo_ptr)->tbo.resv); reservation_object_unlock((*bo_ptr)->tbo.resv);
@ -695,13 +689,10 @@ retry:
} }
/** /**
* amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
* @adev: amdgpu device object *
* @ring: amdgpu_ring for the engine handling the buffer operations * @shadow: &amdgpu_bo shadow to be restored
* @bo: &amdgpu_bo buffer to be restored
* @resv: reservation object with embedded fence
* @fence: dma_fence associated with the operation * @fence: dma_fence associated with the operation
* @direct: whether to submit the job directly
* *
* Copies a buffer object's shadow content back to the object. * Copies a buffer object's shadow content back to the object.
* This is used for recovering a buffer from its shadow in case of a gpu * This is used for recovering a buffer from its shadow in case of a gpu
@ -710,36 +701,19 @@ retry:
* Returns: * Returns:
* 0 for success or a negative error code on failure. * 0 for success or a negative error code on failure.
*/ */
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct)
{ {
struct amdgpu_bo *shadow = bo->shadow; struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
uint64_t bo_addr, shadow_addr; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
int r; uint64_t shadow_addr, parent_addr;
if (!shadow) shadow_addr = amdgpu_bo_gpu_offset(shadow);
return -EINVAL; parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
bo_addr = amdgpu_bo_gpu_offset(bo); return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); amdgpu_bo_size(shadow), NULL, fence,
true, false);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
goto err;
r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
amdgpu_bo_size(bo), resv, fence,
direct, false);
if (!r)
amdgpu_bo_fence(bo, *fence, true);
err:
return r;
} }
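A minimal sketch (assumed caller; the reservation and locking a real recovery path needs are omitted) of how the simplified helper is meant to be used:
static int example_restore_one(struct amdgpu_bo *shadow)
{
	struct dma_fence *fence = NULL;
	int r;
	r = amdgpu_bo_restore_shadow(shadow, &fence);
	if (!r && fence) {
		/* wait for the shadow-to-VRAM copy to complete */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}
	return r;
}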
/** /**
@ -1019,10 +993,12 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
int amdgpu_bo_evict_vram(struct amdgpu_device *adev) int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{ {
/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
if (0 && (adev->flags & AMD_IS_APU)) { #ifndef CONFIG_HIBERNATION
if (adev->flags & AMD_IS_APU) {
/* Useless to evict on IGP chips */ /* Useless to evict on IGP chips */
return 0; return 0;
} }
#endif
return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
} }
@ -1360,15 +1336,13 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{ {
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem));
WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
!bo->pin_count); !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
!(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
return bo->tbo.offset; return amdgpu_gmc_sign_extend(bo->tbo.offset);
} }
/** /**

View File

@ -89,8 +89,8 @@ struct amdgpu_bo {
void *metadata; void *metadata;
u32 metadata_size; u32 metadata_size;
unsigned prime_shared_count; unsigned prime_shared_count;
/* list of all virtual address to which this bo is associated to */ /* per VM structure for page tables and with virtual addresses */
struct list_head va; struct amdgpu_vm_bo_base *vm_bo;
/* Constant after initialization */ /* Constant after initialization */
struct drm_gem_object gem_base; struct drm_gem_object gem_base;
struct amdgpu_bo *parent; struct amdgpu_bo *parent;
@ -193,19 +193,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
return drm_vma_node_offset_addr(&bo->tbo.vma_node); return drm_vma_node_offset_addr(&bo->tbo.vma_node);
} }
/**
* amdgpu_bo_gpu_accessible - return whether the bo is currently in memory that
* is accessible to the GPU.
*/
static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
{
switch (bo->tbo.mem.mem_type) {
case TTM_PL_TT: return amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem);
case TTM_PL_VRAM: return true;
default: return false;
}
}
/** /**
* amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
*/ */
@ -286,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct reservation_object *resv, struct reservation_object *resv,
struct dma_fence **fence, bool direct); struct dma_fence **fence, bool direct);
int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_validate(struct amdgpu_bo *bo);
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
struct amdgpu_ring *ring, struct dma_fence **fence);
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct);
uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
uint32_t domain); uint32_t domain);

View File

@ -27,6 +27,7 @@
#include "amdgpu_drv.h" #include "amdgpu_drv.h"
#include "amdgpu_pm.h" #include "amdgpu_pm.h"
#include "amdgpu_dpm.h" #include "amdgpu_dpm.h"
#include "amdgpu_display.h"
#include "atom.h" #include "atom.h"
#include <linux/power_supply.h> #include <linux/power_supply.h>
#include <linux/hwmon.h> #include <linux/hwmon.h>
@ -473,6 +474,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* in each power level within a power state. The pp_od_clk_voltage is used for * in each power level within a power state. The pp_od_clk_voltage is used for
* this. * this.
* *
* < For Vega10 and previous ASICs >
*
* Reading the file will display: * Reading the file will display:
* *
* - a list of engine clock levels and voltages labeled OD_SCLK * - a list of engine clock levels and voltages labeled OD_SCLK
@ -490,6 +493,44 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* "c" (commit) to the file to commit your changes. If you want to reset to the * "c" (commit) to the file to commit your changes. If you want to reset to the
* default power levels, write "r" (reset) to the file to reset them. * default power levels, write "r" (reset) to the file to reset them.
* *
*
* < For Vega20 >
*
* Reading the file will display:
*
* - minimum and maximum engine clock labeled OD_SCLK
*
* - maximum memory clock labeled OD_MCLK
*
* - three <frequency, voltage> points labeled OD_VDDC_CURVE.
* They can be used to calibrate the sclk voltage curve.
*
* - a list of valid ranges for sclk, mclk, and voltage curve points
* labeled OD_RANGE
*
* To manually adjust these settings:
*
* - First select manual using power_dpm_force_performance_level
*
* - For clock frequency setting, enter a new value by writing a
* string that contains "s/m index clock" to the file. The index
* should be 0 to set the minimum clock and 1 to set the maximum
* clock. E.g., "s 0 500" will update the minimum sclk to 500 MHz.
* "m 1 800" will update the maximum mclk to 800 MHz.
*
* For the sclk voltage curve, enter the new values by writing a
* string that contains "vc point clock voltage" to the file. The
* points are indexed by 0, 1 and 2. E.g., "vc 0 300 600" will
* update point1 with the clock set to 300 MHz and the voltage to
* 600 mV. "vc 2 1000 1000" will update point3 with the clock set
* to 1000 MHz and the voltage to 1000 mV.
*
* - When you have edited all of the states as needed, write "c" (commit)
* to the file to commit your changes
*
* - If you want to reset to the default power levels, write "r" (reset)
* to the file to reset them
*
*/ */
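As a concrete illustration of the interface documented above, a small user-space sketch (card index, sysfs path, and the clock/voltage values are assumptions, not recommendations) could drive the Vega20 flow like this:
#include <stdio.h>
/* assumed node; the card index depends on the system */
#define OD_PATH "/sys/class/drm/card0/device/pp_od_clk_voltage"
static int od_write(const char *cmd)
{
	FILE *f = fopen(OD_PATH, "w");
	if (!f)
		return -1;
	fputs(cmd, f);
	return fclose(f);
}
int main(void)
{
	/* power_dpm_force_performance_level must already be "manual" */
	od_write("s 1 2000\n");		/* max sclk -> 2000 MHz */
	od_write("vc 2 2000 1100\n");	/* curve point 3 -> 2000 MHz / 1100 mV */
	od_write("c\n");		/* commit the edited table */
	return 0;
}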
static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
@ -519,6 +560,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
type = PP_OD_RESTORE_DEFAULT_TABLE; type = PP_OD_RESTORE_DEFAULT_TABLE;
else if (*buf == 'c') else if (*buf == 'c')
type = PP_OD_COMMIT_DPM_TABLE; type = PP_OD_COMMIT_DPM_TABLE;
else if (!strncmp(buf, "vc", 2))
type = PP_OD_EDIT_VDDC_CURVE;
else else
return -EINVAL; return -EINVAL;
@ -526,6 +569,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
tmp_str = buf_cpy; tmp_str = buf_cpy;
if (type == PP_OD_EDIT_VDDC_CURVE)
tmp_str++;
while (isspace(*++tmp_str)); while (isspace(*++tmp_str));
while (tmp_str[0]) { while (tmp_str[0]) {
@ -569,6 +614,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (adev->powerplay.pp_funcs->print_clock_levels) { if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
return size; return size;
} else { } else {
@ -1719,18 +1765,6 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
mutex_lock(&adev->pm.mutex); mutex_lock(&adev->pm.mutex);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
mutex_unlock(&adev->pm.mutex); mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.uvd_active = true;
adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
mutex_unlock(&adev->pm.mutex);
} else {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.uvd_active = false;
mutex_unlock(&adev->pm.mutex);
}
amdgpu_pm_compute_clocks(adev);
} }
} }
@ -1741,29 +1775,6 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
mutex_lock(&adev->pm.mutex); mutex_lock(&adev->pm.mutex);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
mutex_unlock(&adev->pm.mutex); mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.vce_active = true;
/* XXX select vce level based on ring/task */
adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
mutex_unlock(&adev->pm.mutex);
amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_UNGATE);
amdgpu_pm_compute_clocks(adev);
} else {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_GATE);
amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_GATE);
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.vce_active = false;
mutex_unlock(&adev->pm.mutex);
amdgpu_pm_compute_clocks(adev);
}
} }
} }

View File

@ -35,6 +35,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_display.h" #include "amdgpu_display.h"
#include "amdgpu_gem.h"
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
@ -43,10 +44,10 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops;
/** /**
* amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
* implementation * implementation
* @obj: GEM buffer object * @obj: GEM buffer object (BO)
* *
* Returns: * Returns:
* A scatter/gather table for the pinned pages of the buffer object's memory. * A scatter/gather table for the pinned pages of the BO's memory.
*/ */
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
{ {
@ -58,9 +59,9 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
/** /**
* amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
* @obj: GEM buffer object * @obj: GEM BO
* *
* Sets up an in-kernel virtual mapping of the buffer object's memory. * Sets up an in-kernel virtual mapping of the BO's memory.
* *
* Returns: * Returns:
* The virtual address of the mapping or an error pointer. * The virtual address of the mapping or an error pointer.
@ -80,10 +81,10 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
/** /**
* amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
* @obj: GEM buffer object * @obj: GEM BO
* @vaddr: virtual address (unused) * @vaddr: Virtual address (unused)
* *
* Tears down the in-kernel virtual mapping of the buffer object's memory. * Tears down the in-kernel virtual mapping of the BO's memory.
*/ */
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
{ {
@ -94,14 +95,14 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
/** /**
* amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
* @obj: GEM buffer object * @obj: GEM BO
* @vma: virtual memory area * @vma: Virtual memory area
* *
* Sets up a userspace mapping of the buffer object's memory in the given * Sets up a userspace mapping of the BO's memory in the given
* virtual memory area. * virtual memory area.
* *
* Returns: * Returns:
* 0 on success or negative error code. * 0 on success or a negative error code on failure.
*/ */
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{ {
@ -144,10 +145,10 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
* @attach: DMA-buf attachment * @attach: DMA-buf attachment
* @sg: Scatter/gather table * @sg: Scatter/gather table
* *
* Import shared DMA buffer memory exported by another device. * Imports shared DMA buffer memory exported by another device.
* *
* Returns: * Returns:
* A new GEM buffer object of the given DRM device, representing the memory * A new GEM BO of the given DRM device, representing the memory
* described by the given DMA-buf attachment and scatter/gather table. * described by the given DMA-buf attachment and scatter/gather table.
*/ */
struct drm_gem_object * struct drm_gem_object *
@ -190,7 +191,7 @@ error:
/** /**
* amdgpu_gem_map_attach - &dma_buf_ops.attach implementation * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: shared DMA buffer * @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment * @attach: DMA-buf attachment
* *
* Makes sure that the shared DMA buffer can be accessed by the target device. * Makes sure that the shared DMA buffer can be accessed by the target device.
@ -198,7 +199,7 @@ error:
* all DMA devices. * all DMA devices.
* *
* Returns: * Returns:
* 0 on success or negative error code. * 0 on success or a negative error code on failure.
*/ */
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach) struct dma_buf_attachment *attach)
@ -250,11 +251,11 @@ error_detach:
/** /**
* amdgpu_gem_map_detach - &dma_buf_ops.detach implementation * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: shared DMA buffer * @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment * @attach: DMA-buf attachment
* *
* This is called when a shared DMA buffer no longer needs to be accessible by * This is called when a shared DMA buffer no longer needs to be accessible by
* the other device. For now, simply unpins the buffer from GTT. * another device. For now, simply unpins the buffer from GTT.
*/ */
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach) struct dma_buf_attachment *attach)
@ -279,10 +280,10 @@ error:
/** /**
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
* @obj: GEM buffer object * @obj: GEM BO
* *
* Returns: * Returns:
* The buffer object's reservation object. * The BO's reservation object.
*/ */
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{ {
@ -293,15 +294,15 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
/** /**
* amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: shared DMA buffer * @dma_buf: Shared DMA buffer
* @direction: direction of DMA transfer * @direction: Direction of DMA transfer
* *
* This is called before CPU access to the shared DMA buffer's memory. If it's * This is called before CPU access to the shared DMA buffer's memory. If it's
* a read access, the buffer is moved to the GTT domain if possible, for optimal * a read access, the buffer is moved to the GTT domain if possible, for optimal
* CPU read performance. * CPU read performance.
* *
* Returns: * Returns:
* 0 on success or negative error code. * 0 on success or a negative error code on failure.
*/ */
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction) enum dma_data_direction direction)
@ -348,14 +349,14 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {
/** /**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
* @dev: DRM device * @dev: DRM device
* @gobj: GEM buffer object * @gobj: GEM BO
* @flags: flags like DRM_CLOEXEC and DRM_RDWR * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
* *
* The main work is done by the &drm_gem_prime_export helper, which in turn * The main work is done by the &drm_gem_prime_export helper, which in turn
* uses &amdgpu_gem_prime_res_obj. * uses &amdgpu_gem_prime_res_obj.
* *
* Returns: * Returns:
* Shared DMA buffer representing the GEM buffer object from the given device. * Shared DMA buffer representing the GEM BO from the given device.
*/ */
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj, struct drm_gem_object *gobj,
@ -386,7 +387,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
* uses &amdgpu_gem_prime_import_sg_table. * uses &amdgpu_gem_prime_import_sg_table.
* *
* Returns: * Returns:
* GEM buffer object representing the shared DMA buffer for the given device. * GEM BO representing the shared DMA buffer for the given device.
*/ */
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf) struct dma_buf *dma_buf)

View File

@ -31,6 +31,7 @@
#include "soc15_common.h" #include "soc15_common.h"
#include "psp_v3_1.h" #include "psp_v3_1.h"
#include "psp_v10_0.h" #include "psp_v10_0.h"
#include "psp_v11_0.h"
static void psp_set_funcs(struct amdgpu_device *adev); static void psp_set_funcs(struct amdgpu_device *adev);
@ -52,12 +53,14 @@ static int psp_sw_init(void *handle)
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_VEGA10: case CHIP_VEGA10:
case CHIP_VEGA12: case CHIP_VEGA12:
case CHIP_VEGA20:
psp_v3_1_set_psp_funcs(psp); psp_v3_1_set_psp_funcs(psp);
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
psp_v10_0_set_psp_funcs(psp); psp_v10_0_set_psp_funcs(psp);
break; break;
case CHIP_VEGA20:
psp_v11_0_set_psp_funcs(psp);
break;
default: default:
return -EINVAL; return -EINVAL;
} }
@ -131,6 +134,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
msleep(1); msleep(1);
} }
/* the status field must be 0 after FW is loaded */
if (ucode && psp->cmd_buf_mem->resp.status) {
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
psp->cmd_buf_mem->resp.status, ucode->ucode_id);
return -EINVAL;
}
if (ucode) { if (ucode) {
ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
@ -160,7 +170,7 @@ static int psp_tmr_init(struct psp_context *psp)
* Note: this memory need be reserved till the driver * Note: this memory need be reserved till the driver
* uninitializes. * uninitializes.
*/ */
ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000, ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x100000,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM,
&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@ -176,7 +186,9 @@ static int psp_tmr_load(struct psp_context *psp)
if (!cmd) if (!cmd)
return -ENOMEM; return -ENOMEM;
psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000); psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
PSP_TMR_SIZE, psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd, ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 1); psp->fence_buf_mc_addr, 1);
@ -594,3 +606,12 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
.rev = 0, .rev = 0,
.funcs = &psp_ip_funcs, .funcs = &psp_ip_funcs,
}; };
const struct amdgpu_ip_block_version psp_v11_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 11,
.minor = 0,
.rev = 0,
.funcs = &psp_ip_funcs,
};

View File

@ -32,8 +32,10 @@
#define PSP_CMD_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000 #define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000 #define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE 0x400000
struct psp_context; struct psp_context;
struct psp_xgmi_topology_info;
enum psp_ring_type enum psp_ring_type
{ {
@ -63,18 +65,27 @@ struct psp_funcs
int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode, int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd); struct psp_gfx_cmd_resp *cmd);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
int (*ring_stop)(struct psp_context *psp, int (*ring_stop)(struct psp_context *psp,
enum psp_ring_type ring_type); enum psp_ring_type ring_type);
int (*ring_destroy)(struct psp_context *psp, int (*ring_destroy)(struct psp_context *psp,
enum psp_ring_type ring_type); enum psp_ring_type ring_type);
int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, int (*cmd_submit)(struct psp_context *psp,
uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); struct amdgpu_firmware_info *ucode,
uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
int index);
bool (*compare_sram_data)(struct psp_context *psp, bool (*compare_sram_data)(struct psp_context *psp,
struct amdgpu_firmware_info *ucode, struct amdgpu_firmware_info *ucode,
enum AMDGPU_UCODE_ID ucode_type); enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp); bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp);
uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
}; };
struct psp_context struct psp_context
@ -83,11 +94,11 @@ struct psp_context
struct psp_ring km_ring; struct psp_ring km_ring;
struct psp_gfx_cmd_resp *cmd; struct psp_gfx_cmd_resp *cmd;
const struct psp_funcs *funcs; const struct psp_funcs *funcs;
/* fence buffer */ /* fence buffer */
struct amdgpu_bo *fw_pri_bo; struct amdgpu_bo *fw_pri_bo;
uint64_t fw_pri_mc_addr; uint64_t fw_pri_mc_addr;
void *fw_pri_buf; void *fw_pri_buf;
/* sos firmware */ /* sos firmware */
@ -100,8 +111,8 @@ struct psp_context
uint8_t *sos_start_addr; uint8_t *sos_start_addr;
/* tmr buffer */ /* tmr buffer */
struct amdgpu_bo *tmr_bo; struct amdgpu_bo *tmr_bo;
uint64_t tmr_mc_addr; uint64_t tmr_mc_addr;
void *tmr_buf; void *tmr_buf;
/* asd firmware and buffer */ /* asd firmware and buffer */
@ -110,13 +121,13 @@ struct psp_context
uint32_t asd_feature_version; uint32_t asd_feature_version;
uint32_t asd_ucode_size; uint32_t asd_ucode_size;
uint8_t *asd_start_addr; uint8_t *asd_start_addr;
struct amdgpu_bo *asd_shared_bo; struct amdgpu_bo *asd_shared_bo;
uint64_t asd_shared_mc_addr; uint64_t asd_shared_mc_addr;
void *asd_shared_buf; void *asd_shared_buf;
/* fence buffer */ /* fence buffer */
struct amdgpu_bo *fence_buf_bo; struct amdgpu_bo *fence_buf_bo;
uint64_t fence_buf_mc_addr; uint64_t fence_buf_mc_addr;
void *fence_buf; void *fence_buf;
/* cmd buffer */ /* cmd buffer */
@ -130,6 +141,23 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID); enum AMDGPU_UCODE_ID);
}; };
struct psp_xgmi_topology_info {
/* Generated by PSP to identify the GPU instance within xgmi connection */
uint64_t device_id;
/*
* If all bits are set to 0, the driver indicates it wants to retrieve the
* xgmi connection vector topology, but not access-enable the connections.
* If some or all bits are set to 1, the driver indicates it wants to retrieve
* the current xgmi topology and access-enable the link to GPU[i] associated
* with the bit position in the vector.
* On return, bits indicate which xgmi links are present/active depending
* on the value passed in. The relative bit offset for the relative GPU index
* within the hive is always marked active.
*/
uint32_t connection_mask;
uint32_t reserved; /* must be 0 */
};
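A sketch only (not driver code; the two-GPU hive and the enabled link are assumptions) of how the query/enable semantics described above combine with the psp_xgmi_* wrappers defined below:
static int example_xgmi_sync_topology(struct psp_context *psp)
{
	struct psp_xgmi_topology_info topology = {
		.connection_mask = 0,	/* all zero: query only, enable nothing */
	};
	int ret;
	ret = psp_xgmi_get_topology_info(psp, 2, &topology);
	if (ret)
		return ret;
	topology.connection_mask = 0x1;	/* access-enable the link to GPU[0] */
	return psp_xgmi_set_topology_info(psp, 2, &topology);
}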
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) #define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@ -149,6 +177,18 @@ struct amdgpu_psp_funcs {
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \ #define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
#define psp_xgmi_get_device_id(psp) \
((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
#define psp_xgmi_get_hive_id(psp) \
((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
#define psp_xgmi_get_topology_info(psp, num_device, topology) \
((psp)->funcs->xgmi_get_topology_info ? \
(psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL)
#define psp_xgmi_set_topology_info(psp, num_device, topology) \
((psp)->funcs->xgmi_set_topology_info ? \
(psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amd_ip_funcs psp_ip_funcs;
@ -159,5 +199,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
int psp_gpu_reset(struct amdgpu_device *adev); int psp_gpu_reset(struct amdgpu_device *adev);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
#endif #endif

View File

@ -1,316 +0,0 @@
/*
* Copyright 2017 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Andres Rodriguez
*/
#include "amdgpu.h"
#include "amdgpu_ring.h"
static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
int hw_ip)
{
if (!mapper)
return -EINVAL;
if (hw_ip > AMDGPU_MAX_IP_NUM)
return -EINVAL;
mapper->hw_ip = hw_ip;
mutex_init(&mapper->lock);
memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
return 0;
}
static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
int ring)
{
return mapper->queue_map[ring];
}
static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
int ring, struct amdgpu_ring *pring)
{
if (WARN_ON(mapper->queue_map[ring])) {
DRM_ERROR("Un-expected ring re-map\n");
return -EINVAL;
}
mapper->queue_map[ring] = pring;
return 0;
}
static int amdgpu_identity_map(struct amdgpu_device *adev,
struct amdgpu_queue_mapper *mapper,
u32 ring,
struct amdgpu_ring **out_ring)
{
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
*out_ring = &adev->gfx.gfx_ring[ring];
break;
case AMDGPU_HW_IP_COMPUTE:
*out_ring = &adev->gfx.compute_ring[ring];
break;
case AMDGPU_HW_IP_DMA:
*out_ring = &adev->sdma.instance[ring].ring;
break;
case AMDGPU_HW_IP_UVD:
*out_ring = &adev->uvd.inst[0].ring;
break;
case AMDGPU_HW_IP_VCE:
*out_ring = &adev->vce.ring[ring];
break;
case AMDGPU_HW_IP_UVD_ENC:
*out_ring = &adev->uvd.inst[0].ring_enc[ring];
break;
case AMDGPU_HW_IP_VCN_DEC:
*out_ring = &adev->vcn.ring_dec;
break;
case AMDGPU_HW_IP_VCN_ENC:
*out_ring = &adev->vcn.ring_enc[ring];
break;
case AMDGPU_HW_IP_VCN_JPEG:
*out_ring = &adev->vcn.ring_jpeg;
break;
default:
*out_ring = NULL;
DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
return -EINVAL;
}
return amdgpu_update_cached_map(mapper, ring, *out_ring);
}
static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
{
switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
return AMDGPU_RING_TYPE_GFX;
case AMDGPU_HW_IP_COMPUTE:
return AMDGPU_RING_TYPE_COMPUTE;
case AMDGPU_HW_IP_DMA:
return AMDGPU_RING_TYPE_SDMA;
case AMDGPU_HW_IP_UVD:
return AMDGPU_RING_TYPE_UVD;
case AMDGPU_HW_IP_VCE:
return AMDGPU_RING_TYPE_VCE;
default:
DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
return -1;
}
}
static int amdgpu_lru_map(struct amdgpu_device *adev,
struct amdgpu_queue_mapper *mapper,
u32 user_ring, bool lru_pipe_order,
struct amdgpu_ring **out_ring)
{
int r, i, j;
int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
int ring_blacklist[AMDGPU_MAX_RINGS];
struct amdgpu_ring *ring;
/* 0 is a valid ring index, so initialize to -1 */
memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
ring = mapper->queue_map[i];
if (ring)
ring_blacklist[j++] = ring->idx;
}
r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
j, lru_pipe_order, out_ring);
if (r)
return r;
return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
}
/**
* amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
*
* Initialize the selected @mgr (all asics).
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr)
{
int i, r;
if (!adev || !mgr)
return -EINVAL;
memset(mgr, 0, sizeof(*mgr));
for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
if (r)
return r;
}
return 0;
}
/**
* amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
*
* De-initialize the selected @mgr (all asics).
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr)
{
return 0;
}
/**
* amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
* @hw_ip: HW IP enum
* @instance: HW instance
* @ring: user ring id
* @out_ring: pointer to mapped amdgpu_ring
*
* Map a userspace ring id to an appropriate kernel ring. Different
* policies are configurable at a HW IP level.
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr,
u32 hw_ip, u32 instance, u32 ring,
struct amdgpu_ring **out_ring)
{
int i, r, ip_num_rings = 0;
struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
if (!adev || !mgr || !out_ring)
return -EINVAL;
if (hw_ip >= AMDGPU_MAX_IP_NUM)
return -EINVAL;
if (ring >= AMDGPU_MAX_RINGS)
return -EINVAL;
/* Right now all IPs have only one instance - multiple rings. */
if (instance != 0) {
DRM_DEBUG("invalid ip instance: %d\n", instance);
return -EINVAL;
}
switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
ip_num_rings = adev->gfx.num_gfx_rings;
break;
case AMDGPU_HW_IP_COMPUTE:
ip_num_rings = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
ip_num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (!(adev->uvd.harvest_config & (1 << i)))
ip_num_rings++;
}
break;
case AMDGPU_HW_IP_VCE:
ip_num_rings = adev->vce.num_rings;
break;
case AMDGPU_HW_IP_UVD_ENC:
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (!(adev->uvd.harvest_config & (1 << i)))
ip_num_rings++;
}
ip_num_rings =
adev->uvd.num_enc_rings * ip_num_rings;
break;
case AMDGPU_HW_IP_VCN_DEC:
ip_num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_ENC:
ip_num_rings = adev->vcn.num_enc_rings;
break;
case AMDGPU_HW_IP_VCN_JPEG:
ip_num_rings = 1;
break;
default:
DRM_DEBUG("unknown ip type: %d\n", hw_ip);
return -EINVAL;
}
if (ring >= ip_num_rings) {
DRM_DEBUG("Ring index:%d exceeds maximum:%d for ip:%d\n",
ring, ip_num_rings, hw_ip);
return -EINVAL;
}
mutex_lock(&mapper->lock);
*out_ring = amdgpu_get_cached_map(mapper, ring);
if (*out_ring) {
/* cache hit */
r = 0;
goto out_unlock;
}
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
case AMDGPU_HW_IP_UVD:
case AMDGPU_HW_IP_VCE:
case AMDGPU_HW_IP_UVD_ENC:
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
case AMDGPU_HW_IP_VCN_JPEG:
r = amdgpu_identity_map(adev, mapper, ring, out_ring);
break;
case AMDGPU_HW_IP_DMA:
r = amdgpu_lru_map(adev, mapper, ring, false, out_ring);
break;
case AMDGPU_HW_IP_COMPUTE:
r = amdgpu_lru_map(adev, mapper, ring, true, out_ring);
break;
default:
*out_ring = NULL;
r = -EINVAL;
DRM_DEBUG("unknown HW IP type: %d\n", mapper->hw_ip);
}
out_unlock:
mutex_unlock(&mapper->lock);
return r;
}
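For context on what this deletion replaces: before the scheduler load-balancing rework, the command submission path resolved a userspace ring id through this mapper. A minimal sketch of such a caller (hypothetical, error handling trimmed; the chunk field names follow the amdgpu CS UAPI):
	struct amdgpu_ring *ring;
	int r;
	/* translate the UAPI (ip_type, instance, ring) triple to a kernel ring */
	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
				 chunk_ib->ip_type,      /* e.g. AMDGPU_HW_IP_COMPUTE */
				 chunk_ib->ip_instance,
				 chunk_ib->ring,
				 &ring);
	if (r)
		return r;
	/* jobs were then pushed directly to ring->sched; with load balancing
	 * the scheduler picks the ring itself, so this mapper goes away */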

View File

@ -135,9 +135,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
if (ring->funcs->end_use) if (ring->funcs->end_use)
ring->funcs->end_use(ring); ring->funcs->end_use(ring);
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)
amdgpu_ring_lru_touch(ring->adev, ring);
} }
/** /**
@ -320,8 +317,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->max_dw = max_dw; ring->max_dw = max_dw;
ring->priority = DRM_SCHED_PRIORITY_NORMAL; ring->priority = DRM_SCHED_PRIORITY_NORMAL;
mutex_init(&ring->priority_mutex); mutex_init(&ring->priority_mutex);
INIT_LIST_HEAD(&ring->lru_list);
amdgpu_ring_lru_touch(adev, ring);
for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i) for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
atomic_set(&ring->num_jobs[i], 0); atomic_set(&ring->num_jobs[i], 0);
@ -368,99 +363,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
ring->adev->rings[ring->idx] = NULL; ring->adev->rings[ring->idx] = NULL;
} }
static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
/* list_move_tail handles the case where ring isn't part of the list */
list_move_tail(&ring->lru_list, &adev->ring_lru_list);
}
static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
int *blacklist, int num_blacklist)
{
int i;
for (i = 0; i < num_blacklist; i++) {
if (ring->idx == blacklist[i])
return true;
}
return false;
}
/**
* amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
*
* @adev: amdgpu_device pointer
* @type: amdgpu_ring_type enum
* @blacklist: blacklisted ring ids array
* @num_blacklist: number of entries in @blacklist
* @lru_pipe_order: find a ring from the least recently used pipe
* @ring: output ring
*
* Retrieve the amdgpu_ring structure for the least recently used ring of
* a specific IP block (all asics).
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
int *blacklist, int num_blacklist,
bool lru_pipe_order, struct amdgpu_ring **ring)
{
struct amdgpu_ring *entry;
/* List is sorted in LRU order, find first entry corresponding
* to the desired HW IP */
*ring = NULL;
spin_lock(&adev->ring_lru_list_lock);
list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
if (entry->funcs->type != type)
continue;
if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
continue;
if (!*ring) {
*ring = entry;
/* We are done for ring LRU */
if (!lru_pipe_order)
break;
}
/* Move all rings on the same pipe to the end of the list */
if (entry->pipe == (*ring)->pipe)
amdgpu_ring_lru_touch_locked(adev, entry);
}
/* Move the ring we found to the end of the list */
if (*ring)
amdgpu_ring_lru_touch_locked(adev, *ring);
spin_unlock(&adev->ring_lru_list_lock);
if (!*ring) {
DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
return -EINVAL;
}
return 0;
}
/**
* amdgpu_ring_lru_touch - mark a ring as recently being used
*
* @adev: amdgpu_device pointer
* @ring: ring to touch
*
* Move @ring to the tail of the lru list
*/
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
spin_lock(&adev->ring_lru_list_lock);
amdgpu_ring_lru_touch_locked(adev, ring);
spin_unlock(&adev->ring_lru_list_lock);
}
/** /**
* amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
* *
@ -481,6 +383,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
} }
/**
* amdgpu_ring_soft_recovery - try to soft recover a ring lockup
*
* @ring: ring to try the recovery on
* @vmid: VMID we try to get going again
* @fence: timedout fence
*
* Tries to get a ring proceeding again when it is stuck.
*/
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
if (!ring->funcs->soft_recovery)
return false;
atomic_inc(&ring->adev->gpu_reset_counter);
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
ring->funcs->soft_recovery(ring, vmid);
return dma_fence_is_signaled(fence);
}
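A minimal sketch of how this helper gets wired up; the backend callback body and the register names are assumptions for illustration only, the fixed piece is the amdgpu_ring_soft_recovery() entry point above:
/* Illustrative ring backend: kill the waves of the hung VMID. */
static void my_gfx_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	uint32_t value = 0;
	/* assumed SQ_CMD encoding, purely illustrative */
	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
}
/* Illustrative job timeout handler: try the light-weight path first. */
if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
	DRM_ERROR("ring %s timeout, but soft recovered\n", ring->name);
	return;
}
/* otherwise escalate to a full GPU reset */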
/* /*
* Debugfs info * Debugfs info
*/ */

View File

@ -77,7 +77,6 @@ struct amdgpu_fence_driver {
bool initialized; bool initialized;
struct amdgpu_irq_src *irq_src; struct amdgpu_irq_src *irq_src;
unsigned irq_type; unsigned irq_type;
struct timer_list fallback_timer;
unsigned num_fences_mask; unsigned num_fences_mask;
spinlock_t lock; spinlock_t lock;
struct dma_fence **fences; struct dma_fence **fences;
@ -168,6 +167,8 @@ struct amdgpu_ring_funcs {
/* priority functions */ /* priority functions */
void (*set_priority) (struct amdgpu_ring *ring, void (*set_priority) (struct amdgpu_ring *ring,
enum drm_sched_priority priority); enum drm_sched_priority priority);
/* Try to soft recover the ring to make the fence signal */
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
}; };
struct amdgpu_ring { struct amdgpu_ring {
@ -175,7 +176,6 @@ struct amdgpu_ring {
const struct amdgpu_ring_funcs *funcs; const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv; struct amdgpu_fence_driver fence_drv;
struct drm_gpu_scheduler sched; struct drm_gpu_scheduler sched;
struct list_head lru_list;
struct amdgpu_bo *ring_obj; struct amdgpu_bo *ring_obj;
volatile uint32_t *ring; volatile uint32_t *ring;
@ -221,6 +221,30 @@ struct amdgpu_ring {
#endif #endif
}; };
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
@ -234,13 +258,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type); unsigned irq_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring); void amdgpu_ring_fini(struct amdgpu_ring *ring);
int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
int *blacklist, int num_blacklist,
bool lru_pipe_order, struct amdgpu_ring **ring);
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t val0, uint32_t reg0, uint32_t val0,
uint32_t reg1, uint32_t val1); uint32_t reg1, uint32_t val1);
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence);
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{ {

View File

@ -226,6 +226,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) { for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
struct amdgpu_sa_bo *sa_bo; struct amdgpu_sa_bo *sa_bo;
fences[i] = NULL;
if (list_empty(&sa_manager->flist[i])) if (list_empty(&sa_manager->flist[i]))
continue; continue;
@ -296,10 +298,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
spin_lock(&sa_manager->wq.lock); spin_lock(&sa_manager->wq.lock);
do { do {
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) { for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
fences[i] = NULL;
tries[i] = 0; tries[i] = 0;
}
do { do {
amdgpu_sa_bo_try_free(sa_manager); amdgpu_sa_bo_try_free(sa_manager);

View File

@ -0,0 +1,44 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_sdma.h"
/*
* GPU SDMA IP block helpers function.
*/
struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
if (&adev->sdma.instance[i].ring == ring)
break;
if (i < AMDGPU_MAX_SDMA_INSTANCES)
return &adev->sdma.instance[i];
else
return NULL;
}
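A minimal usage sketch (hypothetical caller): ring backends use this lookup to reach per-instance state such as the burst-NOP capability when padding the ring. The packet macros are assumptions, since NOP encodings are hardware specific.
static void my_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
	uint32_t i;
	for (i = 0; i < count; i++)
		if (i == 0 && sdma && sdma->burst_nop && count > 1)
			amdgpu_ring_write(ring, MY_SDMA_BURST_NOP(count - 1)); /* assumed macro */
		else
			amdgpu_ring_write(ring, MY_SDMA_NOP); /* assumed macro */
}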

View File

@ -0,0 +1,101 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_SDMA_H__
#define __AMDGPU_SDMA_H__
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_TRAP0 = 0,
AMDGPU_SDMA_IRQ_TRAP1,
AMDGPU_SDMA_IRQ_LAST
};
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
uint32_t fw_version;
uint32_t feature_version;
struct amdgpu_ring ring;
bool burst_nop;
};
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
#ifdef CONFIG_DRM_AMDGPU_SI
//SI DMA has a different trap irq number for the second engine
struct amdgpu_irq_src trap_irq_1;
#endif
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
uint32_t srbm_soft_reset;
};
/*
* Provided by hw blocks that can move/clear data. e.g., gfx or sdma
* But currently, we use sdma to move data.
*/
struct amdgpu_buffer_funcs {
/* maximum bytes in a single operation */
uint32_t copy_max_bytes;
/* number of dw to reserve per operation */
unsigned copy_num_dw;
/* used for buffer migration */
void (*emit_copy_buffer)(struct amdgpu_ib *ib,
/* src addr in bytes */
uint64_t src_offset,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
/* number of dw to reserve per operation */
unsigned fill_num_dw;
/* used for buffer clearing */
void (*emit_fill_buffer)(struct amdgpu_ib *ib,
/* value to write to memory */
uint32_t src_data,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to fill */
uint32_t byte_count);
};
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
struct amdgpu_sdma_instance *
amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
#endif
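A minimal sketch of an SDMA backend filling in amdgpu_buffer_funcs so that the amdgpu_emit_copy_buffer()/amdgpu_emit_fill_buffer() macros above resolve to real packets; the packet emission and the limits shown are illustrative assumptions, not taken from this diff:
static void my_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
				     uint64_t src_offset,
				     uint64_t dst_offset,
				     uint32_t byte_count)
{
	/* a hardware-specific COPY_LINEAR packet would be written to ib->ptr[] here */
}
static void my_sdma_emit_fill_buffer(struct amdgpu_ib *ib,
				     uint32_t src_data,
				     uint64_t dst_offset,
				     uint32_t byte_count)
{
	/* a hardware-specific CONSTANT_FILL packet would be written here */
}
static const struct amdgpu_buffer_funcs my_sdma_buffer_funcs = {
	.copy_max_bytes   = 0x400000,	/* illustrative limit */
	.copy_num_dw      = 7,		/* dwords reserved per copy */
	.emit_copy_buffer = my_sdma_emit_copy_buffer,
	.fill_max_bytes   = 0x400000,
	.fill_num_dw      = 5,
	.emit_fill_buffer = my_sdma_emit_fill_buffer,
};
/* a backend would then point adev->mman.buffer_funcs at this table */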

View File

@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign( TP_fast_assign(
__entry->bo_list = p->bo_list; __entry->bo_list = p->bo_list;
__entry->ring = p->ring->idx; __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
__entry->dw = p->job->ibs[i].length_dw; __entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted( __entry->fences = amdgpu_fence_count_emitted(
p->ring); to_amdgpu_ring(p->entity->rq->sched));
), ),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw, __entry->bo_list, __entry->ring, __entry->dw,
@ -462,6 +462,30 @@ TRACE_EVENT(amdgpu_bo_move,
__entry->new_placement, __entry->bo_size) __entry->new_placement, __entry->bo_size)
); );
TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
__field(const char *,name)
__field(uint64_t, id)
__field(struct dma_fence *, fence)
__field(uint64_t, ctx)
__field(unsigned, seqno)
),
TP_fast_assign(
__entry->name = sched_job->base.sched->name;
__entry->id = sched_job->base.id;
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
__entry->name, __entry->id,
__entry->fence, __entry->ctx,
__entry->seqno)
);
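For reference, a TRACE_EVENT defined this way is emitted from driver code through the generated trace_<name>() helper; a minimal sketch of a call site (the surrounding condition is an assumption for illustration):
	if (need_pipe_sync) {
		trace_amdgpu_ib_pipe_sync(job, fence); /* generated from the event above */
		amdgpu_ring_emit_pipeline_sync(ring);
	}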
#undef AMDGPU_JOB_GET_TIMELINE_NAME #undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif #endif

View File

@ -47,6 +47,7 @@
#include "amdgpu_object.h" #include "amdgpu_object.h"
#include "amdgpu_trace.h" #include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "bif/bif_4_1_d.h" #include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@ -255,6 +256,13 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
abo = ttm_to_amdgpu_bo(bo); abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) { switch (bo->mem.mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;
case TTM_PL_VRAM: case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) { if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */ /* Move to system memory */
@ -282,6 +290,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case TTM_PL_TT: case TTM_PL_TT:
default: default:
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
break;
} }
*placement = abo->placement; *placement = abo->placement;
} }
@ -344,7 +353,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
{ {
uint64_t addr = 0; uint64_t addr = 0;
if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) { if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
addr = mm_node->start << PAGE_SHIFT; addr = mm_node->start << PAGE_SHIFT;
addr += bo->bdev->man[mem->mem_type].gpu_offset; addr += bo->bdev->man[mem->mem_type].gpu_offset;
} }
@ -432,8 +441,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
/* Map only what needs to be accessed. Map src to window 0 and /* Map only what needs to be accessed. Map src to window 0 and
* dst to window 1 * dst to window 1
*/ */
if (src->mem->mem_type == TTM_PL_TT && if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) {
!amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
r = amdgpu_map_buffer(src->bo, src->mem, r = amdgpu_map_buffer(src->bo, src->mem,
PFN_UP(cur_size + src_page_offset), PFN_UP(cur_size + src_page_offset),
src_node_start, 0, ring, src_node_start, 0, ring,
@ -446,8 +454,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
from += src_page_offset; from += src_page_offset;
} }
if (dst->mem->mem_type == TTM_PL_TT && if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) {
!amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
r = amdgpu_map_buffer(dst->bo, dst->mem, r = amdgpu_map_buffer(dst->bo, dst->mem,
PFN_UP(cur_size + dst_page_offset), PFN_UP(cur_size + dst_page_offset),
dst_node_start, 1, ring, dst_node_start, 1, ring,
@ -525,7 +532,11 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
if (r) if (r)
goto error; goto error;
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); /* Always block for VM page tables before committing the new location */
if (bo->type == ttm_bo_type_kernel)
r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
else
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
dma_fence_put(fence); dma_fence_put(fence);
return r; return r;
@ -676,6 +687,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
amdgpu_move_null(bo, new_mem); amdgpu_move_null(bo, new_mem);
return 0; return 0;
} }
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
new_mem->mem_type == AMDGPU_PL_OA) {
/* Nothing to save here */
amdgpu_move_null(bo, new_mem);
return 0;
}
if (!adev->mman.buffer_funcs_enabled) if (!adev->mman.buffer_funcs_enabled)
goto memcpy; goto memcpy;
@ -1082,42 +1103,48 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
struct ttm_mem_reg tmp; struct ttm_mem_reg tmp;
struct ttm_placement placement; struct ttm_placement placement;
struct ttm_place placements; struct ttm_place placements;
uint64_t flags; uint64_t addr, flags;
int r; int r;
if (bo->mem.mem_type != TTM_PL_TT || if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
return 0; return 0;
/* allocate GTT space */ addr = amdgpu_gmc_agp_addr(bo);
tmp = bo->mem; if (addr != AMDGPU_BO_INVALID_OFFSET) {
tmp.mm_node = NULL; bo->mem.start = addr >> PAGE_SHIFT;
placement.num_placement = 1; } else {
placement.placement = &placements;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); /* allocate GART space */
if (unlikely(r)) tmp = bo->mem;
return r; tmp.mm_node = NULL;
placement.num_placement = 1;
placement.placement = &placements;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
TTM_PL_FLAG_TT;
/* compute PTE flags for this buffer object */ r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); if (unlikely(r))
return r;
/* Bind pages */ /* compute PTE flags for this buffer object */
gtt->offset = (u64)tmp.start << PAGE_SHIFT; flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
r = amdgpu_ttm_gart_bind(adev, bo, flags);
if (unlikely(r)) { /* Bind pages */
ttm_bo_mem_put(bo, &tmp); gtt->offset = (u64)tmp.start << PAGE_SHIFT;
return r; r = amdgpu_ttm_gart_bind(adev, bo, flags);
if (unlikely(r)) {
ttm_bo_mem_put(bo, &tmp);
return r;
}
ttm_bo_mem_put(bo, &bo->mem);
bo->mem = tmp;
} }
ttm_bo_mem_put(bo, &bo->mem);
bo->mem = tmp;
bo->offset = (bo->mem.start << PAGE_SHIFT) + bo->offset = (bo->mem.start << PAGE_SHIFT) +
bo->bdev->man[bo->mem.mem_type].gpu_offset; bo->bdev->man[bo->mem.mem_type].gpu_offset;
@ -1427,13 +1454,14 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
} }
/** /**
* amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
* *
* @ttm: The ttm_tt object to compute the flags for * @ttm: The ttm_tt object to compute the flags for
* @mem: The memory registry backing this ttm_tt object * @mem: The memory registry backing this ttm_tt object
*
* Figure out the flags to use for a VM PDE (Page Directory Entry).
*/ */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
struct ttm_mem_reg *mem)
{ {
uint64_t flags = 0; uint64_t flags = 0;
@ -1447,6 +1475,22 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
flags |= AMDGPU_PTE_SNOOPED; flags |= AMDGPU_PTE_SNOOPED;
} }
return flags;
}
/**
* amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
*
* @ttm: The ttm_tt object to compute the flags for
* @mem: The memory registry backing this ttm_tt object
* Figure out the flags to use for a VM PTE (Page Table Entry).
*/
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem)
{
uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
flags |= adev->gart.gart_pte_flags; flags |= adev->gart.gart_pte_flags;
flags |= AMDGPU_PTE_READABLE; flags |= AMDGPU_PTE_READABLE;
@ -1769,14 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
* This is used for VGA emulation and pre-OS scanout buffers to * This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS * avoid display artifacts while transitioning between pre-OS
* and driver. */ * and driver. */
if (adev->gmc.stolen_size) { r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory,
&adev->stolen_vga_memory, NULL, NULL);
NULL, NULL); if (r)
if (r) return r;
return r;
}
DRM_INFO("amdgpu: %uM of VRAM memory ready\n", DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@ -1803,45 +1845,45 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
(unsigned)(gtt_size / (1024 * 1024))); (unsigned)(gtt_size / (1024 * 1024)));
/* Initialize various on-chip memory pools */ /* Initialize various on-chip memory pools */
adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; adev->gds.mem.total_size);
adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; if (r) {
adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT; DRM_ERROR("Failed initializing GDS heap.\n");
adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT; return r;
adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
/* GDS Memory */
if (adev->gds.mem.total_size) {
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
adev->gds.mem.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
} }
/* GWS */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
if (adev->gds.gws.total_size) { PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, &adev->gds.gds_gfx_bo, NULL, NULL);
adev->gds.gws.total_size >> PAGE_SHIFT); if (r)
if (r) { return r;
DRM_ERROR("Failed initializing gws heap.\n");
return r; r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
} adev->gds.gws.total_size);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
} }
/* OA */ r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
if (adev->gds.oa.total_size) { PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, &adev->gds.gws_gfx_bo, NULL, NULL);
adev->gds.oa.total_size >> PAGE_SHIFT); if (r)
if (r) { return r;
DRM_ERROR("Failed initializing oa heap.\n");
return r; r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
} adev->gds.oa.total_size);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
} }
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
&adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
/* Register debugfs entries for amdgpu_ttm */ /* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev); r = amdgpu_ttm_debugfs_init(adev);
if (r) { if (r) {
@ -1876,12 +1918,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
if (adev->gds.mem.total_size) ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
if (adev->gds.gws.total_size) ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
if (adev->gds.oa.total_size)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_device_release(&adev->mman.bdev); ttm_bo_device_release(&adev->mman.bdev);
amdgpu_ttm_global_fini(adev); amdgpu_ttm_global_fini(adev);
adev->mman.initialized = false; adev->mman.initialized = false;
@ -1987,7 +2026,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
src_addr = num_dw * 4; src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr; src_addr += job->ibs[0].gpu_addr;
dst_addr = adev->gart.table_addr; dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes); dst_addr, num_bytes);
@ -2047,7 +2086,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
if (r) if (r)
return r; return r;
job->vm_needs_flush = vm_needs_flush; if (vm_needs_flush) {
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
}
if (resv) { if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv, r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED, AMDGPU_FENCE_OWNER_UNDEFINED,
@ -2183,7 +2225,7 @@ error_free:
static int amdgpu_mm_dump_table(struct seq_file *m, void *data) static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
{ {
struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_info_node *node = (struct drm_info_node *)m->private;
unsigned ttm_pl = *(int *)node->info_ent->data; unsigned ttm_pl = (uintptr_t)node->info_ent->data;
struct drm_device *dev = node->minor->dev; struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl]; struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
@ -2193,12 +2235,12 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
return 0; return 0;
} }
static int ttm_pl_vram = TTM_PL_VRAM;
static int ttm_pl_tt = TTM_PL_TT;
static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram}, {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt}, {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
{"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
{"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
{"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
#ifdef CONFIG_SWIOTLB #ifdef CONFIG_SWIOTLB
{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL} {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}

View File

@ -116,6 +116,7 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated); int *last_invalidated);
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem); struct ttm_mem_reg *mem);

View File

@ -277,6 +277,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_PITCAIRN: case CHIP_PITCAIRN:
case CHIP_VERDE: case CHIP_VERDE:
case CHIP_OLAND: case CHIP_OLAND:
case CHIP_HAINAN:
return AMDGPU_FW_LOAD_DIRECT; return AMDGPU_FW_LOAD_DIRECT;
#endif #endif
#ifdef CONFIG_DRM_AMDGPU_CIK #ifdef CONFIG_DRM_AMDGPU_CIK
@ -303,12 +304,11 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_VEGA10: case CHIP_VEGA10:
case CHIP_RAVEN: case CHIP_RAVEN:
case CHIP_VEGA12: case CHIP_VEGA12:
case CHIP_VEGA20:
if (!load_type) if (!load_type)
return AMDGPU_FW_LOAD_DIRECT; return AMDGPU_FW_LOAD_DIRECT;
else else
return AMDGPU_FW_LOAD_PSP; return AMDGPU_FW_LOAD_PSP;
case CHIP_VEGA20:
return AMDGPU_FW_LOAD_DIRECT;
default: default:
DRM_ERROR("Unknown firmware load type\n"); DRM_ERROR("Unknown firmware load type\n");
} }
@ -322,6 +322,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
{ {
const struct common_firmware_header *header = NULL; const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
if (NULL == ucode->fw) if (NULL == ucode->fw)
return 0; return 0;
@ -333,8 +334,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
return 0; return 0;
header = (const struct common_firmware_header *)ucode->fw->data; header = (const struct common_firmware_header *)ucode->fw->data;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP || if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
@ -343,7 +344,9 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@ -365,6 +368,20 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(cp_hdr->jt_offset) * 4), le32_to_cpu(cp_hdr->jt_offset) * 4),
ucode->ucode_size); ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_ERAM) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
le32_to_cpu(dmcu_hdr->intv_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes)),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_INTV) {
ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,

View File

@ -157,6 +157,13 @@ struct gpu_info_firmware_header_v1_0 {
uint16_t version_minor; /* version */ uint16_t version_minor; /* version */
}; };
/* version_major=1, version_minor=0 */
struct dmcu_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t intv_offset_bytes; /* interrupt vectors offset from end of header, in bytes */
uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
};
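In other words, a DMCU image is laid out as <common header><ERAM payload><interrupt vectors>, and the two new fields let the loader split the payload, mirroring the AMDGPU_UCODE_ID_DMCU_ERAM/INTV handling above. A minimal sketch of that arithmetic (local variable names are assumptions):
	const struct dmcu_firmware_header_v1_0 *hdr =
		(const struct dmcu_firmware_header_v1_0 *)fw->data;
	const uint8_t *payload = (const uint8_t *)fw->data +
		le32_to_cpu(hdr->header.ucode_array_offset_bytes);
	/* ERAM: everything except the trailing interrupt vectors */
	uint32_t eram_size = le32_to_cpu(hdr->header.ucode_size_bytes) -
			     le32_to_cpu(hdr->intv_size_bytes);
	const uint8_t *eram = payload;
	/* interrupt vectors: intv_size_bytes starting at intv_offset_bytes */
	const uint8_t *intv = payload + le32_to_cpu(hdr->intv_offset_bytes);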
/* header is fixed size */ /* header is fixed size */
union amdgpu_firmware_header { union amdgpu_firmware_header {
struct common_firmware_header common; struct common_firmware_header common;
@ -170,6 +177,7 @@ union amdgpu_firmware_header {
struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1; struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info; struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
uint8_t raw[0x100]; uint8_t raw[0x100];
}; };
@ -193,8 +201,11 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_STORAGE, AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD, AMDGPU_UCODE_ID_UVD,
AMDGPU_UCODE_ID_UVD1,
AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_VCE,
AMDGPU_UCODE_ID_VCN, AMDGPU_UCODE_ID_VCN,
AMDGPU_UCODE_ID_DMCU_ERAM,
AMDGPU_UCODE_ID_DMCU_INTV,
AMDGPU_UCODE_ID_MAXIMUM, AMDGPU_UCODE_ID_MAXIMUM,
}; };
@ -205,6 +216,12 @@ enum AMDGPU_UCODE_STATUS {
AMDGPU_UCODE_STATUS_LOADED, AMDGPU_UCODE_STATUS_LOADED,
}; };
enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_DIRECT = 0,
AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP,
};
/* conform to smu_ucode_xfer_cz.h */ /* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001 #define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002 #define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@ -232,6 +249,24 @@ struct amdgpu_firmware_info {
uint32_t tmr_mc_addr_hi; uint32_t tmr_mc_addr_hi;
}; };
struct amdgpu_firmware {
struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
enum amdgpu_firmware_load_type load_type;
struct amdgpu_bo *fw_buf;
unsigned int fw_size;
unsigned int max_ucodes;
/* firmwares are loaded by psp instead of smu from vega10 */
const struct amdgpu_psp_funcs *funcs;
struct amdgpu_bo *rbuf;
struct mutex mutex;
/* gpu info firmware data pointer */
const struct firmware *gpu_info_fw;
void *fw_buf_ptr;
uint64_t fw_buf_mc;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);

View File

@ -42,8 +42,12 @@
/* Firmware Names */ /* Firmware Names */
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work); static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@ -59,7 +63,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_RAVEN: case CHIP_RAVEN:
fw_name = FIRMWARE_RAVEN; if (adev->rev_id >= 8)
fw_name = FIRMWARE_RAVEN2;
else if (adev->pdev->device == 0x15d8)
fw_name = FIRMWARE_PICASSO;
else
fw_name = FIRMWARE_RAVEN;
break; break;
default: default:
return -EINVAL; return -EINVAL;
@ -217,6 +226,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
if (fences == 0) { if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled) if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false); amdgpu_dpm_enable_uvd(adev, false);
else else
@ -233,6 +243,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
if (set_clocks) { if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled) if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true); amdgpu_dpm_enable_uvd(adev, true);
else else

View File

@ -22,18 +22,13 @@
*/ */
#include "amdgpu.h" #include "amdgpu.h"
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 20
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{ {
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
addr -= AMDGPU_VA_RESERVED_SIZE; addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
if (addr >= AMDGPU_VA_HOLE_START)
addr |= AMDGPU_VA_HOLE_END;
return addr; return addr;
} }
@ -76,7 +71,7 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va) struct amdgpu_bo_va **bo_va)
{ {
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK; uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
struct ww_acquire_ctx ticket; struct ww_acquire_ctx ticket;
struct list_head list; struct list_head list;
struct amdgpu_bo_list_entry pd; struct amdgpu_bo_list_entry pd;

File diff suppressed because it is too large

View File

@ -29,6 +29,8 @@
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <drm/gpu_scheduler.h> #include <drm/gpu_scheduler.h>
#include <drm/drm_file.h> #include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <linux/chash.h>
#include "amdgpu_sync.h" #include "amdgpu_sync.h"
#include "amdgpu_ring.h" #include "amdgpu_ring.h"
@ -48,9 +50,6 @@ struct amdgpu_bo_list_entry;
/* number of entries in page table */ /* number of entries in page table */
#define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size) #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
/* PTBs (Page Table Blocks) need to be aligned to 32K */
#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
#define AMDGPU_PTE_VALID (1ULL << 0) #define AMDGPU_PTE_VALID (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1) #define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_SNOOPED (1ULL << 2) #define AMDGPU_PTE_SNOOPED (1ULL << 2)
@ -103,19 +102,6 @@ struct amdgpu_bo_list_entry;
/* hardcode that limit for now */ /* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) #define AMDGPU_VA_RESERVED_SIZE (1ULL << 20)
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL
#define AMDGPU_VA_HOLE_END 0xffff800000000000ULL
/*
* Hardware is programmed as if the hole doesn't exists with start and end
* address values.
*
* This mask is used to remove the upper 16bits of the VA and so come up with
* the linear addr value.
*/
#define AMDGPU_VA_HOLE_MASK 0x0000ffffffffffffULL
/* max vmids dedicated for process */ /* max vmids dedicated for process */
#define AMDGPU_VM_MAX_RESERVED_VMID 1 #define AMDGPU_VM_MAX_RESERVED_VMID 1
@ -143,7 +129,7 @@ struct amdgpu_vm_bo_base {
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
/* protected by bo being reserved */ /* protected by bo being reserved */
struct list_head bo_list; struct amdgpu_vm_bo_base *next;
/* protected by spinlock */ /* protected by spinlock */
struct list_head vm_status; struct list_head vm_status;
@ -160,6 +146,27 @@ struct amdgpu_vm_pt {
struct amdgpu_vm_pt *entries; struct amdgpu_vm_pt *entries;
}; };
/* provided by hw blocks that can write ptes, e.g., sdma */
struct amdgpu_vm_pte_funcs {
/* number of dw to reserve per operation */
unsigned copy_pte_num_dw;
/* copy pte entries from GART */
void (*copy_pte)(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count);
/* write pte one entry at a time with addr mapping */
void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
uint64_t value, unsigned count,
uint32_t incr);
/* for linear pte/pde updates without addr mapping */
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint64_t flags);
};
#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48) #define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL) #define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)
@ -172,6 +179,13 @@ struct amdgpu_task_info {
pid_t tgid; pid_t tgid;
}; };
#define AMDGPU_PAGEFAULT_HASH_BITS 8
struct amdgpu_retryfault_hashtable {
DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
spinlock_t lock;
int count;
};
struct amdgpu_vm { struct amdgpu_vm {
/* tree of virtual addresses mapped */ /* tree of virtual addresses mapped */
struct rb_root_cached va; struct rb_root_cached va;
@ -182,13 +196,16 @@ struct amdgpu_vm {
/* PT BOs which relocated and their parent need an update */ /* PT BOs which relocated and their parent need an update */
struct list_head relocated; struct list_head relocated;
/* BOs moved, but not yet updated in the PT */ /* per VM BOs moved, but not yet updated in the PT */
struct list_head moved; struct list_head moved;
spinlock_t moved_lock;
/* All BOs of this VM not currently in the state machine */ /* All BOs of this VM not currently in the state machine */
struct list_head idle; struct list_head idle;
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
spinlock_t invalidated_lock;
/* BO mappings freed, but not yet updated in the PT */ /* BO mappings freed, but not yet updated in the PT */
struct list_head freed; struct list_head freed;
@ -226,6 +243,12 @@ struct amdgpu_vm {
/* Some basic info about the task */ /* Some basic info about the task */
struct amdgpu_task_info task_info; struct amdgpu_task_info task_info;
/* Store positions of group of BOs */
struct ttm_lru_bulk_move lru_bulk_move;
/* mark whether can do the bulk move */
bool bulk_moveable;
struct amdgpu_retryfault_hashtable *fault_hash;
}; };
struct amdgpu_vm_manager { struct amdgpu_vm_manager {
@ -244,10 +267,9 @@ struct amdgpu_vm_manager {
/* vram base address for page table entry */ /* vram base address for page table entry */
u64 vram_base_offset; u64 vram_base_offset;
/* vm pte handling */ /* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs; const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS];
unsigned vm_pte_num_rings; unsigned vm_pte_num_rqs;
atomic_t vm_pte_next_ring;
/* partial resident texture handling */ /* partial resident texture handling */
spinlock_t prt_lock; spinlock_t prt_lock;
@ -266,11 +288,16 @@ struct amdgpu_vm_manager {
spinlock_t pasid_lock; spinlock_t pasid_lock;
}; };
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid); int vm_context, unsigned int pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid); unsigned int pasid);
@ -330,8 +357,15 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info); struct amdgpu_task_info *task_info);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
#endif #endif

View File

@ -124,6 +124,28 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
return usage; return usage;
} }
/**
* amdgpu_vram_mgr_virt_start - update virtual start address
*
* @mem: ttm_mem_reg to update
* @node: just allocated node
*
* Calculate a virtual BO start address to easily check if everything is CPU
* accessible.
*/
static void amdgpu_vram_mgr_virt_start(struct ttm_mem_reg *mem,
struct drm_mm_node *node)
{
unsigned long start;
start = node->start + node->size;
if (start > mem->num_pages)
start -= mem->num_pages;
else
start = 0;
mem->start = max(mem->start, start);
}
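A short worked example of the calculation above (illustrative numbers): for a 16-page BO whose drm_mm node covers pages 100..107, start = 108 - 16 = 92, so mem->start ends up >= 92; visibility checks can then simply compare mem->start + mem->num_pages against the number of CPU-visible VRAM pages instead of walking every node.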
/** /**
* amdgpu_vram_mgr_new - allocate new ranges * amdgpu_vram_mgr_new - allocate new ranges
* *
@ -176,10 +198,25 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
pages_left = mem->num_pages; pages_left = mem->num_pages;
spin_lock(&mgr->lock); spin_lock(&mgr->lock);
for (i = 0; i < num_nodes; ++i) { for (i = 0; pages_left >= pages_per_node; ++i) {
unsigned long pages = rounddown_pow_of_two(pages_left);
r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
pages_per_node, 0,
place->fpfn, lpfn,
mode);
if (unlikely(r))
break;
usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
}
for (; pages_left; ++i) {
unsigned long pages = min(pages_left, pages_per_node); unsigned long pages = min(pages_left, pages_per_node);
uint32_t alignment = mem->page_alignment; uint32_t alignment = mem->page_alignment;
unsigned long start;
if (pages == pages_per_node) if (pages == pages_per_node)
alignment = pages_per_node; alignment = pages_per_node;
@ -193,16 +230,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
usage += nodes[i].size << PAGE_SHIFT; usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
/* Calculate a virtual BO start address to easily check if
* everything is CPU accessible.
*/
start = nodes[i].start + nodes[i].size;
if (start > mem->num_pages)
start -= mem->num_pages;
else
start = 0;
mem->start = max(mem->start, start);
pages_left -= pages; pages_left -= pages;
} }
spin_unlock(&mgr->lock); spin_unlock(&mgr->lock);

View File

@ -0,0 +1,119 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*
*/
#include <linux/list.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
static DEFINE_MUTEX(xgmi_mutex);
#define AMDGPU_MAX_XGMI_HIVE 8
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
};
static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
static unsigned hive_count = 0;
static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
{
int i;
struct amdgpu_hive_info *tmp;
if (!adev->gmc.xgmi.hive_id)
return NULL;
for (i = 0 ; i < hive_count; ++i) {
tmp = &xgmi_hives[i];
if (tmp->hive_id == adev->gmc.xgmi.hive_id)
return tmp;
}
if (i >= AMDGPU_MAX_XGMI_HIVE)
return NULL;
/* initialize a new hive if one doesn't exist yet */
tmp = &xgmi_hives[hive_count++];
tmp->hive_id = adev->gmc.xgmi.hive_id;
INIT_LIST_HEAD(&tmp->device_list);
return tmp;
}
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev;
int count = 0, ret = -EINVAL;
if ((adev->asic_type < CHIP_VEGA20) ||
(adev->flags & AMD_IS_APU))
return 0;
adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
memset(&tmp_topology[0], 0, sizeof(tmp_topology));
mutex_lock(&xgmi_mutex);
hive = amdgpu_get_xgmi_hive(adev);
if (!hive)
goto exit;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
tmp_topology[count++].device_id = entry->device_id;
ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
if (ret) {
dev_err(adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.device_id,
adev->gmc.xgmi.hive_id, ret);
goto exit;
}
/* Each PSP needs to set the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.device_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* TODO: decide whether to continue when a node fails or to disable the whole hive */
break;
}
}
if (!ret)
dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id,
adev->gmc.xgmi.hive_id);
exit:
mutex_unlock(&xgmi_mutex);
return ret;
}
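amdgpu_get_xgmi_hive() above is a find-or-create lookup over a fixed array of at most AMDGPU_MAX_XGMI_HIVE entries, keyed by hive_id and serialized by xgmi_mutex in the caller. The sketch below reduces that pattern to a standalone program; it folds the locking into the helper and invents the struct fields, so it only shows the shape of the registry, not the driver code:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_HIVES 8

struct hive {
        uint64_t hive_id;
        int devices;
};

static struct hive hives[MAX_HIVES];
static unsigned int hive_count;
static pthread_mutex_t hive_lock = PTHREAD_MUTEX_INITIALIZER;

static struct hive *get_hive(uint64_t hive_id)
{
        struct hive *h = NULL;
        unsigned int i;

        if (!hive_id)                   /* 0 means "no xGMI" */
                return NULL;

        pthread_mutex_lock(&hive_lock);
        for (i = 0; i < hive_count; i++) {
                if (hives[i].hive_id == hive_id) {
                        h = &hives[i];
                        goto out;
                }
        }
        if (hive_count < MAX_HIVES) {   /* create on first sight */
                h = &hives[hive_count++];
                h->hive_id = hive_id;
        }
out:
        pthread_mutex_unlock(&hive_lock);
        return h;
}

int main(void)
{
        get_hive(0x1234)->devices++;
        get_hive(0x1234)->devices++;
        printf("hive 0x1234 has %d devices\n", get_hive(0x1234)->devices);
        return 0;
}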

View File

@ -28,6 +28,7 @@
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "atom.h" #include "atom.h"
#include "atombios_encoders.h" #include "atombios_encoders.h"
#include "atombios_dp.h" #include "atombios_dp.h"

View File

@ -1386,15 +1386,17 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
{ {
struct drm_gpu_scheduler *sched;
unsigned i; unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) { if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) for (i = 0; i < adev->sdma.num_instances; i++) {
adev->vm_manager.vm_pte_rings[i] = sched = &adev->sdma.instance[i].ring.sched;
&adev->sdma.instance[i].ring; adev->vm_manager.vm_pte_rqs[i] =
&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; }
adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
} }
} }
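Storing one run queue per SDMA instance in vm_pte_rqs, instead of fixed ring pointers, lets page-table updates be scheduled on whichever instance is least busy. A toy illustration of that selection idea in plain C, with invented queue depths and no drm_sched API involved:

#include <stdio.h>

#define NUM_RQS 2

int main(void)
{
        int pending[NUM_RQS] = { 5, 2 };        /* hypothetical queue depths */
        int best = 0;

        for (int i = 1; i < NUM_RQS; i++)
                if (pending[i] < pending[best])
                        best = i;

        printf("submit page-table update to SDMA instance %d\n", best);
        return 0;
}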

View File

@ -31,6 +31,7 @@
#include "atombios_encoders.h" #include "atombios_encoders.h"
#include "amdgpu_pll.h" #include "amdgpu_pll.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "dce_v10_0.h" #include "dce_v10_0.h"
#include "dce/dce_10_0_d.h" #include "dce/dce_10_0_d.h"
@ -1942,6 +1943,17 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true; bypass_lut = true;
break; break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
#ifdef __BIG_ENDIAN
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
ENDIAN_8IN32);
#endif
break;
default: default:
DRM_ERROR("Unsupported screen format %s\n", DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name)); drm_get_format_name(target_fb->format->format, &format_name));
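The new XBGR/ABGR cases are built entirely with REG_SET_FIELD(), which masks and shifts a value into a named bit field of a register image. Below is a simplified standalone version of that packing pattern; the field masks and shifts are invented for the example and do not match the real DCE register layout:

#include <stdint.h>
#include <stdio.h>

#define DEPTH__SHIFT  0
#define DEPTH__MASK   0x00000003
#define FORMAT__SHIFT 8
#define FORMAT__MASK  0x00000700

#define FIELD_SHIFT(name)  name##__SHIFT
#define FIELD_MASK(name)   name##__MASK

/* Clear the field in reg, then OR in the new value at the right offset. */
#define SET_FIELD(reg, field, val) \
        (((reg) & ~FIELD_MASK(field)) | \
         (((uint32_t)(val) << FIELD_SHIFT(field)) & FIELD_MASK(field)))

int main(void)
{
        uint32_t fb_format = 0;

        fb_format = SET_FIELD(fb_format, DEPTH, 2);     /* 32 bpp class */
        fb_format = SET_FIELD(fb_format, FORMAT, 0);    /* ARGB8888 class */
        printf("fb_format = 0x%08x\n", fb_format);
        return 0;
}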

View File

@ -31,6 +31,7 @@
#include "atombios_encoders.h" #include "atombios_encoders.h"
#include "amdgpu_pll.h" #include "amdgpu_pll.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "dce_v11_0.h" #include "dce_v11_0.h"
#include "dce/dce_11_0_d.h" #include "dce/dce_11_0_d.h"
@ -1984,6 +1985,17 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true; bypass_lut = true;
break; break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
#ifdef __BIG_ENDIAN
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
ENDIAN_8IN32);
#endif
break;
default: default:
DRM_ERROR("Unsupported screen format %s\n", DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name)); drm_get_format_name(target_fb->format->format, &format_name));

View File

@ -30,6 +30,7 @@
#include "atombios_encoders.h" #include "atombios_encoders.h"
#include "amdgpu_pll.h" #include "amdgpu_pll.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "bif/bif_3_0_d.h" #include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h" #include "bif/bif_3_0_sh_mask.h"
@ -1887,6 +1888,16 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true; bypass_lut = true;
break; break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
#ifdef __BIG_ENDIAN
fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
#endif
break;
default: default:
DRM_ERROR("Unsupported screen format %s\n", DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name)); drm_get_format_name(target_fb->format->format, &format_name));

View File

@ -31,6 +31,7 @@
#include "atombios_encoders.h" #include "atombios_encoders.h"
#include "amdgpu_pll.h" #include "amdgpu_pll.h"
#include "amdgpu_connectors.h" #include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "dce_v8_0.h" #include "dce_v8_0.h"
#include "dce/dce_8_0_d.h" #include "dce/dce_8_0_d.h"
@ -1864,6 +1865,16 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true; bypass_lut = true;
break; break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
(GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
(GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
#ifdef __BIG_ENDIAN
fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
default: default:
DRM_ERROR("Unsupported screen format %s\n", DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name)); drm_get_format_name(target_fb->format->format, &format_name));

View File

@ -4170,15 +4170,6 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
uint32_t gws_base, uint32_t gws_size, uint32_t gws_base, uint32_t gws_size,
uint32_t oa_base, uint32_t oa_size) uint32_t oa_base, uint32_t oa_size)
{ {
gds_base = gds_base >> AMDGPU_GDS_SHIFT;
gds_size = gds_size >> AMDGPU_GDS_SHIFT;
gws_base = gws_base >> AMDGPU_GWS_SHIFT;
gws_size = gws_size >> AMDGPU_GWS_SHIFT;
oa_base = oa_base >> AMDGPU_OA_SHIFT;
oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS Base */ /* GDS Base */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
@ -4212,6 +4203,18 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
} }
static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t value = 0;
value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
WREG32(mmSQ_CMD, value);
}
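gfx_v7_0_ring_soft_recovery() above issues an SQ_CMD wave kill restricted to the hanging VMID, giving the driver a lightweight recovery step to try before a full GPU reset. The sketch below is purely conceptual: every helper name is invented and it only shows the retry-then-fall-back control flow, not the real amdgpu hang handling:

#include <stdbool.h>
#include <stdio.h>

static bool job_completed(int attempt)
{
        return attempt >= 2;    /* pretend the third kill frees the wave */
}

static bool soft_recover(unsigned int vmid)
{
        for (int attempt = 0; attempt < 4; attempt++) {
                printf("killing waves of vmid %u (attempt %d)\n", vmid, attempt);
                if (job_completed(attempt))
                        return true;
        }
        return false;
}

int main(void)
{
        if (soft_recover(3))
                printf("soft recovery succeeded, no reset needed\n");
        else
                printf("falling back to full GPU reset\n");
        return 0;
}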
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{ {
WREG32(mmSQ_IND_INDEX, WREG32(mmSQ_IND_INDEX,
@ -4579,25 +4582,6 @@ static int gfx_v7_0_sw_init(void *handle)
} }
} }
/* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
&adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
&adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
&adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
adev->gfx.ce_ram_size = 0x8000; adev->gfx.ce_ram_size = 0x8000;
gfx_v7_0_gpu_early_init(adev); gfx_v7_0_gpu_early_init(adev);
@ -5088,6 +5072,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.pad_ib = amdgpu_ring_generic_pad_ib, .pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
.emit_wreg = gfx_v7_0_ring_emit_wreg, .emit_wreg = gfx_v7_0_ring_emit_wreg,
.soft_recovery = gfx_v7_0_ring_soft_recovery,
}; };
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {

View File

@ -1114,14 +1114,14 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
tmp = (unsigned int *)((uintptr_t)rlc_hdr + tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
tmp = (unsigned int *)((uintptr_t)rlc_hdr + tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
@ -2161,25 +2161,6 @@ static int gfx_v8_0_sw_init(void *handle)
if (r) if (r)
return r; return r;
/* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
&adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
&adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
&adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
adev->gfx.ce_ram_size = 0x8000; adev->gfx.ce_ram_size = 0x8000;
r = gfx_v8_0_gpu_early_init(adev); r = gfx_v8_0_gpu_early_init(adev);
@ -4604,7 +4585,6 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
uint32_t scratch, tmp = 0;
uint64_t queue_mask = 0; uint64_t queue_mask = 0;
int r, i; int r, i;
@ -4623,17 +4603,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
queue_mask |= (1ull << i); queue_mask |= (1ull << i);
} }
r = amdgpu_gfx_scratch_get(adev, &scratch); r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
if (r) {
DRM_ERROR("Failed to get scratch reg (%d).\n", r);
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
if (r) { if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r); DRM_ERROR("Failed to lock KIQ (%d).\n", r);
amdgpu_gfx_scratch_free(adev, scratch);
return r; return r;
} }
/* set resources */ /* set resources */
@ -4665,25 +4637,12 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
} }
/* write to scratch for completion */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
amdgpu_ring_commit(kiq_ring);
for (i = 0; i < adev->usec_timeout; i++) { r = amdgpu_ring_test_ring(kiq_ring);
tmp = RREG32(scratch); if (r) {
if (tmp == 0xDEADBEEF) DRM_ERROR("KCQ enable failed\n");
break; kiq_ring->ready = false;
DRM_UDELAY(1);
} }
if (i >= adev->usec_timeout) {
DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
r = -EINVAL;
}
amdgpu_gfx_scratch_free(adev, scratch);
return r; return r;
} }
@ -4970,26 +4929,33 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *ring = NULL; struct amdgpu_ring *ring;
int r = 0, i; int r;
gfx_v8_0_cp_compute_enable(adev, true);
ring = &adev->gfx.kiq.ring; ring = &adev->gfx.kiq.ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false); r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)) if (unlikely(r != 0))
goto done; return r;
r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
if (!r) { if (unlikely(r != 0))
r = gfx_v8_0_kiq_init_queue(ring); return r;
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL; gfx_v8_0_kiq_init_queue(ring);
} amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj); amdgpu_bo_unreserve(ring->mqd_obj);
if (r) ring->ready = true;
goto done; return 0;
}
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = NULL;
int r = 0, i;
gfx_v8_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) { for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i]; ring = &adev->gfx.compute_ring[i];
@ -5014,15 +4980,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
if (r) if (r)
goto done; goto done;
/* Test KIQ */
ring = &adev->gfx.kiq.ring;
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
goto done;
}
/* Test KCQs */ /* Test KCQs */
for (i = 0; i < adev->gfx.num_compute_rings; i++) { for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i]; ring = &adev->gfx.compute_ring[i];
@ -5054,14 +5011,17 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
return r; return r;
} }
r = gfx_v8_0_cp_gfx_resume(adev);
if (r)
return r;
r = gfx_v8_0_kiq_resume(adev); r = gfx_v8_0_kiq_resume(adev);
if (r) if (r)
return r; return r;
r = gfx_v8_0_cp_gfx_resume(adev);
if (r)
return r;
r = gfx_v8_0_kcq_resume(adev);
if (r)
return r;
gfx_v8_0_enable_gui_idle_interrupt(adev, true); gfx_v8_0_enable_gui_idle_interrupt(adev, true);
return 0; return 0;
@ -5090,61 +5050,39 @@ static int gfx_v8_0_hw_init(void *handle)
return r; return r;
} }
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{ {
struct amdgpu_device *adev = kiq_ring->adev;
uint32_t scratch, tmp = 0;
int r, i; int r, i;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
r = amdgpu_gfx_scratch_get(adev, &scratch); r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
if (r) { if (r)
DRM_ERROR("Failed to get scratch reg (%d).\n", r);
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(kiq_ring, 10);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r); DRM_ERROR("Failed to lock KIQ (%d).\n", r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/* unmap queues */ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0);
/* write to scratch for completion */ }
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); r = amdgpu_ring_test_ring(kiq_ring);
amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); if (r)
amdgpu_ring_write(kiq_ring, 0xDEADBEEF); DRM_ERROR("KCQ disable failed\n");
amdgpu_ring_commit(kiq_ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
}
if (i >= adev->usec_timeout) {
DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
r = -EINVAL;
}
amdgpu_gfx_scratch_free(adev, scratch);
return r; return r;
} }
static int gfx_v8_0_hw_fini(void *handle) static int gfx_v8_0_hw_fini(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@ -5154,8 +5092,7 @@ static int gfx_v8_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
/* disable KCQ to avoid CPC touch memory not valid anymore */ /* disable KCQ to avoid CPC touch memory not valid anymore */
for (i = 0; i < adev->gfx.num_compute_rings; i++) gfx_v8_0_kcq_disable(adev);
gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
if (amdgpu_sriov_vf(adev)) { if (amdgpu_sriov_vf(adev)) {
pr_debug("For SRIOV client, shouldn't do anything.\n"); pr_debug("For SRIOV client, shouldn't do anything.\n");
@ -5164,10 +5101,6 @@ static int gfx_v8_0_hw_fini(void *handle)
gfx_v8_0_cp_enable(adev, false); gfx_v8_0_cp_enable(adev, false);
gfx_v8_0_rlc_stop(adev); gfx_v8_0_rlc_stop(adev);
amdgpu_device_ip_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
return 0; return 0;
} }
@ -5390,10 +5323,6 @@ static int gfx_v8_0_post_soft_reset(void *handle)
grbm_soft_reset = adev->gfx.grbm_soft_reset; grbm_soft_reset = adev->gfx.grbm_soft_reset;
srbm_soft_reset = adev->gfx.srbm_soft_reset; srbm_soft_reset = adev->gfx.srbm_soft_reset;
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
gfx_v8_0_cp_gfx_resume(adev);
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
@ -5410,7 +5339,13 @@ static int gfx_v8_0_post_soft_reset(void *handle)
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
} }
gfx_v8_0_kiq_resume(adev); gfx_v8_0_kiq_resume(adev);
gfx_v8_0_kcq_resume(adev);
} }
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
gfx_v8_0_cp_gfx_resume(adev);
gfx_v8_0_rlc_start(adev); gfx_v8_0_rlc_start(adev);
return 0; return 0;
@ -5442,15 +5377,6 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
uint32_t gws_base, uint32_t gws_size, uint32_t gws_base, uint32_t gws_size,
uint32_t oa_base, uint32_t oa_size) uint32_t oa_base, uint32_t oa_size)
{ {
gds_base = gds_base >> AMDGPU_GDS_SHIFT;
gds_size = gds_size >> AMDGPU_GDS_SHIFT;
gws_base = gws_base >> AMDGPU_GWS_SHIFT;
gws_size = gws_size >> AMDGPU_GWS_SHIFT;
oa_base = oa_base >> AMDGPU_OA_SHIFT;
oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS Base */ /* GDS Base */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
@ -6727,6 +6653,18 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, val);
} }
static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t value = 0;
value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
WREG32(mmSQ_CMD, value);
}
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state) enum amdgpu_interrupt_state state)
{ {
@ -7184,6 +7122,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg, .emit_wreg = gfx_v8_0_ring_emit_wreg,
.soft_recovery = gfx_v8_0_ring_soft_recovery,
}; };
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {

View File

@ -80,9 +80,23 @@ MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_9_0[] = static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{ {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
@ -173,6 +187,29 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
}; };
static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{ {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
@ -240,6 +277,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
@ -279,12 +317,16 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
ARRAY_SIZE(golden_settings_gc_9_0_vg20)); ARRAY_SIZE(golden_settings_gc_9_0_vg20));
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
soc15_program_register_sequence(adev, soc15_program_register_sequence(adev, golden_settings_gc_9_1,
golden_settings_gc_9_1, ARRAY_SIZE(golden_settings_gc_9_1));
ARRAY_SIZE(golden_settings_gc_9_1)); if (adev->rev_id >= 8)
soc15_program_register_sequence(adev, soc15_program_register_sequence(adev,
golden_settings_gc_9_1_rv1, golden_settings_gc_9_1_rv2,
ARRAY_SIZE(golden_settings_gc_9_1_rv1)); ARRAY_SIZE(golden_settings_gc_9_1_rv2));
else
soc15_program_register_sequence(adev,
golden_settings_gc_9_1_rv1,
ARRAY_SIZE(golden_settings_gc_9_1_rv1));
break; break;
default: default:
break; break;
@ -482,6 +524,61 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
} }
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
switch (adev->asic_type) {
case CHIP_VEGA10:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 42) &&
(adev->gfx.pfp_fw_version >= 0x000000b1) &&
(adev->gfx.pfp_feature_version >= 42))
adev->gfx.me_fw_write_wait = true;
if ((adev->gfx.mec_fw_version >= 0x00000193) &&
(adev->gfx.mec_feature_version >= 42))
adev->gfx.mec_fw_write_wait = true;
break;
case CHIP_VEGA12:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 44) &&
(adev->gfx.pfp_fw_version >= 0x000000b2) &&
(adev->gfx.pfp_feature_version >= 44))
adev->gfx.me_fw_write_wait = true;
if ((adev->gfx.mec_fw_version >= 0x00000196) &&
(adev->gfx.mec_feature_version >= 44))
adev->gfx.mec_fw_write_wait = true;
break;
case CHIP_VEGA20:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 44) &&
(adev->gfx.pfp_fw_version >= 0x000000b2) &&
(adev->gfx.pfp_feature_version >= 44))
adev->gfx.me_fw_write_wait = true;
if ((adev->gfx.mec_fw_version >= 0x00000197) &&
(adev->gfx.mec_feature_version >= 44))
adev->gfx.mec_fw_write_wait = true;
break;
case CHIP_RAVEN:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 42) &&
(adev->gfx.pfp_fw_version >= 0x000000b1) &&
(adev->gfx.pfp_feature_version >= 42))
adev->gfx.me_fw_write_wait = true;
if ((adev->gfx.mec_fw_version >= 0x00000192) &&
(adev->gfx.mec_feature_version >= 42))
adev->gfx.mec_fw_write_wait = true;
break;
default:
break;
}
}
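gfx_v9_0_check_fw_write_wait() is a per-ASIC version gate: the single write-and-wait packet is only used once the ME/PFP (or MEC) firmware and feature versions reach known-good minimums, and gfx_v9_0_ring_emit_reg_write_reg_wait() later keys off the cached flags. A standalone sketch of that check, reusing the Vega10 thresholds from the hunk and an invented struct for the firmware info:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fw_info {
        uint32_t fw_version;
        uint32_t feature_version;
};

static bool fw_at_least(const struct fw_info *fw,
                        uint32_t min_ver, uint32_t min_feat)
{
        return fw->fw_version >= min_ver && fw->feature_version >= min_feat;
}

int main(void)
{
        struct fw_info me  = { .fw_version = 0x9d, .feature_version = 42 };
        struct fw_info pfp = { .fw_version = 0xb1, .feature_version = 42 };

        /* Vega10 rule from the hunk: ME >= 0x9c/42 and PFP >= 0xb1/42. */
        bool me_fw_write_wait = fw_at_least(&me, 0x9c, 42) &&
                                fw_at_least(&pfp, 0xb1, 42);

        printf("me_fw_write_wait = %d\n", me_fw_write_wait);
        return 0;
}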
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{ {
const char *chip_name; const char *chip_name;
@ -509,7 +606,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
chip_name = "vega20"; chip_name = "vega20";
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
chip_name = "raven"; if (adev->rev_id >= 8)
chip_name = "raven2";
else if (adev->pdev->device == 0x15d8)
chip_name = "picasso";
else
chip_name = "raven";
break; break;
default: default:
BUG(); BUG();
@ -590,14 +692,14 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
tmp = (unsigned int *)((uintptr_t)rlc_hdr + tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
tmp = (unsigned int *)((uintptr_t)rlc_hdr + tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->gfx.rlc.is_rlc_v2_1) if (adev->gfx.rlc.is_rlc_v2_1)
@ -716,6 +818,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
} }
out: out:
gfx_v9_0_check_fw_write_wait(adev);
if (err) { if (err) {
dev_err(adev->dev, dev_err(adev->dev,
"gfx9: Failed to load firmware \"%s\"\n", "gfx9: Failed to load firmware \"%s\"\n",
@ -1210,7 +1313,10 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; if (adev->rev_id >= 8)
gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
else
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
break; break;
default: default:
BUG(); BUG();
@ -1421,8 +1527,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
gfx_v9_0_write_data_to_reg(ring, 0, false, gfx_v9_0_write_data_to_reg(ring, 0, false,
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
(adev->gds.mem.total_size + (adev->gds.mem.total_size +
adev->gfx.ngg.gds_reserve_size) >> adev->gfx.ngg.gds_reserve_size));
AMDGPU_GDS_SHIFT);
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
@ -1595,25 +1700,6 @@ static int gfx_v9_0_sw_init(void *handle)
if (r) if (r)
return r; return r;
/* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
&adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
&adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
&adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
adev->gfx.ce_ram_size = 0x8000; adev->gfx.ce_ram_size = 0x8000;
r = gfx_v9_0_gpu_early_init(adev); r = gfx_v9_0_gpu_early_init(adev);
@ -2610,7 +2696,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
uint32_t scratch, tmp = 0;
uint64_t queue_mask = 0; uint64_t queue_mask = 0;
int r, i; int r, i;
@ -2629,17 +2714,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
queue_mask |= (1ull << i); queue_mask |= (1ull << i);
} }
r = amdgpu_gfx_scratch_get(adev, &scratch); r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
if (r) {
DRM_ERROR("Failed to get scratch reg (%d).\n", r);
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
if (r) { if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r); DRM_ERROR("Failed to lock KIQ (%d).\n", r);
amdgpu_gfx_scratch_free(adev, scratch);
return r; return r;
} }
@ -2676,24 +2753,12 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
} }
/* write to scratch for completion */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
amdgpu_ring_commit(kiq_ring);
for (i = 0; i < adev->usec_timeout; i++) { r = amdgpu_ring_test_ring(kiq_ring);
tmp = RREG32(scratch); if (r) {
if (tmp == 0xDEADBEEF) DRM_ERROR("KCQ enable failed\n");
break; kiq_ring->ready = false;
DRM_UDELAY(1);
} }
if (i >= adev->usec_timeout) {
DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
r = -EINVAL;
}
amdgpu_gfx_scratch_free(adev, scratch);
return r; return r;
} }
@ -3055,26 +3120,33 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *ring = NULL; struct amdgpu_ring *ring;
int r = 0, i; int r;
gfx_v9_0_cp_compute_enable(adev, true);
ring = &adev->gfx.kiq.ring; ring = &adev->gfx.kiq.ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false); r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)) if (unlikely(r != 0))
goto done; return r;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) { if (unlikely(r != 0))
r = gfx_v9_0_kiq_init_queue(ring); return r;
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL; gfx_v9_0_kiq_init_queue(ring);
} amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj); amdgpu_bo_unreserve(ring->mqd_obj);
if (r) ring->ready = true;
goto done; return 0;
}
static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = NULL;
int r = 0, i;
gfx_v9_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) { for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i]; ring = &adev->gfx.compute_ring[i];
@ -3117,11 +3189,15 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r; return r;
} }
r = gfx_v9_0_kiq_resume(adev);
if (r)
return r;
r = gfx_v9_0_cp_gfx_resume(adev); r = gfx_v9_0_cp_gfx_resume(adev);
if (r) if (r)
return r; return r;
r = gfx_v9_0_kiq_resume(adev); r = gfx_v9_0_kcq_resume(adev);
if (r) if (r)
return r; return r;
@ -3132,12 +3208,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r; return r;
} }
ring = &adev->gfx.kiq.ring;
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
for (i = 0; i < adev->gfx.num_compute_rings; i++) { for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i]; ring = &adev->gfx.compute_ring[i];
@ -3186,71 +3256,45 @@ static int gfx_v9_0_hw_init(void *handle)
return r; return r;
} }
static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
{ {
struct amdgpu_device *adev = kiq_ring->adev;
uint32_t scratch, tmp = 0;
int r, i; int r, i;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
r = amdgpu_gfx_scratch_get(adev, &scratch); r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
if (r) { if (r)
DRM_ERROR("Failed to get scratch reg (%d).\n", r);
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(kiq_ring, 10);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r); DRM_ERROR("Failed to lock KIQ (%d).\n", r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/* unmap queues */ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0);
/* write to scratch for completion */ }
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); r = amdgpu_ring_test_ring(kiq_ring);
amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); if (r)
amdgpu_ring_write(kiq_ring, 0xDEADBEEF); DRM_ERROR("KCQ disable failed\n");
amdgpu_ring_commit(kiq_ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
}
if (i >= adev->usec_timeout) {
DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
r = -EINVAL;
}
amdgpu_gfx_scratch_free(adev, scratch);
return r; return r;
} }
static int gfx_v9_0_hw_fini(void *handle) static int gfx_v9_0_hw_fini(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
/* disable KCQ to avoid CPC touch memory not valid anymore */ /* disable KCQ to avoid CPC touch memory not valid anymore */
for (i = 0; i < adev->gfx.num_compute_rings; i++) gfx_v9_0_kcq_disable(adev);
gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
if (amdgpu_sriov_vf(adev)) { if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false); gfx_v9_0_cp_gfx_enable(adev, false);
@ -3408,15 +3452,6 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
gds_base = gds_base >> AMDGPU_GDS_SHIFT;
gds_size = gds_size >> AMDGPU_GDS_SHIFT;
gws_base = gws_base >> AMDGPU_GWS_SHIFT;
gws_size = gws_size >> AMDGPU_GWS_SHIFT;
oa_base = oa_base >> AMDGPU_OA_SHIFT;
oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS Base */ /* GDS Base */
gfx_v9_0_write_data_to_reg(ring, 0, false, gfx_v9_0_write_data_to_reg(ring, 0, false,
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
@ -3763,6 +3798,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_RAVEN: case CHIP_RAVEN:
if (!enable) {
amdgpu_gfx_off_ctrl(adev, false);
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
}
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
@ -3782,14 +3821,16 @@ static int gfx_v9_0_set_powergating_state(void *handle,
/* update mgcg state */ /* update mgcg state */
gfx_v9_0_update_gfx_mg_power_gating(adev, enable); gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
/* set gfx off through smu */ if (enable)
if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_gfx_off_ctrl(adev, true);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true);
break; break;
case CHIP_VEGA12: case CHIP_VEGA12:
/* set gfx off through smu */ if (!enable) {
if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_gfx_off_ctrl(adev, false);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
} else {
amdgpu_gfx_off_ctrl(adev, true);
}
break; break;
default: default:
break; break;
@ -4350,8 +4391,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
uint32_t ref, uint32_t mask) uint32_t ref, uint32_t mask)
{ {
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
struct amdgpu_device *adev = ring->adev;
bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
if (amdgpu_sriov_vf(ring->adev)) if (fw_version_ok)
gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
ref, mask, 0x20); ref, mask, 0x20);
else else
@ -4359,6 +4403,18 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask); ref, mask);
} }
static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t value = 0;
value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
WREG32(mmSQ_CMD, value);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state) enum amdgpu_interrupt_state state)
{ {
@ -4681,6 +4737,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
}; };
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {

View File

@ -37,13 +37,7 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
{ {
uint64_t value; uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
value = adev->gart.table_addr - adev->gmc.vram_start
+ adev->vm_manager.vram_base_offset;
value &= 0x0000FFFFFFFFF000ULL;
value |= 0x1; /*valid bit*/
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value)); lower_32_bits(value));
@ -71,16 +65,27 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{ {
uint64_t value; uint64_t value;
/* Disable AGP. */ /* Program the AGP BAR */
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF); WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */ /* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18); min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18);
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
/*
* Raven2 has a HW issue that prevents it from using the VRAM that lies
* beyond MC_VM_SYSTEM_APERTURE_HIGH_ADDR. As a workaround, raise the
* system aperture high address by one so the VM faults and hardware
* hangs are avoided.
*/
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
(max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18) + 0x1);
else
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */ /* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
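The hub now programs the AGP BAR and widens the system aperture to cover both VRAM and the AGP window: MC_VM_AGP_BOT/TOP take address bits [47:24], the system aperture registers take bits [47:18], and Raven2 gets an extra +1 on the high address per the comment above. A quick worked example of that math with made-up MC addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t vram_start = 0x000000F400000000ULL;    /* made-up layout */
        uint64_t vram_end   = 0x000000F5FFFFFFFFULL;
        uint64_t agp_start  = 0x000000F600000000ULL;
        uint64_t agp_end    = 0x000000F6FFFFFFFFULL;

        uint64_t low  = (vram_start < agp_start ? vram_start : agp_start) >> 18;
        uint64_t high = (vram_end   > agp_end   ? vram_end   : agp_end)   >> 18;

        printf("MC_VM_AGP_BOT              = 0x%llx\n",
               (unsigned long long)(agp_start >> 24));
        printf("MC_VM_AGP_TOP              = 0x%llx\n",
               (unsigned long long)(agp_end >> 24));
        printf("SYSTEM_APERTURE_LOW_ADDR   = 0x%llx\n",
               (unsigned long long)low);
        printf("SYSTEM_APERTURE_HIGH_ADDR  = 0x%llx (+1 on Raven2)\n",
               (unsigned long long)high);
        return 0;
}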

View File

@ -0,0 +1,53 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "gfxhub_v1_1.h"
#include "gc/gc_9_2_1_offset.h"
#include "gc/gc_9_2_1_sh_mask.h"
#include "soc15_common.h"
int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
{
u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
u32 max_region =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
/* PF_MAX_REGION=0 means xgmi is disabled */
if (max_region) {
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
if (adev->gmc.xgmi.num_physical_nodes > 4)
return -EINVAL;
adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
if (adev->gmc.xgmi.physical_node_id > 3)
return -EINVAL;
adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
}
return 0;
}
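gfxhub_v1_1_get_xgmi_info() derives the hive layout purely from register fields: PF_MAX_REGION + 1 is the number of physical nodes, PF_LFB_REGION is the local node id, and PF_LFB_SIZE shifted left by 24 is the per-node segment size in bytes. A quick check of that arithmetic with made-up field values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t pf_max_region = 1;     /* two physical nodes in the hive */
        uint32_t pf_lfb_region = 0;     /* this GPU is node 0 */
        uint32_t pf_lfb_size   = 0x100; /* raw segment-size field */

        uint32_t num_nodes = pf_max_region + 1;
        uint64_t segment   = (uint64_t)pf_lfb_size << 24;

        printf("nodes=%u node_id=%u segment=%llu MiB\n",
               num_nodes, pf_lfb_region,
               (unsigned long long)(segment >> 20));   /* 4096 MiB */
        return 0;
}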

View File

@ -0,0 +1,29 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GFXHUB_V1_1_H__
#define __GFXHUB_V1_1_H__
int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev);
#endif

View File

@ -26,6 +26,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "gmc_v6_0.h" #include "gmc_v6_0.h"
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_gem.h"
#include "bif/bif_3_0_d.h" #include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h" #include "bif/bif_3_0_sh_mask.h"
@ -223,8 +224,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24; base <<= 24;
amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc); amdgpu_gmc_gart_location(adev, mc);
} }
static void gmc_v6_0_mc_program(struct amdgpu_device *adev) static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
@ -493,16 +494,20 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
{ {
uint64_t table_addr;
int r, i; int r, i;
u32 field; u32 field;
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL; return -EINVAL;
} }
r = amdgpu_gart_table_vram_pin(adev); r = amdgpu_gart_table_vram_pin(adev);
if (r) if (r)
return r; return r;
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
/* Setup TLB control */ /* Setup TLB control */
WREG32(mmMC_VM_MX_L1_TLB_CNTL, WREG32(mmMC_VM_MX_L1_TLB_CNTL,
(0xA << 7) | (0xA << 7) |
@ -531,7 +536,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
/* setup context0 */ /* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page_addr >> 12)); (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0); WREG32(mmVM_CONTEXT0_CNTL2, 0);
@ -555,10 +560,10 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
for (i = 1; i < 16; i++) { for (i = 1; i < 16; i++) {
if (i < 8) if (i < 8)
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
adev->gart.table_addr >> 12); table_addr >> 12);
else else
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8, WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
adev->gart.table_addr >> 12); table_addr >> 12);
} }
/* enable context1-15 */ /* enable context1-15 */
@ -578,7 +583,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
gmc_v6_0_flush_gpu_tlb(adev, 0); gmc_v6_0_flush_gpu_tlb(adev, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20), (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr); (unsigned long long)table_addr);
adev->gart.ready = true; adev->gart.ready = true;
return 0; return 0;
} }
@ -587,7 +592,7 @@ static int gmc_v6_0_gart_init(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj) { if (adev->gart.bo) {
dev_warn(adev->dev, "gmc_v6_0 PCIE GART already initialized\n"); dev_warn(adev->dev, "gmc_v6_0 PCIE GART already initialized\n");
return 0; return 0;
} }

View File

@ -29,6 +29,7 @@
#include "gmc_v7_0.h" #include "gmc_v7_0.h"
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
#include "amdgpu_gem.h"
#include "bif/bif_4_1_d.h" #include "bif/bif_4_1_d.h"
#include "bif/bif_4_1_sh_mask.h" #include "bif/bif_4_1_sh_mask.h"
@ -241,8 +242,8 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24; base <<= 24;
amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc); amdgpu_gmc_gart_location(adev, mc);
} }
/** /**
@ -601,16 +602,20 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
*/ */
static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
{ {
uint64_t table_addr;
int r, i; int r, i;
u32 tmp, field; u32 tmp, field;
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL; return -EINVAL;
} }
r = amdgpu_gart_table_vram_pin(adev); r = amdgpu_gart_table_vram_pin(adev);
if (r) if (r)
return r; return r;
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
/* Setup TLB control */ /* Setup TLB control */
tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL); tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
@ -642,7 +647,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
/* setup context0 */ /* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page_addr >> 12)); (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0); WREG32(mmVM_CONTEXT0_CNTL2, 0);
@ -666,10 +671,10 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
for (i = 1; i < 16; i++) { for (i = 1; i < 16; i++) {
if (i < 8) if (i < 8)
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
adev->gart.table_addr >> 12); table_addr >> 12);
else else
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8, WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
adev->gart.table_addr >> 12); table_addr >> 12);
} }
/* enable context1-15 */ /* enable context1-15 */
@ -696,7 +701,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
gmc_v7_0_flush_gpu_tlb(adev, 0); gmc_v7_0_flush_gpu_tlb(adev, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20), (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr); (unsigned long long)table_addr);
adev->gart.ready = true; adev->gart.ready = true;
return 0; return 0;
} }
@ -705,7 +710,7 @@ static int gmc_v7_0_gart_init(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj) { if (adev->gart.bo) {
WARN(1, "R600 PCIE GART already initialized\n"); WARN(1, "R600 PCIE GART already initialized\n");
return 0; return 0;
} }


@ -27,6 +27,7 @@
#include "gmc_v8_0.h" #include "gmc_v8_0.h"
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
#include "amdgpu_gem.h"
#include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h" #include "gmc/gmc_8_1_sh_mask.h"
@ -410,8 +411,8 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24; base <<= 24;
amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc); amdgpu_gmc_gart_location(adev, mc);
} }
/** /**
@ -806,16 +807,20 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
*/ */
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
{ {
uint64_t table_addr;
int r, i; int r, i;
u32 tmp, field; u32 tmp, field;
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL; return -EINVAL;
} }
r = amdgpu_gart_table_vram_pin(adev); r = amdgpu_gart_table_vram_pin(adev);
if (r) if (r)
return r; return r;
table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
/* Setup TLB control */ /* Setup TLB control */
tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL); tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
@ -863,7 +868,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
/* setup context0 */ /* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page_addr >> 12)); (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0); WREG32(mmVM_CONTEXT0_CNTL2, 0);
@ -887,10 +892,10 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
for (i = 1; i < 16; i++) { for (i = 1; i < 16; i++) {
if (i < 8) if (i < 8)
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
adev->gart.table_addr >> 12); table_addr >> 12);
else else
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8, WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
adev->gart.table_addr >> 12); table_addr >> 12);
} }
/* enable context1-15 */ /* enable context1-15 */
@ -918,7 +923,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
gmc_v8_0_flush_gpu_tlb(adev, 0); gmc_v8_0_flush_gpu_tlb(adev, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20), (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr); (unsigned long long)table_addr);
adev->gart.ready = true; adev->gart.ready = true;
return 0; return 0;
} }
@ -927,7 +932,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj) { if (adev->gart.bo) {
WARN(1, "R600 PCIE GART already initialized\n"); WARN(1, "R600 PCIE GART already initialized\n");
return 0; return 0;
} }


@ -25,6 +25,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "gmc_v9_0.h" #include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h" #include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"
#include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h" #include "hdp/hdp_4_0_sh_mask.h"
@ -42,6 +43,7 @@
#include "gfxhub_v1_0.h" #include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h" #include "mmhub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
@ -264,12 +266,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
dev_err(adev->dev, dev_err(adev->dev,
"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n", "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
entry->vmid_src ? "mmhub" : "gfxhub", entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid, entry->src_id, entry->ring_id, entry->vmid,
entry->pasid, task_info.process_name, task_info.tgid, entry->pasid, task_info.process_name, task_info.tgid,
task_info.task_name, task_info.pid); task_info.task_name, task_info.pid);
dev_err(adev->dev, " at address 0x%016llx from %d\n", dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n",
addr, entry->client_id); addr, entry->client_id);
if (!amdgpu_sriov_vf(adev)) if (!amdgpu_sriov_vf(adev))
dev_err(adev->dev, dev_err(adev->dev,
@ -310,6 +312,48 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
return req; return req;
} }
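/*
 * Helper to invalidate via the KIQ: emit a write-then-wait packet on the
 * KIQ ring (write reg0, then poll reg1 against ref/mask), poll the fence
 * for completion and, outside of IRQ context, retry the poll with sleeps
 * up to MAX_KIQ_REG_TRY times before giving up.
 */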
static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
/* don't wait any longer if we are in IRQ context */
if (r < 1 && in_interrupt())
goto failed_kiq;
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq;
return 0;
failed_kiq:
pr_err("failed to invalidate tlb with kiq\n");
return r;
}
/* /*
* GART * GART
* VMID 0 is the physical GPU addresses as used by the kernel. * VMID 0 is the physical GPU addresses as used by the kernel.
@ -331,13 +375,23 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
/* Use register 17 for GART */ /* Use register 17 for GART */
const unsigned eng = 17; const unsigned eng = 17;
unsigned i, j; unsigned i, j;
int r;
spin_lock(&adev->gmc.invalidate_lock);
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmhub *hub = &adev->vmhub[i]; struct amdgpu_vmhub *hub = &adev->vmhub[i];
u32 tmp = gmc_v9_0_get_invalidate_req(vmid); u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
if (adev->gfx.kiq.ring.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
!adev->in_gpu_reset) {
r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
if (!r)
continue;
}
spin_lock(&adev->gmc.invalidate_lock);
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
/* Busy wait for ACK.*/ /* Busy wait for ACK.*/
@ -348,8 +402,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
break; break;
cpu_relax(); cpu_relax();
} }
if (j < 100) if (j < 100) {
spin_unlock(&adev->gmc.invalidate_lock);
continue; continue;
}
/* Wait for ACK with a delay.*/ /* Wait for ACK with a delay.*/
for (j = 0; j < adev->usec_timeout; j++) { for (j = 0; j < adev->usec_timeout; j++) {
@ -359,13 +415,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
break; break;
udelay(1); udelay(1);
} }
if (j < adev->usec_timeout) if (j < adev->usec_timeout) {
spin_unlock(&adev->gmc.invalidate_lock);
continue; continue;
}
spin_unlock(&adev->gmc.invalidate_lock);
DRM_ERROR("Timeout waiting for VM flush ACK!\n"); DRM_ERROR("Timeout waiting for VM flush ACK!\n");
} }
spin_unlock(&adev->gmc.invalidate_lock);
} }
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@ -374,12 +430,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid); uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
uint64_t flags = AMDGPU_PTE_VALID;
unsigned eng = ring->vm_inv_eng; unsigned eng = ring->vm_inv_eng;
amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags);
pd_addr |= flags;
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr)); lower_32_bits(pd_addr));
@ -509,7 +561,7 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags) uint64_t *addr, uint64_t *flags)
{ {
if (!(*flags & AMDGPU_PDE_PTE)) if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
*addr = adev->vm_manager.vram_base_offset + *addr - *addr = adev->vm_manager.vram_base_offset + *addr -
adev->gmc.vram_start; adev->gmc.vram_start;
BUG_ON(*addr & 0xFFFF00000000003FULL); BUG_ON(*addr & 0xFFFF00000000003FULL);
@ -641,6 +693,29 @@ static int gmc_v9_0_ecc_available(struct amdgpu_device *adev)
return lost_sheep == 0; return lost_sheep == 0;
} }
static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
{
/*
* TODO:
* Currently there is a bug where some memory client outside
* of the driver writes to the first 8M of VRAM on S3 resume;
* this overwrites the GART, which by default gets placed in the
* first 8M, and causes VM_FAULTS once GTT is accessed.
* Keep the stolen memory reservation until this is solved.
* Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
*/
switch (adev->asic_type) {
case CHIP_VEGA10:
return true;
case CHIP_RAVEN:
case CHIP_VEGA12:
case CHIP_VEGA20:
default:
return false;
}
}
static int gmc_v9_0_late_init(void *handle) static int gmc_v9_0_late_init(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -657,10 +732,8 @@ static int gmc_v9_0_late_init(void *handle)
unsigned i; unsigned i;
int r; int r;
/* if (!gmc_v9_0_keep_stolen_memory(adev))
* TODO - Uncomment once GART corruption issue is fixed. amdgpu_bo_late_init(adev);
*/
/* amdgpu_bo_late_init(adev); */
for(i = 0; i < adev->num_rings; ++i) { for(i = 0; i < adev->num_rings; ++i) {
struct amdgpu_ring *ring = adev->rings[i]; struct amdgpu_ring *ring = adev->rings[i];
@ -698,10 +771,18 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
u64 base = 0; u64 base = 0;
if (!amdgpu_sriov_vf(adev)) if (!amdgpu_sriov_vf(adev))
base = mmhub_v1_0_get_fb_location(adev); base = mmhub_v1_0_get_fb_location(adev);
amdgpu_device_vram_location(adev, &adev->gmc, base); /* add the xgmi offset of the physical node */
amdgpu_device_gart_location(adev, mc); base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc);
if (!amdgpu_sriov_vf(adev))
amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */ /* base offset of vram pages */
adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
/* XXX: add the xgmi offset of the physical node? */
adev->vm_manager.vram_base_offset +=
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
} }
/** /**
@ -781,7 +862,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{ {
int r; int r;
if (adev->gart.robj) { if (adev->gart.bo) {
WARN(1, "VEGA10 PCIE GART already initialized\n"); WARN(1, "VEGA10 PCIE GART already initialized\n");
return 0; return 0;
} }
@ -797,18 +878,16 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{ {
#if 0
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
#endif
unsigned size; unsigned size;
/* /*
* TODO Remove once GART corruption is resolved * TODO Remove once GART corruption is resolved
* Check related code in gmc_v9_0_sw_fini * Check related code in gmc_v9_0_sw_fini
* */ * */
size = 9 * 1024 * 1024; if (gmc_v9_0_keep_stolen_memory(adev))
return 9 * 1024 * 1024;
#if 0
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
} else { } else {
@ -825,6 +904,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
break; break;
case CHIP_VEGA10: case CHIP_VEGA10:
case CHIP_VEGA12: case CHIP_VEGA12:
case CHIP_VEGA20:
default: default:
viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
@ -837,7 +917,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
return 0; return 0;
#endif
return size; return size;
} }
@ -913,6 +992,12 @@ static int gmc_v9_0_sw_init(void *handle)
} }
adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
if (adev->asic_type == CHIP_VEGA20) {
r = gfxhub_v1_1_get_xgmi_info(adev);
if (r)
return r;
}
r = gmc_v9_0_mc_init(adev); r = gmc_v9_0_mc_init(adev);
if (r) if (r)
return r; return r;
@ -949,16 +1034,8 @@ static int gmc_v9_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev); amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev); amdgpu_vm_manager_fini(adev);
/* if (gmc_v9_0_keep_stolen_memory(adev))
* TODO: amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
* Currently there is a bug where some memory client outside
* of the driver writes to first 8M of VRAM on S3 resume,
* this overrides GART which by default gets placed in first 8M and
* causes VM_FAULTS once GTT is accessed.
* Keep the stolen memory reservation until the while this is not solved.
* Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
*/
amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
amdgpu_gart_table_vram_free(adev); amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev); amdgpu_bo_fini(adev);
@ -1007,7 +1084,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
golden_settings_vega10_hdp, golden_settings_vega10_hdp,
ARRAY_SIZE(golden_settings_vega10_hdp)); ARRAY_SIZE(golden_settings_vega10_hdp));
if (adev->gart.robj == NULL) { if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL; return -EINVAL;
} }
@ -1017,7 +1094,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_RAVEN: case CHIP_RAVEN:
mmhub_v1_0_initialize_power_gating(adev);
mmhub_v1_0_update_power_gating(adev, true); mmhub_v1_0_update_power_gating(adev, true);
break; break;
default: default:
@ -1051,7 +1127,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20), (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr); (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
adev->gart.ready = true; adev->gart.ready = true;
return 0; return 0;
} }


@ -38,22 +38,23 @@
u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
{ {
u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
u64 top = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP);
base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24; base <<= 24;
top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
top <<= 24;
adev->gmc.fb_start = base;
adev->gmc.fb_end = top;
return base; return base;
} }
static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
{ {
uint64_t value; uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
value = adev->gart.table_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
value &= 0x0000FFFFFFFFF000ULL;
value |= 0x1; /* valid bit */
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value)); lower_32_bits(value));
@ -82,16 +83,27 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value; uint64_t value;
uint32_t tmp; uint32_t tmp;
/* Disable AGP. */ /* Program the AGP BAR */
WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF); WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */ /* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18); min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18);
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
/*
* Raven2 has a HW issue where it is unable to use vram that lies
* beyond MC_VM_SYSTEM_APERTURE_HIGH_ADDR. As a workaround, increase
* the system aperture high address (add 1) to get rid of the VM
* fault and hardware hang.
*/
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
(max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18) + 0x1);
else
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */ /* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
@ -260,236 +272,16 @@ static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
} }
} }
struct pctl_data {
uint32_t index;
uint32_t data;
};
static const struct pctl_data pctl0_data[] = {
{0x0, 0x7a640},
{0x9, 0x2a64a},
{0xd, 0x2a680},
{0x11, 0x6a684},
{0x19, 0xea68e},
{0x29, 0xa69e},
{0x2b, 0x0010a6c0},
{0x3d, 0x83a707},
{0xc2, 0x8a7a4},
{0xcc, 0x1a7b8},
{0xcf, 0xfa7cc},
{0xe0, 0x17a7dd},
{0xf9, 0xa7dc},
{0xfb, 0x12a7f5},
{0x10f, 0xa808},
{0x111, 0x12a810},
{0x125, 0x7a82c}
};
#define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
#define PCTL0_RENG_EXEC_END_PTR 0x12d
#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640
#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
static const struct pctl_data pctl1_data[] = {
{0x0, 0x39a000},
{0x3b, 0x44a040},
{0x81, 0x2a08d},
{0x85, 0x6ba094},
{0xf2, 0x18a100},
{0x10c, 0x4a132},
{0x112, 0xca141},
{0x120, 0x2fa158},
{0x151, 0x17a1d0},
{0x16a, 0x1a1e9},
{0x16d, 0x13a1ec},
{0x182, 0x7a201},
{0x18b, 0x3a20a},
{0x190, 0x7a580},
{0x199, 0xa590},
{0x19b, 0x4a594},
{0x1a1, 0x1a59c},
{0x1a4, 0x7a82c},
{0x1ad, 0xfa7cc},
{0x1be, 0x17a7dd},
{0x1d7, 0x12a810},
{0x1eb, 0x4000a7e1},
{0x1ec, 0x5000a7f5},
{0x1ed, 0x4000a7e2},
{0x1ee, 0x5000a7dc},
{0x1ef, 0x4000a7e3},
{0x1f0, 0x5000a7f6},
{0x1f1, 0x5000a7e4}
};
#define PCTL1_DATA_LEN (ARRAY_SIZE(pctl1_data))
#define PCTL1_RENG_EXEC_END_PTR 0x1f1
#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000
#define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d
#define PCTL1_STCTRL_REG_SAVE_RANGE1_BASE 0xa580
#define PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT 0xa59d
#define PCTL1_STCTRL_REG_SAVE_RANGE2_BASE 0xa82c
#define PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT 0xa833
static void mmhub_v1_0_power_gating_write_save_ranges(struct amdgpu_device *adev)
{
uint32_t tmp = 0;
/* PCTL0_STCTRL_REGISTER_SAVE_RANGE0 */
tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
STCTRL_REGISTER_SAVE_BASE,
PCTL0_STCTRL_REG_SAVE_RANGE0_BASE);
tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
STCTRL_REGISTER_SAVE_LIMIT,
PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT);
WREG32_SOC15(MMHUB, 0, mmPCTL0_STCTRL_REGISTER_SAVE_RANGE0, tmp);
/* PCTL1_STCTRL_REGISTER_SAVE_RANGE0 */
tmp = 0;
tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
STCTRL_REGISTER_SAVE_BASE,
PCTL1_STCTRL_REG_SAVE_RANGE0_BASE);
tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
STCTRL_REGISTER_SAVE_LIMIT,
PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT);
WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE0, tmp);
/* PCTL1_STCTRL_REGISTER_SAVE_RANGE1 */
tmp = 0;
tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
STCTRL_REGISTER_SAVE_BASE,
PCTL1_STCTRL_REG_SAVE_RANGE1_BASE);
tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
STCTRL_REGISTER_SAVE_LIMIT,
PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT);
WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE1, tmp);
/* PCTL1_STCTRL_REGISTER_SAVE_RANGE2 */
tmp = 0;
tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
STCTRL_REGISTER_SAVE_BASE,
PCTL1_STCTRL_REG_SAVE_RANGE2_BASE);
tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
STCTRL_REGISTER_SAVE_LIMIT,
PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT);
WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE2, tmp);
}
void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
{
uint32_t pctl0_misc = 0;
uint32_t pctl0_reng_execute = 0;
uint32_t pctl1_misc = 0;
uint32_t pctl1_reng_execute = 0;
int i = 0;
if (amdgpu_sriov_vf(adev))
return;
/****************** pctl0 **********************/
pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
/* Light sleep must be disabled before writing to pctl0 registers */
pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
/* Write data used to access ram of register engine */
for (i = 0; i < PCTL0_DATA_LEN; i++) {
WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_INDEX,
pctl0_data[i].index);
WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_DATA,
pctl0_data[i].data);
}
/* Re-enable light sleep */
pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
/****************** pctl1 **********************/
pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
/* Light sleep must be disabled before writing to pctl1 registers */
pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
/* Write data used to access ram of register engine */
for (i = 0; i < PCTL1_DATA_LEN; i++) {
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_INDEX,
pctl1_data[i].index);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_DATA,
pctl1_data[i].data);
}
/* Re-enable light sleep */
pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
mmhub_v1_0_power_gating_write_save_ranges(adev);
/* Set the reng execute end ptr for pctl0 */
pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
PCTL0_RENG_EXECUTE,
RENG_EXECUTE_END_PTR,
PCTL0_RENG_EXEC_END_PTR);
WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
/* Set the reng execute end ptr for pctl1 */
pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
PCTL1_RENG_EXECUTE,
RENG_EXECUTE_END_PTR,
PCTL1_RENG_EXEC_END_PTR);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
}
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
bool enable) bool enable)
{ {
uint32_t pctl0_reng_execute = 0;
uint32_t pctl1_reng_execute = 0;
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
return; return;
pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
PCTL0_RENG_EXECUTE,
RENG_EXECUTE_ON_PWR_UP, 1);
pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
PCTL0_RENG_EXECUTE,
RENG_EXECUTE_ON_REG_UPDATE, 1);
WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
PCTL1_RENG_EXECUTE,
RENG_EXECUTE_ON_PWR_UP, 1);
pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
PCTL1_RENG_EXECUTE,
RENG_EXECUTE_ON_REG_UPDATE, 1);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
if (adev->powerplay.pp_funcs->set_powergating_by_smu) if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);
} else {
pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
PCTL0_RENG_EXECUTE,
RENG_EXECUTE_ON_PWR_UP, 0);
pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
PCTL0_RENG_EXECUTE,
RENG_EXECUTE_ON_REG_UPDATE, 0);
WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
PCTL1_RENG_EXECUTE,
RENG_EXECUTE_ON_PWR_UP, 0);
pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
PCTL1_RENG_EXECUTE,
RENG_EXECUTE_ON_REG_UPDATE, 0);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
} }
} }


@ -32,7 +32,6 @@ void mmhub_v1_0_init(struct amdgpu_device *adev);
int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state); enum amd_clockgating_state state);
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev);
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
bool enable); bool enable);


@ -266,8 +266,8 @@ flr_done:
} }
/* Trigger recovery for world switch failure if no TDR */ /* Trigger recovery for world switch failure if no TDR */
if (amdgpu_lockup_timeout == 0) if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL, true); amdgpu_device_gpu_recover(adev, NULL);
} }
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,


@ -521,7 +521,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
} }
/* Trigger recovery due to world switch failure */ /* Trigger recovery due to world switch failure */
amdgpu_device_gpu_recover(adev, NULL, false); if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
} }
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,


@ -34,19 +34,10 @@
#define smnCPM_CONTROL 0x11180460 #define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070 #define smnPCIE_CNTL2 0x11180070
/* vega20 */
#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011
#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{ {
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
if (adev->asic_type == CHIP_VEGA20)
tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
else
tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@ -84,14 +75,10 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg); u32 doorbell_range = RREG32(reg);
u32 range = 2;
if (adev->asic_type == CHIP_VEGA20)
range = 8;
if (use_doorbell) { if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range); doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
} else } else
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
@ -146,9 +133,6 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{ {
uint32_t def, data; uint32_t def, data;
if (adev->asic_type == CHIP_VEGA20)
return;
/* NBIF_MGCG_CTRL_LCLK */ /* NBIF_MGCG_CTRL_LCLK */
def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);


@ -0,0 +1,248 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "nbio_v7_4.h"
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
return tmp;
}
static void nbio_v7_4_mc_access_enable(struct amdgpu_device *adev, bool enable)
{
if (enable)
WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
else
WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
}
static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
else
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
}
static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
{
return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE);
}
static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index)
{
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
} else
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
WREG32(reg, doorbell_range);
}
static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
}
static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
bool enable)
{
}
static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
bool use_doorbell, int doorbell_index)
{
u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE);
if (use_doorbell) {
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2);
} else
ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
}
static void nbio_v7_4_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
//TODO: Add support for v7.4
}
static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_CNTL2);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
PCIE_CNTL2__MST_MEM_LS_EN_MASK |
PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
} else {
data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
PCIE_CNTL2__MST_MEM_LS_EN_MASK |
PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
}
if (def != data)
WREG32_PCIE(smnPCIE_CNTL2, data);
}
static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev,
u32 *flags)
{
int data;
/* AMD_CG_SUPPORT_BIF_MGCG */
data = RREG32_PCIE(smnCPM_CONTROL);
if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_BIF_MGCG;
/* AMD_CG_SUPPORT_BIF_LS */
data = RREG32_PCIE(smnPCIE_CNTL2);
if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_BIF_LS;
}
static void nbio_v7_4_ih_control(struct amdgpu_device *adev)
{
u32 interrupt_cntl;
/* setup interrupt control */
WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
*/
interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
/* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
}
static u32 nbio_v7_4_get_hdp_flush_req_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
}
static u32 nbio_v7_4_get_hdp_flush_done_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
}
static u32 nbio_v7_4_get_pcie_index_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
}
static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev)
{
return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
}
static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
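/*
 * Read RCC_IOV_FUNC_IDENTIFIER to detect virtualization: bit 0 means we
 * are running as an SR-IOV VF, bit 31 means IOV is enabled, and a zero
 * value inside a virtual machine indicates passthrough mode.
 */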
static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
{
uint32_t reg;
reg = RREG32_SOC15(NBIO, 0, mmRCC_IOV_FUNC_IDENTIFIER);
if (reg & 1)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
if (reg & 0x80000000)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
if (!reg) {
if (is_virtual_machine()) /* passthrough mode excludes sriov mode */
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
}
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
}
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset,
.get_pcie_data_offset = nbio_v7_4_get_pcie_data_offset,
.get_rev_id = nbio_v7_4_get_rev_id,
.mc_access_enable = nbio_v7_4_mc_access_enable,
.hdp_flush = nbio_v7_4_hdp_flush,
.get_memsize = nbio_v7_4_get_memsize,
.sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range,
.enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
.update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating,
.update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep,
.get_clockgating_state = nbio_v7_4_get_clockgating_state,
.ih_control = nbio_v7_4_ih_control,
.init_registers = nbio_v7_4_init_registers,
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
};


@ -0,0 +1,31 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __NBIO_V7_4_H__
#define __NBIO_V7_4_H__
#include "soc15_common.h"
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
#endif


@ -189,7 +189,8 @@ enum psp_gfx_fw_type
GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22, GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22,
GFX_FW_TYPE_MAX = 23 GFX_FW_TYPE_UVD1 = 23,
GFX_FW_TYPE_MAX = 24
}; };
/* Command to load HW IP FW. */ /* Command to load HW IP FW. */


@ -35,6 +35,8 @@
#include "sdma0/sdma0_4_1_offset.h" #include "sdma0/sdma0_4_1_offset.h"
MODULE_FIRMWARE("amdgpu/raven_asd.bin"); MODULE_FIRMWARE("amdgpu/raven_asd.bin");
MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
static int static int
psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
@ -91,6 +93,12 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
case AMDGPU_UCODE_ID_VCN: case AMDGPU_UCODE_ID_VCN:
*type = GFX_FW_TYPE_VCN; *type = GFX_FW_TYPE_VCN;
break; break;
case AMDGPU_UCODE_ID_DMCU_ERAM:
*type = GFX_FW_TYPE_DMCU_ERAM;
break;
case AMDGPU_UCODE_ID_DMCU_INTV:
*type = GFX_FW_TYPE_DMCU_ISR;
break;
case AMDGPU_UCODE_ID_MAXIMUM: case AMDGPU_UCODE_ID_MAXIMUM:
default: default:
return -EINVAL; return -EINVAL;
@ -111,7 +119,12 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_RAVEN: case CHIP_RAVEN:
chip_name = "raven"; if (adev->rev_id >= 0x8)
chip_name = "raven2";
else if (adev->pdev->device == 0x15d8)
chip_name = "picasso";
else
chip_name = "raven";
break; break;
default: BUG(); default: BUG();
} }


@ -0,0 +1,598 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
#include "soc15_common.h"
#include "psp_v11_0.h"
#include "mp/mp_11_0_offset.h"
#include "mp/mp_11_0_sh_mask.h"
#include "gc/gc_9_0_offset.h"
#include "sdma0/sdma0_4_0_offset.h"
#include "nbio/nbio_7_4_offset.h"
MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
static int
psp_v11_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
{
switch (ucode->ucode_id) {
case AMDGPU_UCODE_ID_SDMA0:
*type = GFX_FW_TYPE_SDMA0;
break;
case AMDGPU_UCODE_ID_SDMA1:
*type = GFX_FW_TYPE_SDMA1;
break;
case AMDGPU_UCODE_ID_CP_CE:
*type = GFX_FW_TYPE_CP_CE;
break;
case AMDGPU_UCODE_ID_CP_PFP:
*type = GFX_FW_TYPE_CP_PFP;
break;
case AMDGPU_UCODE_ID_CP_ME:
*type = GFX_FW_TYPE_CP_ME;
break;
case AMDGPU_UCODE_ID_CP_MEC1:
*type = GFX_FW_TYPE_CP_MEC;
break;
case AMDGPU_UCODE_ID_CP_MEC1_JT:
*type = GFX_FW_TYPE_CP_MEC_ME1;
break;
case AMDGPU_UCODE_ID_CP_MEC2:
*type = GFX_FW_TYPE_CP_MEC;
break;
case AMDGPU_UCODE_ID_CP_MEC2_JT:
*type = GFX_FW_TYPE_CP_MEC_ME2;
break;
case AMDGPU_UCODE_ID_RLC_G:
*type = GFX_FW_TYPE_RLC_G;
break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
case AMDGPU_UCODE_ID_UVD:
*type = GFX_FW_TYPE_UVD;
break;
case AMDGPU_UCODE_ID_VCE:
*type = GFX_FW_TYPE_VCE;
break;
case AMDGPU_UCODE_ID_UVD1:
*type = GFX_FW_TYPE_UVD1;
break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
}
return 0;
}
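/*
 * Load and validate the <asic>_sos.bin firmware image and record the sys
 * driver and sOS sizes and start addresses from the v1.0 PSP firmware
 * header.
 */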
static int psp_v11_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
const char *chip_name;
char fw_name[30];
int err = 0;
const struct psp_firmware_header_v1_0 *hdr;
DRM_DEBUG("\n");
switch (adev->asic_type) {
case CHIP_VEGA20:
chip_name = "vega20";
break;
default:
BUG();
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
if (err)
goto out;
err = amdgpu_ucode_validate(adev->psp.sos_fw);
if (err)
goto out;
hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version);
adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes);
adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) -
le32_to_cpu(hdr->sos_size_bytes);
adev->psp.sys_start_addr = (uint8_t *)hdr +
le32_to_cpu(hdr->header.ucode_array_offset_bytes);
adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(hdr->sos_offset_bytes);
return 0;
out:
if (err) {
dev_err(adev->dev,
"psp v11.0: Failed to load firmware \"%s\"\n",
fw_name);
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
}
return err;
}
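/*
 * Load the PSP system driver: skip if the sOS sign-of-life register is
 * already set, otherwise copy the sys driver binary into the PSP private
 * buffer and hand it to the bootrom via C2PMSG_36/C2PMSG_35.
 */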
static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
{
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
uint32_t sol_reg;
/* Check sOS sign of life register to confirm sys driver and sOS
* have already been loaded.
*/
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
if (sol_reg)
return 0;
/* Wait for the bootloader to signal that it is ready by setting bit 31 of C2PMSG_35 to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
0x80000000, 0x80000000, false);
if (ret)
return ret;
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
/* Copy PSP System Driver binary to memory */
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
/* Provide the sys driver to bootrom */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 1 << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
/* there might be a handshake issue with the hardware which needs a delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
0x80000000, 0x80000000, false);
return ret;
}
static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
{
int ret;
unsigned int psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
uint32_t sol_reg;
/* Check sOS sign of life register to confirm sys driver and sOS
* have already been loaded.
*/
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
if (sol_reg)
return 0;
/* Wait for the bootloader to signal that it is ready by setting bit 31 of C2PMSG_35 to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
0x80000000, 0x80000000, false);
if (ret)
return ret;
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
/* Copy Secure OS binary to PSP memory */
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
/* Provide the PSP secure OS to bootrom */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 2 << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
/* there might be a handshake issue with the hardware which needs a delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
0, true);
return ret;
}
static int psp_v11_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd)
{
int ret;
uint64_t fw_mem_mc_addr = ucode->mc_addr;
memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
ret = psp_v11_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
if (ret)
DRM_ERROR("Unknown firmware type\n");
return ret;
}
static int psp_v11_0_ring_init(struct psp_context *psp,
enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
struct amdgpu_device *adev = psp->adev;
ring = &psp->km_ring;
ring->ring_type = ring_type;
/* allocate 4k Page of Local Frame Buffer memory for ring */
ring->ring_size = 0x1000;
ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->firmware.rbuf,
&ring->ring_mem_mc_addr,
(void **)&ring->ring_mem);
if (ret) {
ring->ring_size = 0;
return ret;
}
return 0;
}
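/*
 * Create the KM (GPCOM) ring: program its GPU address and size into
 * C2PMSG_69/70/71, issue the ring-type initialization command through
 * C2PMSG_64 and wait for the response flag.
 */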
static int psp_v11_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
int ret = 0;
unsigned int psp_ring_reg = 0;
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
/* Write high address of the ring to C2PMSG_70 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
/* Write size of ring to C2PMSG_71 */
psp_ring_reg = ring->ring_size;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
/* Write the ring initialization command to C2PMSG_64 */
psp_ring_reg = ring_type;
psp_ring_reg = psp_ring_reg << 16;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
/* there might be a handshake issue with the hardware which needs a delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x8000FFFF, false);
return ret;
}
static int psp_v11_0_ring_stop(struct psp_context *psp,
enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
struct amdgpu_device *adev = psp->adev;
ring = &psp->km_ring;
/* Write the ring destroy command to C2PMSG_64 */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS);
/* there might be a handshake issue with the hardware which needs a delay */
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x80000000, false);
return ret;
}
static int psp_v11_0_ring_destroy(struct psp_context *psp,
enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
ret = psp_v11_0_ring_stop(psp, ring_type);
if (ret)
DRM_ERROR("Fail to stop psp ring\n");
amdgpu_bo_free_kernel(&adev->firmware.rbuf,
&ring->ring_mem_mc_addr,
(void **)&ring->ring_mem);
return ret;
}
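/*
 * Queue one ring buffer frame describing the command buffer and fence
 * addresses; the KM write pointer in C2PMSG_67 is advanced in DWORDs and
 * wraps around the ring.
 */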
static int psp_v11_0_cmd_submit(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
int index)
{
unsigned int psp_write_ptr_reg = 0;
struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
struct psp_ring *ring = &psp->km_ring;
struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
struct amdgpu_device *adev = psp->adev;
uint32_t ring_size_dw = ring->ring_size / 4;
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
/* KM (GPCOM) prepare write pointer */
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
/* Update KM RB frame pointer to new frame */
/* write_frame ptr increments by size of rb_frame in bytes */
/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
if ((psp_write_ptr_reg % ring_size_dw) == 0)
write_frame = ring_buffer_start;
else
write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
/* Check invalid write_frame ptr address */
if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
ring_buffer_start, ring_buffer_end, write_frame);
DRM_ERROR("write_frame is pointing to address out of bounds\n");
return -EINVAL;
}
/* Initialize KM RB frame */
memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
/* Update KM RB frame */
write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
write_frame->fence_value = index;
/* Update the write Pointer in DWORDs */
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
return 0;
}
static int
psp_v11_0_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
unsigned int *sram_data_reg_offset,
enum AMDGPU_UCODE_ID ucode_id)
{
int ret = 0;
switch (ucode_id) {
/* TODO: needs to confirm */
#if 0
case AMDGPU_UCODE_ID_SMC:
*sram_offset = 0;
*sram_addr_reg_offset = 0;
*sram_data_reg_offset = 0;
break;
#endif
case AMDGPU_UCODE_ID_CP_CE:
*sram_offset = 0x0;
*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
break;
case AMDGPU_UCODE_ID_CP_PFP:
*sram_offset = 0x0;
*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
break;
case AMDGPU_UCODE_ID_CP_ME:
*sram_offset = 0x0;
*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
break;
case AMDGPU_UCODE_ID_CP_MEC1:
*sram_offset = 0x10000;
*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
break;
case AMDGPU_UCODE_ID_CP_MEC2:
*sram_offset = 0x10000;
*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
break;
case AMDGPU_UCODE_ID_RLC_G:
*sram_offset = 0x2000;
*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
break;
case AMDGPU_UCODE_ID_SDMA0:
*sram_offset = 0x0;
*sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
*sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
break;
/* TODO: needs to be confirmed */
#if 0
case AMDGPU_UCODE_ID_SDMA1:
*sram_offset = ;
*sram_addr_reg_offset = ;
break;
case AMDGPU_UCODE_ID_UVD:
*sram_offset = ;
*sram_addr_reg_offset = ;
break;
case AMDGPU_UCODE_ID_VCE:
*sram_offset = ;
*sram_addr_reg_offset = ;
break;
#endif
case AMDGPU_UCODE_ID_MAXIMUM:
default:
ret = -EINVAL;
break;
}
return ret;
}
static bool psp_v11_0_compare_sram_data(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
enum AMDGPU_UCODE_ID ucode_type)
{
int err = 0;
unsigned int fw_sram_reg_val = 0;
unsigned int fw_sram_addr_reg_offset = 0;
unsigned int fw_sram_data_reg_offset = 0;
unsigned int ucode_size;
uint32_t *ucode_mem = NULL;
struct amdgpu_device *adev = psp->adev;
err = psp_v11_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
&fw_sram_data_reg_offset, ucode_type);
if (err)
return false;
WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
ucode_size = ucode->ucode_size;
ucode_mem = (uint32_t *)ucode->kaddr;
while (ucode_size) {
fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
if (*ucode_mem != fw_sram_reg_val)
return false;
ucode_mem++;
/* 4 bytes */
ucode_size -= 4;
}
return true;
}
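
psp_v11_0_compare_sram_data relies on an indirect ADDR/DATA register pair: one write selects the SRAM offset, then repeated reads of the data port stream back consecutive 32-bit words (the auto-increment of the data port is an assumption of this sketch, not something the code above states). A minimal user-space model of that compare loop, with plain arrays standing in for RREG32/WREG32:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SRAM_WORDS 64

static uint32_t fake_sram[SRAM_WORDS];   /* stands in for on-chip ucode SRAM       */
static uint32_t sram_addr;               /* stands in for the *_UCODE_ADDR offset  */

static void write_addr(uint32_t offset) { sram_addr = offset; }
static uint32_t read_data(void)         { return fake_sram[sram_addr++]; } /* assumed auto-increment */

/* Word-by-word compare of a firmware image against SRAM contents,
 * shaped like the while (ucode_size) loop above. */
static bool compare_sram(const uint32_t *ucode, uint32_t size_bytes, uint32_t offset)
{
	write_addr(offset);
	while (size_bytes) {
		if (*ucode++ != read_data())
			return false;
		size_bytes -= 4;   /* one 32-bit word per iteration */
	}
	return true;
}

int main(void)
{
	uint32_t image[4] = { 0x11, 0x22, 0x33, 0x44 };

	for (int i = 0; i < 4; i++)
		fake_sram[i] = image[i];         /* pretend the PSP already loaded it */

	printf("match: %s\n", compare_sram(image, sizeof(image), 0) ? "yes" : "no");
	return 0;
}
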
static int psp_v11_0_mode1_reset(struct psp_context *psp)
{
int ret;
uint32_t offset;
struct amdgpu_device *adev = psp->adev;
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
return -EINVAL;
}
/*send the mode 1 reset command*/
WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
mdelay(1000);
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
if (ret) {
DRM_INFO("psp mode 1 reset failed!\n");
return -EINVAL;
}
DRM_INFO("psp mode1 reset succeed \n");
return 0;
}
/* TODO: Fill in the following functions once the PSP firmware interface for XGMI is ready.
* For now, return success and hack the hive_id so high level code can
* start testing
*/
static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp,
int number_devices, struct psp_xgmi_topology_info *topology)
{
return 0;
}
static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
int number_devices, struct psp_xgmi_topology_info *topology)
{
return 0;
}
static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
{
u64 hive_id = 0;
/* Remove me when we can get correct hive_id through PSP */
if (psp->adev->gmc.xgmi.num_physical_nodes)
hive_id = 0x123456789abcdef;
return hive_id;
}
static const struct psp_funcs psp_v11_0_funcs = {
.init_microcode = psp_v11_0_init_microcode,
.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v11_0_bootloader_load_sos,
.prep_cmd_buf = psp_v11_0_prep_cmd_buf,
.ring_init = psp_v11_0_ring_init,
.ring_create = psp_v11_0_ring_create,
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
.cmd_submit = psp_v11_0_cmd_submit,
.compare_sram_data = psp_v11_0_compare_sram_data,
.mode1_reset = psp_v11_0_mode1_reset,
.xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
.xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
.xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
{
psp->funcs = &psp_v11_0_funcs;
}

View File

@ -0,0 +1,30 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __PSP_V11_0_H__
#define __PSP_V11_0_H__
#include "amdgpu_psp.h"
void psp_v11_0_set_psp_funcs(struct psp_context *psp);
#endif

View File

@ -41,8 +41,6 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
#define smnMP1_FIRMWARE_FLAGS 0x3010028 #define smnMP1_FIRMWARE_FLAGS 0x3010028

View File

@ -1312,15 +1312,17 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{ {
struct drm_gpu_scheduler *sched;
unsigned i; unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) { if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) for (i = 0; i < adev->sdma.num_instances; i++) {
adev->vm_manager.vm_pte_rings[i] = sched = &adev->sdma.instance[i].ring.sched;
&adev->sdma.instance[i].ring; adev->vm_manager.vm_pte_rqs[i] =
&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; }
adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
} }
} }
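
This hunk (and the matching ones for sdma_v3_0, sdma_v4_0 and si_dma below) swaps raw ring pointers for the kernel-priority run queue of each ring's GPU scheduler, so page-table updates go through the scheduler's load balancing. A simplified standalone model of that data-structure change (the struct names here are stand-ins, not the real drm types):

#include <stdio.h>

enum sched_priority { PRIO_MIN, PRIO_NORMAL, PRIO_KERNEL, PRIO_COUNT };

struct sched_rq  { const char *name; };
struct gpu_sched { struct sched_rq sched_rq[PRIO_COUNT]; };
struct sdma_ring { struct gpu_sched sched; };

#define NUM_SDMA 2

int main(void)
{
	struct sdma_ring instance[NUM_SDMA] = {
		{ .sched.sched_rq[PRIO_KERNEL] = { "sdma0 kernel rq" } },
		{ .sched.sched_rq[PRIO_KERNEL] = { "sdma1 kernel rq" } },
	};
	struct sched_rq *vm_pte_rqs[NUM_SDMA];

	/* mirrors the new loop body: store the kernel-priority rq of each
	 * SDMA ring's scheduler instead of the ring itself */
	for (int i = 0; i < NUM_SDMA; i++)
		vm_pte_rqs[i] = &instance[i].sched.sched_rq[PRIO_KERNEL];

	for (int i = 0; i < NUM_SDMA; i++)
		printf("vm_pte_rqs[%d] -> %s\n", i, vm_pte_rqs[i]->name);
	return 0;
}
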

View File

@ -1752,15 +1752,17 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{ {
struct drm_gpu_scheduler *sched;
unsigned i; unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) { if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) for (i = 0; i < adev->sdma.num_instances; i++) {
adev->vm_manager.vm_pte_rings[i] = sched = &adev->sdma.instance[i].ring.sched;
&adev->sdma.instance[i].ring; adev->vm_manager.vm_pte_rqs[i] =
&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; }
adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
} }
} }

View File

@ -27,10 +27,10 @@
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_trace.h" #include "amdgpu_trace.h"
#include "sdma0/sdma0_4_0_offset.h" #include "sdma0/sdma0_4_2_offset.h"
#include "sdma0/sdma0_4_0_sh_mask.h" #include "sdma0/sdma0_4_2_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h" #include "sdma1/sdma1_4_2_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h" #include "sdma1/sdma1_4_2_sh_mask.h"
#include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_offset.h"
#include "sdma0/sdma0_4_1_default.h" #include "sdma0/sdma0_4_1_default.h"
@ -48,6 +48,8 @@ MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
@ -70,6 +72,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
@ -81,7 +84,8 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xfc000000, 0x00000000)
}; };
static const struct soc15_reg_golden golden_settings_sdma_vg10[] = { static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
@ -98,8 +102,7 @@ static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001) SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
}; };
static const struct soc15_reg_golden golden_settings_sdma_4_1[] = static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
@ -109,29 +112,71 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
}; };
static const struct soc15_reg_golden golden_settings_sdma_4_2[] = static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
};
static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
{ {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
};
static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
}; };
static const struct soc15_reg_golden golden_settings_sdma_rv1[] = static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
@ -140,6 +185,12 @@ static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002) SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
}; };
static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
};
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset) u32 instance, u32 offset)
{ {
@ -168,16 +219,27 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
break; break;
case CHIP_VEGA20: case CHIP_VEGA20:
soc15_program_register_sequence(adev, soc15_program_register_sequence(adev,
golden_settings_sdma_4_2, golden_settings_sdma0_4_2_init,
ARRAY_SIZE(golden_settings_sdma_4_2)); ARRAY_SIZE(golden_settings_sdma0_4_2_init));
soc15_program_register_sequence(adev,
golden_settings_sdma0_4_2,
ARRAY_SIZE(golden_settings_sdma0_4_2));
soc15_program_register_sequence(adev,
golden_settings_sdma1_4_2,
ARRAY_SIZE(golden_settings_sdma1_4_2));
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
soc15_program_register_sequence(adev, soc15_program_register_sequence(adev,
golden_settings_sdma_4_1, golden_settings_sdma_4_1,
ARRAY_SIZE(golden_settings_sdma_4_1)); ARRAY_SIZE(golden_settings_sdma_4_1));
soc15_program_register_sequence(adev, if (adev->rev_id >= 8)
golden_settings_sdma_rv1, soc15_program_register_sequence(adev,
ARRAY_SIZE(golden_settings_sdma_rv1)); golden_settings_sdma_rv2,
ARRAY_SIZE(golden_settings_sdma_rv2));
else
soc15_program_register_sequence(adev,
golden_settings_sdma_rv1,
ARRAY_SIZE(golden_settings_sdma_rv1));
break; break;
default: default:
break; break;
@ -218,7 +280,12 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
chip_name = "vega20"; chip_name = "vega20";
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
chip_name = "raven"; if (adev->rev_id >= 8)
chip_name = "raven2";
else if (adev->pdev->device == 0x15d8)
chip_name = "picasso";
else
chip_name = "raven";
break; break;
default: default:
BUG(); BUG();
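
The same rev_id >= 8 / device 0x15d8 test recurs in the golden-register and soc15 early-init hunks, so it is worth seeing in isolation. A standalone sketch of the firmware-name selection added here (the PCI id 0x15d8 and the rev_id cutoff are taken from the hunk; the helper name and the sample ids are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Pick the Raven-family firmware prefix from the revision and PCI device id. */
static const char *raven_chip_name(uint32_t rev_id, uint16_t pci_device)
{
	if (rev_id >= 8)
		return "raven2";        /* Raven2 parts report rev_id 0x8 and up */
	if (pci_device == 0x15d8)
		return "picasso";       /* Picasso shares the low rev_id range with Raven1 */
	return "raven";
}

int main(void)
{
	printf("%s\n", raven_chip_name(0x9, 0x15dd));  /* -> raven2  */
	printf("%s\n", raven_chip_name(0x1, 0x15d8));  /* -> picasso */
	printf("%s\n", raven_chip_name(0x1, 0x15dd));  /* non-Picasso id (illustrative) -> raven */
	return 0;
}
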
@ -1750,15 +1817,17 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{ {
struct drm_gpu_scheduler *sched;
unsigned i; unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) { if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) for (i = 0; i < adev->sdma.num_instances; i++) {
adev->vm_manager.vm_pte_rings[i] = sched = &adev->sdma.instance[i].ring.sched;
&adev->sdma.instance[i].ring; adev->vm_manager.vm_pte_rqs[i] =
&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; }
adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
} }
} }

View File

@ -879,15 +879,17 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{ {
struct drm_gpu_scheduler *sched;
unsigned i; unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) { if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) for (i = 0; i < adev->sdma.num_instances; i++) {
adev->vm_manager.vm_pte_rings[i] = sched = &adev->sdma.instance[i].ring.sched;
&adev->sdma.instance[i].ring; adev->vm_manager.vm_pte_rqs[i] =
&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; }
adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
} }
} }

View File

@ -46,6 +46,26 @@
#define GRPH_ENDIAN_8IN16 1 #define GRPH_ENDIAN_8IN16 1
#define GRPH_ENDIAN_8IN32 2 #define GRPH_ENDIAN_8IN32 2
#define GRPH_ENDIAN_8IN64 3 #define GRPH_ENDIAN_8IN64 3
#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
#define GRPH_RED_SEL_R 0
#define GRPH_RED_SEL_G 1
#define GRPH_RED_SEL_B 2
#define GRPH_RED_SEL_A 3
#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
#define GRPH_GREEN_SEL_G 0
#define GRPH_GREEN_SEL_B 1
#define GRPH_GREEN_SEL_A 2
#define GRPH_GREEN_SEL_R 3
#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
#define GRPH_BLUE_SEL_B 0
#define GRPH_BLUE_SEL_A 1
#define GRPH_BLUE_SEL_R 2
#define GRPH_BLUE_SEL_G 3
#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
#define GRPH_ALPHA_SEL_A 0
#define GRPH_ALPHA_SEL_R 1
#define GRPH_ALPHA_SEL_G 2
#define GRPH_ALPHA_SEL_B 3
#define GRPH_DEPTH(x) (((x) & 0x3) << 0) #define GRPH_DEPTH(x) (((x) & 0x3) << 0)
#define GRPH_DEPTH_8BPP 0 #define GRPH_DEPTH_8BPP 0

View File

@ -2201,6 +2201,26 @@
# define EVERGREEN_GRPH_ENDIAN_8IN16 1 # define EVERGREEN_GRPH_ENDIAN_8IN16 1
# define EVERGREEN_GRPH_ENDIAN_8IN32 2 # define EVERGREEN_GRPH_ENDIAN_8IN32 2
# define EVERGREEN_GRPH_ENDIAN_8IN64 3 # define EVERGREEN_GRPH_ENDIAN_8IN64 3
#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
# define EVERGREEN_GRPH_RED_SEL_R 0
# define EVERGREEN_GRPH_RED_SEL_G 1
# define EVERGREEN_GRPH_RED_SEL_B 2
# define EVERGREEN_GRPH_RED_SEL_A 3
#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
# define EVERGREEN_GRPH_GREEN_SEL_G 0
# define EVERGREEN_GRPH_GREEN_SEL_B 1
# define EVERGREEN_GRPH_GREEN_SEL_A 2
# define EVERGREEN_GRPH_GREEN_SEL_R 3
#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
# define EVERGREEN_GRPH_BLUE_SEL_B 0
# define EVERGREEN_GRPH_BLUE_SEL_A 1
# define EVERGREEN_GRPH_BLUE_SEL_R 2
# define EVERGREEN_GRPH_BLUE_SEL_G 3
#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
# define EVERGREEN_GRPH_ALPHA_SEL_A 0
# define EVERGREEN_GRPH_ALPHA_SEL_R 1
# define EVERGREEN_GRPH_ALPHA_SEL_G 2
# define EVERGREEN_GRPH_ALPHA_SEL_B 3
#define EVERGREEN_D3VGA_CONTROL 0xf8 #define EVERGREEN_D3VGA_CONTROL 0xf8
#define EVERGREEN_D4VGA_CONTROL 0xf9 #define EVERGREEN_D4VGA_CONTROL 0xf9

View File

@ -479,6 +479,11 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block =
.funcs = &soc15_common_ip_funcs, .funcs = &soc15_common_ip_funcs,
}; };
static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
{
return adev->nbio_funcs->get_rev_id(adev);
}
int soc15_set_ip_blocks(struct amdgpu_device *adev) int soc15_set_ip_blocks(struct amdgpu_device *adev)
{ {
/* Set IP register base before any HW register access */ /* Set IP register base before any HW register access */
@ -498,7 +503,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU) if (adev->flags & AMD_IS_APU)
adev->nbio_funcs = &nbio_v7_0_funcs; adev->nbio_funcs = &nbio_v7_0_funcs;
else if (adev->asic_type == CHIP_VEGA20) else if (adev->asic_type == CHIP_VEGA20)
adev->nbio_funcs = &nbio_v7_0_funcs; adev->nbio_funcs = &nbio_v7_4_funcs;
else else
adev->nbio_funcs = &nbio_v6_1_funcs; adev->nbio_funcs = &nbio_v6_1_funcs;
@ -506,6 +511,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
adev->df_funcs = &df_v3_6_funcs; adev->df_funcs = &df_v3_6_funcs;
else else
adev->df_funcs = &df_v1_7_funcs; adev->df_funcs = &df_v1_7_funcs;
adev->rev_id = soc15_get_rev_id(adev);
adev->nbio_funcs->detect_hw_virt(adev); adev->nbio_funcs->detect_hw_virt(adev);
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
@ -518,11 +525,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
if (adev->asic_type != CHIP_VEGA20) { if (adev->asic_type == CHIP_VEGA20)
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
else
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
if (!amdgpu_sriov_vf(adev)) if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
}
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC) #if defined(CONFIG_DRM_AMD_DC)
@ -533,8 +541,10 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
#endif #endif
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev))) {
amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
}
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
@ -561,11 +571,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
return 0; return 0;
} }
static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
{
return adev->nbio_funcs->get_rev_id(adev);
}
static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{ {
adev->nbio_funcs->hdp_flush(adev, ring); adev->nbio_funcs->hdp_flush(adev, ring);
@ -622,7 +627,6 @@ static int soc15_common_early_init(void *handle)
adev->asic_funcs = &soc15_asic_funcs; adev->asic_funcs = &soc15_asic_funcs;
adev->rev_id = soc15_get_rev_id(adev);
adev->external_rev_id = 0xFF; adev->external_rev_id = 0xFF;
switch (adev->asic_type) { switch (adev->asic_type) {
case CHIP_VEGA10: case CHIP_VEGA10:
@ -693,35 +697,78 @@ static int soc15_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x28; adev->external_rev_id = adev->rev_id + 0x28;
break; break;
case CHIP_RAVEN: case CHIP_RAVEN:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | if (adev->rev_id >= 0x8)
AMD_CG_SUPPORT_GFX_MGLS | adev->external_rev_id = adev->rev_id + 0x81;
AMD_CG_SUPPORT_GFX_RLC_LS | else if (adev->pdev->device == 0x15d8)
AMD_CG_SUPPORT_GFX_CP_LS | adev->external_rev_id = adev->rev_id + 0x41;
AMD_CG_SUPPORT_GFX_3D_CGCG | else
AMD_CG_SUPPORT_GFX_3D_CGLS | adev->external_rev_id = 0x1;
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_BIF_MGCG |
AMD_CG_SUPPORT_BIF_LS |
AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_DRM_MGCG |
AMD_CG_SUPPORT_DRM_LS |
AMD_CG_SUPPORT_ROM_MGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; if (adev->rev_id >= 0x8) {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_BIF_LS |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_ROM_MGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
} else if (adev->pdev->device == 0x15d8) {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_BIF_LS |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_ROM_MGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
AMD_CG_SUPPORT_SDMA_LS;
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
AMD_PG_SUPPORT_MMHUB |
AMD_PG_SUPPORT_VCN;
} else {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_RLC_LS |
AMD_CG_SUPPORT_GFX_CP_LS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_BIF_MGCG |
AMD_CG_SUPPORT_BIF_LS |
AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_DRM_MGCG |
AMD_CG_SUPPORT_DRM_LS |
AMD_CG_SUPPORT_ROM_MGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
}
if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_RLC_SMU_HS; AMD_PG_SUPPORT_RLC_SMU_HS;
adev->external_rev_id = 0x1;
break; break;
default: default:
/* FIXME: not supported yet */ /* FIXME: not supported yet */

Some files were not shown because too many files have changed in this diff.