2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-28 07:04:00 +08:00

Merge remote-tracking branch 'airlied/drm-next' into drm-misc-next-fixes

Backmerging airlied/drm-next
This commit is contained in:
Sean Paul 2017-06-20 11:50:41 -04:00
commit b15cdca5b5
778 changed files with 319405 additions and 21806 deletions

View File

@ -5,7 +5,7 @@ with HDMI output and the HVS (Hardware Video Scaler) for compositing
display planes.
Required properties for VC4:
- compatible: Should be "brcm,bcm2835-vc4"
- compatible: Should be "brcm,bcm2835-vc4" or "brcm,cygnus-vc4"
Required properties for Pixel Valve:
- compatible: Should be one of "brcm,bcm2835-pixelvalve0",
@ -54,11 +54,14 @@ Required properties for VEC:
See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
Required properties for V3D:
- compatible: Should be "brcm,bcm2835-v3d"
- compatible: Should be "brcm,bcm2835-v3d" or "brcm,cygnus-v3d"
- reg: Physical base address and length of the V3D's registers
- interrupts: The interrupt number
See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
Optional properties for V3D:
- clocks: The clock the unit runs on
Required properties for DSI:
- compatible: Should be "brcm,bcm2835-dsi0" or "brcm,bcm2835-dsi1"
- reg: Physical base address and length of the DSI block's registers

View File

@ -8,12 +8,13 @@ Required properties:
- compatible: value should be one of:
"samsung,exynos5433-decon", "samsung,exynos5433-decon-tv";
- reg: physical base address and length of the DECON registers set.
- interrupts: should contain a list of all DECON IP block interrupts in the
order: VSYNC, LCD_SYSTEM. The interrupt specifier format
depends on the interrupt controller used.
- interrupt-names: should contain the interrupt names: "vsync", "lcd_sys"
in the same order as they were listed in the interrupts
property.
- interrupt-names: should contain the interrupt names depending on mode of work:
video mode: "vsync",
command mode: "lcd_sys",
command mode with software trigger: "lcd_sys", "te".
- interrupts or interrupts-extended: list of interrupt specifiers corresponding
to names privided in interrupt-names, as described in
interrupt-controller/interrupts.txt
- clocks: must include clock specifiers corresponding to entries in the
clock-names property.
- clock-names: list of clock names sorted in the same order as the clocks

View File

@ -0,0 +1,8 @@
AU Optronics Corporation 31.5" FHD (1920x1080) TFT LCD panel
Required properties:
- compatible: should be "auo,p320hvn03"
- power-supply: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.

View File

@ -0,0 +1,23 @@
Innolux P079ZCA 7.85" 768x1024 TFT LCD panel
Required properties:
- compatible: should be "innolux,p079zca"
- reg: DSI virtual channel of the peripheral
- power-supply: phandle of the regulator that provides the supply voltage
- enable-gpios: panel enable gpio
Optional properties:
- backlight: phandle of the backlight device attached to the panel
Example:
&mipi_dsi {
panel {
compatible = "innolux,p079zca";
reg = <0>;
power-supply = <...>;
backlight = <&backlight>;
enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
status = "okay";
};
};

View File

@ -0,0 +1,8 @@
NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel
Required properties:
- compatible: should be "nec,nl12880bc20-05"
- power-supply: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.

View File

@ -0,0 +1,8 @@
NLT Technologies, Ltd. 15.6" FHD (1920x1080) LVDS TFT LCD panel
Required properties:
- compatible: should be "nlt,nl192108ac18-02d"
- power-supply: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.

View File

@ -1,7 +1,10 @@
Samsung S6E3HA2 5.7" 1440x2560 AMOLED panel
Samsung S6E3HF2 5.65" 1600x2560 AMOLED panel
Required properties:
- compatible: "samsung,s6e3ha2"
- compatible: should be one of:
"samsung,s6e3ha2",
"samsung,s6e3hf2".
- reg: the virtual channel number of a DSI peripheral
- vdd3-supply: I/O voltage supply
- vci-supply: voltage supply for analog circuits

View File

@ -0,0 +1,36 @@
* STMicroelectronics STM32 lcd-tft display controller
- ltdc: lcd-tft display controller host
must be a sub-node of st-display-subsystem
Required properties:
- compatible: "st,stm32-ltdc"
- reg: Physical base address of the IP registers and length of memory mapped region.
- clocks: A list of phandle + clock-specifier pairs, one for each
entry in 'clock-names'.
- clock-names: A list of clock names. For ltdc it should contain:
- "lcd" for the clock feeding the output pixel clock & IP clock.
- resets: reset to be used by the device (defined by use of RCC macro).
Required nodes:
- Video port for RGB output.
Example:
/ {
...
soc {
...
ltdc: display-controller@40016800 {
compatible = "st,stm32-ltdc";
reg = <0x40016800 0x200>;
interrupts = <88>, <89>;
resets = <&rcc STM32F4_APB2_RESET(LTDC)>;
clocks = <&rcc 1 CLK_LCD>;
clock-names = "lcd";
port {
ltdc_out_rgb: endpoint {
};
};
};
};
};

View File

@ -4,6 +4,44 @@ Allwinner A10 Display Pipeline
The Allwinner A10 Display pipeline is composed of several components
that are going to be documented below:
For the input port of all components up to the TCON in the display
pipeline, if there are multiple components, the local endpoint IDs
must correspond to the index of the upstream block. For example, if
the remote endpoint is Frontend 1, then the local endpoint ID must
be 1.
Conversely, for the output ports of the same group, the remote endpoint
ID must be the index of the local hardware block. If the local backend
is backend 1, then the remote endpoint ID must be 1.
HDMI Encoder
------------
The HDMI Encoder supports the HDMI video and audio outputs, and does
CEC. It is one end of the pipeline.
Required properties:
- compatible: value must be one of:
* allwinner,sun5i-a10s-hdmi
- reg: base address and size of memory-mapped region
- interrupts: interrupt associated to this IP
- clocks: phandles to the clocks feeding the HDMI encoder
* ahb: the HDMI interface clock
* mod: the HDMI module clock
* pll-0: the first video PLL
* pll-1: the second video PLL
- clock-names: the clock names mentioned above
- dmas: phandles to the DMA channels used by the HDMI encoder
* ddc-tx: The channel for DDC transmission
* ddc-rx: The channel for DDC reception
* audio-tx: The channel used for audio transmission
- dma-names: the channel names mentioned above
- ports: A ports node with endpoint definitions as defined in
Documentation/devicetree/bindings/media/video-interfaces.txt. The
first port should be the input endpoint. The second should be the
output, usually to an HDMI connector.
TV Encoder
----------
@ -31,6 +69,7 @@ Required properties:
* allwinner,sun6i-a31-tcon
* allwinner,sun6i-a31s-tcon
* allwinner,sun8i-a33-tcon
* allwinner,sun8i-v3s-tcon
- reg: base address and size of memory-mapped region
- interrupts: interrupt associated to this IP
- clocks: phandles to the clocks feeding the TCON. Three are needed:
@ -47,12 +86,15 @@ Required properties:
Documentation/devicetree/bindings/media/video-interfaces.txt. The
first port should be the input endpoint, the second one the output
The output should have two endpoints. The first is the block
connected to the TCON channel 0 (usually a panel or a bridge), the
second the block connected to the TCON channel 1 (usually the TV
encoder)
The output may have multiple endpoints. The TCON has two channels,
usually with the first channel being used for the panels interfaces
(RGB, LVDS, etc.), and the second being used for the outputs that
require another controller (TV Encoder, HDMI, etc.). The endpoints
will take an extra property, allwinner,tcon-channel, to specify the
channel the endpoint is associated to. If that property is not
present, the endpoint number will be used as the channel number.
On SoCs other than the A33, there is one more clock required:
On SoCs other than the A33 and V3s, there is one more clock required:
- 'tcon-ch1': The clock driving the TCON channel 1
DRC
@ -138,6 +180,26 @@ Required properties:
Documentation/devicetree/bindings/media/video-interfaces.txt. The
first port should be the input endpoints, the second one the outputs
Display Engine 2.0 Mixer
------------------------
The DE2 mixer have many functionalities, currently only layer blending is
supported.
Required properties:
- compatible: value must be one of:
* allwinner,sun8i-v3s-de2-mixer
- reg: base address and size of the memory-mapped region.
- clocks: phandles to the clocks feeding the mixer
* bus: the mixer interface clock
* mod: the mixer module clock
- clock-names: the clock names mentioned above
- resets: phandles to the reset controllers driving the mixer
- ports: A ports node with endpoint definitions as defined in
Documentation/devicetree/bindings/media/video-interfaces.txt. The
first port should be the input endpoints, the second one the output
Display Engine Pipeline
-----------------------
@ -148,13 +210,15 @@ extra node.
Required properties:
- compatible: value must be one of:
* allwinner,sun5i-a10s-display-engine
* allwinner,sun5i-a13-display-engine
* allwinner,sun6i-a31-display-engine
* allwinner,sun6i-a31s-display-engine
* allwinner,sun8i-a33-display-engine
* allwinner,sun8i-v3s-display-engine
- allwinner,pipelines: list of phandle to the display engine
frontends available.
frontends (DE 1.0) or mixers (DE 2.0) available.
Example:
@ -173,6 +237,57 @@ panel: panel {
};
};
connector {
compatible = "hdmi-connector";
type = "a";
port {
hdmi_con_in: endpoint {
remote-endpoint = <&hdmi_out_con>;
};
};
};
hdmi: hdmi@01c16000 {
compatible = "allwinner,sun5i-a10s-hdmi";
reg = <0x01c16000 0x1000>;
interrupts = <58>;
clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
<&ccu CLK_PLL_VIDEO0_2X>,
<&ccu CLK_PLL_VIDEO1_2X>;
clock-names = "ahb", "mod", "pll-0", "pll-1";
dmas = <&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_NORMAL 16>,
<&dma SUN4I_DMA_DEDICATED 24>;
dma-names = "ddc-tx", "ddc-rx", "audio-tx";
status = "disabled";
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
#address-cells = <1>;
#size-cells = <0>;
reg = <0>;
hdmi_in_tcon0: endpoint {
remote-endpoint = <&tcon0_out_hdmi>;
};
};
port@1 {
#address-cells = <1>;
#size-cells = <0>;
reg = <1>;
hdmi_out_con: endpoint {
remote-endpoint = <&hdmi_con_in>;
};
};
};
};
tve0: tv-encoder@01c0a000 {
compatible = "allwinner,sun4i-a10-tv-encoder";
reg = <0x01c0a000 0x1000>;

View File

@ -58,6 +58,18 @@ Required properties:
integer cells. The first cell is the offset of SYSCTRL register used
to control TV Encoder DAC power, and the second cell is the bit mask.
* VGA output device
Required properties:
- compatible: should be "zte,zx296718-vga"
- reg: Physical base address and length of the VGA device IO region
- interrupts : VGA interrupt number to CPU
- clocks: Phandle with clock-specifier pointing to VGA I2C clock.
- clock-names: Must be "i2c_wclk".
- zte,vga-power-control: the phandle to SYSCTRL block followed by two
integer cells. The first cell is the offset of SYSCTRL register used
to control VGA DAC power, and the second cell is the bit mask.
Example:
vou: vou@1440000 {
@ -81,6 +93,15 @@ vou: vou@1440000 {
"main_wclk", "aux_wclk";
};
vga: vga@8000 {
compatible = "zte,zx296718-vga";
reg = <0x8000 0x1000>;
interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&topcrm VGA_I2C_WCLK>;
clock-names = "i2c_wclk";
zte,vga-power-control = <&sysctrl 0x170 0xe0>;
};
hdmi: hdmi@c000 {
compatible = "zte,zx296718-hdmi";
reg = <0xc000 0x4000>;

View File

@ -219,6 +219,7 @@ nexbox Nexbox
newhaven Newhaven Display International
ni National Instruments
nintendo Nintendo
nlt NLT Technologies, Ltd.
nokia Nokia
nordic Nordic Semiconductor
nuvoton Nuvoton Technology Corporation

View File

@ -98,6 +98,9 @@ DRIVER_ATOMIC
implement appropriate obj->atomic_get_property() vfuncs for any
modeset objects with driver specific properties.
DRIVER_SYNCOBJ
Driver support drm sync objects.
Major, Minor and Patchlevel
~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -149,60 +152,15 @@ Device Instance and Driver Handling
Driver Load
-----------
IRQ Registration
~~~~~~~~~~~~~~~~
The DRM core tries to facilitate IRQ handler registration and
unregistration by providing :c:func:`drm_irq_install()` and
:c:func:`drm_irq_uninstall()` functions. Those functions only
support a single interrupt per device, devices that use more than one
IRQs need to be handled manually.
IRQ Helper Library
~~~~~~~~~~~~~~~~~~
Managed IRQ Registration
''''''''''''''''''''''''
.. kernel-doc:: drivers/gpu/drm/drm_irq.c
:doc: irq helpers
:c:func:`drm_irq_install()` starts by calling the irq_preinstall
driver operation. The operation is optional and must make sure that the
interrupt will not get fired by clearing all pending interrupt flags or
disabling the interrupt.
The passed-in IRQ will then be requested by a call to
:c:func:`request_irq()`. If the DRIVER_IRQ_SHARED driver feature
flag is set, a shared (IRQF_SHARED) IRQ handler will be requested.
The IRQ handler function must be provided as the mandatory irq_handler
driver operation. It will get passed directly to
:c:func:`request_irq()` and thus has the same prototype as all IRQ
handlers. It will get called with a pointer to the DRM device as the
second argument.
Finally the function calls the optional irq_postinstall driver
operation. The operation usually enables interrupts (excluding the
vblank interrupt, which is enabled separately), but drivers may choose
to enable/disable interrupts at a different time.
:c:func:`drm_irq_uninstall()` is similarly used to uninstall an
IRQ handler. It starts by waking up all processes waiting on a vblank
interrupt to make sure they don't hang, and then calls the optional
irq_uninstall driver operation. The operation must disable all hardware
interrupts. Finally the function frees the IRQ by calling
:c:func:`free_irq()`.
Manual IRQ Registration
'''''''''''''''''''''''
Drivers that require multiple interrupt handlers can't use the managed
IRQ registration functions. In that case IRQs must be registered and
unregistered manually (usually with the :c:func:`request_irq()` and
:c:func:`free_irq()` functions, or their :c:func:`devm_request_irq()` and
:c:func:`devm_free_irq()` equivalents).
When manually registering IRQs, drivers must not set the
DRIVER_HAVE_IRQ driver feature flag, and must not provide the
irq_handler driver operation. They must set the :c:type:`struct
drm_device <drm_device>` irq_enabled field to 1 upon
registration of the IRQs, and clear it to 0 after unregistering the
IRQs.
.. kernel-doc:: drivers/gpu/drm/drm_irq.c
:export:
Memory Manager Initialization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -143,6 +143,12 @@ Bridge Helper Reference
.. kernel-doc:: drivers/gpu/drm/drm_bridge.c
:export:
Panel-Bridge Helper Reference
-----------------------------
.. kernel-doc:: drivers/gpu/drm/bridge/panel.c
:export:
.. _drm_panel_helper:
Panel Helper Reference

View File

@ -612,8 +612,8 @@ operation handler.
Vertical Blanking and Interrupt Handling Functions Reference
------------------------------------------------------------
.. kernel-doc:: include/drm/drm_irq.h
.. kernel-doc:: include/drm/drm_vblank.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_irq.c
.. kernel-doc:: drivers/gpu/drm/drm_vblank.c
:export:

View File

@ -484,3 +484,15 @@ DRM Cache Handling
.. kernel-doc:: drivers/gpu/drm/drm_cache.c
:export:
DRM Sync Objects
===========================
.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:doc: Overview
.. kernel-doc:: include/drm/drm_syncobj.h
:export:
.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:export:

View File

@ -12,6 +12,8 @@ Linux GPU Driver Developer's Guide
drm-uapi
i915
meson
pl111
tegra
tinydrm
vc4
vga-switcheroo

View File

@ -0,0 +1,6 @@
==========================================
drm/pl111 ARM PrimeCell PL111 CLCD Driver
==========================================
.. kernel-doc:: drivers/gpu/drm/pl111/pl111_drv.c
:doc: ARM PrimeCell PL111 CLCD Driver

178
Documentation/gpu/tegra.rst Normal file
View File

@ -0,0 +1,178 @@
===============================================
drm/tegra NVIDIA Tegra GPU and display driver
===============================================
NVIDIA Tegra SoCs support a set of display, graphics and video functions via
the host1x controller. host1x supplies command streams, gathered from a push
buffer provided directly by the CPU, to its clients via channels. Software,
or blocks amongst themselves, can use syncpoints for synchronization.
Up until, but not including, Tegra124 (aka Tegra K1) the drm/tegra driver
supports the built-in GPU, comprised of the gr2d and gr3d engines. Starting
with Tegra124 the GPU is based on the NVIDIA desktop GPU architecture and
supported by the drm/nouveau driver.
The drm/tegra driver supports NVIDIA Tegra SoC generations since Tegra20. It
has three parts:
- A host1x driver that provides infrastructure and access to the host1x
services.
- A KMS driver that supports the display controllers as well as a number of
outputs, such as RGB, HDMI, DSI, and DisplayPort.
- A set of custom userspace IOCTLs that can be used to submit jobs to the
GPU and video engines via host1x.
Driver Infrastructure
=====================
The various host1x clients need to be bound together into a logical device in
order to expose their functionality to users. The infrastructure that supports
this is implemented in the host1x driver. When a driver is registered with the
infrastructure it provides a list of compatible strings specifying the devices
that it needs. The infrastructure creates a logical device and scan the device
tree for matching device nodes, adding the required clients to a list. Drivers
for individual clients register with the infrastructure as well and are added
to the logical host1x device.
Once all clients are available, the infrastructure will initialize the logical
device using a driver-provided function which will set up the bits specific to
the subsystem and in turn initialize each of its clients.
Similarly, when one of the clients is unregistered, the infrastructure will
destroy the logical device by calling back into the driver, which ensures that
the subsystem specific bits are torn down and the clients destroyed in turn.
Host1x Infrastructure Reference
-------------------------------
.. kernel-doc:: include/linux/host1x.h
.. kernel-doc:: drivers/gpu/host1x/bus.c
:export:
Host1x Syncpoint Reference
--------------------------
.. kernel-doc:: drivers/gpu/host1x/syncpt.c
:export:
KMS driver
==========
The display hardware has remained mostly backwards compatible over the various
Tegra SoC generations, up until Tegra186 which introduces several changes that
make it difficult to support with a parameterized driver.
Display Controllers
-------------------
Tegra SoCs have two display controllers, each of which can be associated with
zero or more outputs. Outputs can also share a single display controller, but
only if they run with compatible display timings. Two display controllers can
also share a single framebuffer, allowing cloned configurations even if modes
on two outputs don't match. A display controller is modelled as a CRTC in KMS
terms.
On Tegra186, the number of display controllers has been increased to three. A
display controller can no longer drive all of the outputs. While two of these
controllers can drive both DSI outputs and both SOR outputs, the third cannot
drive any DSI.
Windows
~~~~~~~
A display controller controls a set of windows that can be used to composite
multiple buffers onto the screen. While it is possible to assign arbitrary Z
ordering to individual windows (by programming the corresponding blending
registers), this is currently not supported by the driver. Instead, it will
assume a fixed Z ordering of the windows (window A is the root window, that
is, the lowest, while windows B and C are overlaid on top of window A). The
overlay windows support multiple pixel formats and can automatically convert
from YUV to RGB at scanout time. This makes them useful for displaying video
content. In KMS, each window is modelled as a plane. Each display controller
has a hardware cursor that is exposed as a cursor plane.
Outputs
-------
The type and number of supported outputs varies between Tegra SoC generations.
All generations support at least HDMI. While earlier generations supported the
very simple RGB interfaces (one per display controller), recent generations no
longer do and instead provide standard interfaces such as DSI and eDP/DP.
Outputs are modelled as a composite encoder/connector pair.
RGB/LVDS
~~~~~~~~
This interface is no longer available since Tegra124. It has been replaced by
the more standard DSI and eDP interfaces.
HDMI
~~~~
HDMI is supported on all Tegra SoCs. Starting with Tegra210, HDMI is provided
by the versatile SOR output, which supports eDP, DP and HDMI. The SOR is able
to support HDMI 2.0, though support for this is currently not merged.
DSI
~~~
Although Tegra has supported DSI since Tegra30, the controller has changed in
several ways in Tegra114. Since none of the publicly available development
boards prior to Dalmore (Tegra114) have made use of DSI, only Tegra114 and
later are supported by the drm/tegra driver.
eDP/DP
~~~~~~
eDP was first introduced in Tegra124 where it was used to drive the display
panel for notebook form factors. Tegra210 added support for full DisplayPort
support, though this is currently not implemented in the drm/tegra driver.
Userspace Interface
===================
The userspace interface provided by drm/tegra allows applications to create
GEM buffers, access and control syncpoints as well as submit command streams
to host1x.
GEM Buffers
-----------
The ``DRM_IOCTL_TEGRA_GEM_CREATE`` IOCTL is used to create a GEM buffer object
with Tegra-specific flags. This is useful for buffers that should be tiled, or
that are to be scanned out upside down (useful for 3D content).
After a GEM buffer object has been created, its memory can be mapped by an
application using the mmap offset returned by the ``DRM_IOCTL_TEGRA_GEM_MMAP``
IOCTL.
Syncpoints
----------
The current value of a syncpoint can be obtained by executing the
``DRM_IOCTL_TEGRA_SYNCPT_READ`` IOCTL. Incrementing the syncpoint is achieved
using the ``DRM_IOCTL_TEGRA_SYNCPT_INCR`` IOCTL.
Userspace can also request blocking on a syncpoint. To do so, it needs to
execute the ``DRM_IOCTL_TEGRA_SYNCPT_WAIT`` IOCTL, specifying the value of
the syncpoint to wait for. The kernel will release the application when the
syncpoint reaches that value or after a specified timeout.
Command Stream Submission
-------------------------
Before an application can submit command streams to host1x it needs to open a
channel to an engine using the ``DRM_IOCTL_TEGRA_OPEN_CHANNEL`` IOCTL. Client
IDs are used to identify the target of the channel. When a channel is no
longer needed, it can be closed using the ``DRM_IOCTL_TEGRA_CLOSE_CHANNEL``
IOCTL. To retrieve the syncpoint associated with a channel, an application
can use the ``DRM_IOCTL_TEGRA_GET_SYNCPT``.
After opening a channel, submitting command streams is easy. The application
writes commands into the memory backing a GEM buffer object and passes these
to the ``DRM_IOCTL_TEGRA_SUBMIT`` IOCTL along with various other parameters,
such as the syncpoints or relocations used in the job submission.

View File

@ -177,19 +177,6 @@ following drivers still use ``struct_mutex``: ``msm``, ``omapdrm`` and
Contact: Daniel Vetter, respective driver maintainers
Switch to drm_connector_list_iter for any connector_list walking
----------------------------------------------------------------
Connectors can be hotplugged, and we now have a special list of helpers to walk
the connector_list in a race-free fashion, without incurring deadlocks on
mutexes and other fun stuff.
Unfortunately most drivers are not converted yet. At least all those supporting
DP MST hotplug should be converted, since for those drivers the difference
matters. See drm_for_each_connector_iter() vs. drm_for_each_connector().
Contact: Daniel Vetter
Core refactorings
=================

View File

@ -1,8 +1,8 @@
Sync File API Guide
~~~~~~~~~~~~~~~~~~~
===================
Sync File API Guide
===================
Gustavo Padovan
<gustavo at padovan dot org>
:Author: Gustavo Padovan <gustavo at padovan dot org>
This document serves as a guide for device drivers writers on what the
sync_file API is, and how drivers can support it. Sync file is the carrier of
@ -46,16 +46,17 @@ Creating Sync Files
When a driver needs to send an out-fence userspace it creates a sync_file.
Interface:
Interface::
struct sync_file *sync_file_create(struct dma_fence *fence);
The caller pass the out-fence and gets back the sync_file. That is just the
first step, next it needs to install an fd on sync_file->file. So it gets an
fd:
fd::
fd = get_unused_fd_flags(O_CLOEXEC);
and installs it on sync_file->file:
and installs it on sync_file->file::
fd_install(fd, sync_file->file);
@ -71,7 +72,8 @@ When userspace needs to send an in-fence to the driver it passes file descriptor
of the Sync File to the kernel. The kernel can then retrieve the fences
from it.
Interface:
Interface::
struct dma_fence *sync_file_get_fence(int fd);
@ -79,5 +81,6 @@ The returned reference is owned by the caller and must be disposed of
afterwards using dma_fence_put(). In case of error, a NULL is returned instead.
References:
[1] struct sync_file in include/linux/sync_file.h
[2] All interfaces mentioned above defined in include/linux/sync_file.h
1. struct sync_file in include/linux/sync_file.h
2. All interfaces mentioned above defined in include/linux/sync_file.h

View File

@ -4235,6 +4235,12 @@ F: include/drm/drm*
F: include/uapi/drm/drm*
F: include/linux/vga*
DRM DRIVER FOR ARM PL111 CLCD
M: Eric Anholt <eric@anholt.net>
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Supported
F: drivers/gpu/drm/pl111/
DRM DRIVER FOR AST SERVER GRAPHICS CHIPS
M: Dave Airlie <airlied@redhat.com>
S: Odd Fixes
@ -4242,6 +4248,8 @@ F: drivers/gpu/drm/ast/
DRM DRIVERS FOR BRIDGE CHIPS
M: Archit Taneja <architt@codeaurora.org>
M: Andrzej Hajda <a.hajda@samsung.com>
R: Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/bridge/
@ -4498,6 +4506,17 @@ S: Maintained
F: drivers/gpu/drm/sti
F: Documentation/devicetree/bindings/display/st,stih4xx.txt
DRM DRIVERS FOR STM
M: Yannick Fertre <yannick.fertre@st.com>
M: Philippe Cornu <philippe.cornu@st.com>
M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
M: Vincent Abriou <vincent.abriou@st.com>
L: dri-devel@lists.freedesktop.org
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Maintained
F: drivers/gpu/drm/stm
F: Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
DRM DRIVER FOR TDFX VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/tdfx/

View File

@ -558,8 +558,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
if (WARN_ON(!dmabuf || !dev))
return ERR_PTR(-EINVAL);
attach = kzalloc(sizeof(struct dma_buf_attachment), GFP_KERNEL);
if (attach == NULL)
attach = kzalloc(sizeof(*attach), GFP_KERNEL);
if (!attach)
return ERR_PTR(-ENOMEM);
attach->dev = dev;
@ -1122,9 +1122,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
attach_count = 0;
list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
seq_puts(s, "\t");
seq_printf(s, "%s\n", dev_name(attach_obj->dev));
seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
attach_count++;
}

View File

@ -402,6 +402,11 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
}
}
if (!timeout) {
ret = 0;
goto out;
}
cb.base.func = dma_fence_default_wait_cb;
cb.task = current;
list_add(&cb.base.node, &fence->cb_list);

View File

@ -110,7 +110,7 @@ static void sync_print_fence(struct seq_file *s,
}
}
seq_puts(s, "\n");
seq_putc(s, '\n');
}
static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
@ -132,9 +132,11 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
static void sync_print_sync_file(struct seq_file *s,
struct sync_file *sync_file)
{
char buf[128];
int i;
seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
seq_printf(s, "[%p] %s: %s\n", sync_file,
sync_file_get_name(sync_file, buf, sizeof(buf)),
sync_status_str(dma_fence_get_status(sync_file->fence)));
if (dma_fence_is_array(sync_file->fence)) {
@ -161,7 +163,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
sync_timeline_list);
sync_print_obj(s, obj);
seq_puts(s, "\n");
seq_putc(s, '\n');
}
spin_unlock_irqrestore(&sync_timeline_list_lock, flags);
@ -173,7 +175,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
container_of(pos, struct sync_file, sync_file_list);
sync_print_sync_file(s, sync_file);
seq_puts(s, "\n");
seq_putc(s, '\n');
}
spin_unlock_irqrestore(&sync_file_list_lock, flags);
return 0;

View File

@ -41,8 +41,6 @@ static struct sync_file *sync_file_alloc(void)
if (IS_ERR(sync_file->file))
goto err;
kref_init(&sync_file->kref);
init_waitqueue_head(&sync_file->wq);
INIT_LIST_HEAD(&sync_file->cb.node);
@ -82,11 +80,6 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
sync_file->fence = dma_fence_get(fence);
snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence), fence->context,
fence->seqno);
return sync_file;
}
EXPORT_SYMBOL(sync_file_create);
@ -131,6 +124,36 @@ struct dma_fence *sync_file_get_fence(int fd)
}
EXPORT_SYMBOL(sync_file_get_fence);
/**
* sync_file_get_name - get the name of the sync_file
* @sync_file: sync_file to get the fence from
* @buf: destination buffer to copy sync_file name into
* @len: available size of destination buffer.
*
* Each sync_file may have a name assigned either by the user (when merging
* sync_files together) or created from the fence it contains. In the latter
* case construction of the name is deferred until use, and so requires
* sync_file_get_name().
*
* Returns: a string representing the name.
*/
char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
{
if (sync_file->user_name[0]) {
strlcpy(buf, sync_file->user_name, len);
} else {
struct dma_fence *fence = sync_file->fence;
snprintf(buf, len, "%s-%s%llu-%d",
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence),
fence->context,
fence->seqno);
}
return buf;
}
static int sync_file_set_fence(struct sync_file *sync_file,
struct dma_fence **fences, int num_fences)
{
@ -268,7 +291,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
goto err;
}
strlcpy(sync_file->name, name, sizeof(sync_file->name));
strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
return sync_file;
err:
@ -277,22 +300,15 @@ err:
}
static void sync_file_free(struct kref *kref)
static int sync_file_release(struct inode *inode, struct file *file)
{
struct sync_file *sync_file = container_of(kref, struct sync_file,
kref);
struct sync_file *sync_file = file->private_data;
if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
dma_fence_put(sync_file->fence);
kfree(sync_file);
}
static int sync_file_release(struct inode *inode, struct file *file)
{
struct sync_file *sync_file = file->private_data;
kref_put(&sync_file->kref, sync_file_free);
return 0;
}
@ -422,7 +438,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
}
no_fences:
strlcpy(info.name, sync_file->name, sizeof(info.name));
sync_file_get_name(sync_file, info.name, sizeof(info.name));
info.status = dma_fence_is_signaled(sync_file->fence);
info.num_fences = num_fences;

View File

@ -246,6 +246,8 @@ source "drivers/gpu/drm/fsl-dcu/Kconfig"
source "drivers/gpu/drm/tegra/Kconfig"
source "drivers/gpu/drm/stm/Kconfig"
source "drivers/gpu/drm/panel/Kconfig"
source "drivers/gpu/drm/bridge/Kconfig"
@ -274,6 +276,8 @@ source "drivers/gpu/drm/meson/Kconfig"
source "drivers/gpu/drm/tinydrm/Kconfig"
source "drivers/gpu/drm/pl111/Kconfig"
# Keep legacy drivers last
menuconfig DRM_LEGACY

View File

@ -16,7 +16,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \
drm_framebuffer.o drm_connector.o drm_blend.o \
drm_encoder.o drm_mode_object.o drm_property.o \
drm_plane.o drm_color_mgmt.o drm_print.o \
drm_dumb_buffers.o drm_mode_config.o
drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
drm_syncobj.o
drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
drm-$(CONFIG_DRM_VM) += drm_vm.o
@ -34,6 +35,7 @@ drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
drm_simple_kms_helper.o drm_modeset_helper.o \
drm_scdc_helper.o
drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
@ -82,6 +84,7 @@ obj-$(CONFIG_DRM_BOCHS) += bochs/
obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/
obj-$(CONFIG_DRM_MSM) += msm/
obj-$(CONFIG_DRM_TEGRA) += tegra/
obj-$(CONFIG_DRM_STM) += stm/
obj-$(CONFIG_DRM_STI) += sti/
obj-$(CONFIG_DRM_IMX) += imx/
obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
@ -96,3 +99,4 @@ obj-y += hisilicon/
obj-$(CONFIG_DRM_ZTE) += zte/
obj-$(CONFIG_DRM_MXSFB) += mxsfb/
obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
obj-$(CONFIG_DRM_PL111) += pl111/

View File

@ -5,15 +5,23 @@ config DRM_AMDGPU_SI
Choose this option if you want to enable experimental support
for SI asics.
SI is already supported in radeon. Experimental support for SI
in amdgpu will be disabled by default and is still provided by
radeon. Use module options to override this:
radeon.si_support=0 amdgpu.si_support=1
config DRM_AMDGPU_CIK
bool "Enable amdgpu support for CIK parts"
depends on DRM_AMDGPU
help
Choose this option if you want to enable experimental support
for CIK asics.
Choose this option if you want to enable support for CIK asics.
CIK is already supported in radeon. CIK support in amdgpu
is for experimentation and testing.
CIK is already supported in radeon. Support for CIK in amdgpu
will be disabled by default and is still provided by radeon.
Use module options to override this:
radeon.cik_support=0 amdgpu.cik_support=1
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"

View File

@ -4,7 +4,7 @@
FULL_AMD_PATH=$(src)/..
ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/include \
-I$(FULL_AMD_PATH)/amdgpu \
-I$(FULL_AMD_PATH)/scheduler \
@ -24,7 +24,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_queue_mgr.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@ -34,7 +35,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o
# add GMC block
amdgpu-y += \
@ -54,7 +55,8 @@ amdgpu-y += \
# add PSP block
amdgpu-y += \
amdgpu_psp.o \
psp_v3_1.o
psp_v3_1.o \
psp_v10_0.o
# add SMC block
amdgpu-y += \
@ -92,6 +94,11 @@ amdgpu-y += \
vce_v3_0.o \
vce_v4_0.o
# add VCN block
amdgpu-y += \
amdgpu_vcn.o \
vcn_v1_0.o
# add amdkfd interfaces
amdgpu-y += \
amdgpu_amdkfd.o \

View File

@ -36,16 +36,18 @@
#include <linux/hashtable.h>
#include <linux/dma-fence.h>
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
#include <ttm/ttm_placement.h>
#include <ttm/ttm_module.h>
#include <ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/drmP.h>
#include <drm/drm_gem.h>
#include <drm/amdgpu_drm.h>
#include <kgd_kfd_interface.h>
#include "amd_shared.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
@ -62,6 +64,7 @@
#include "amdgpu_acp.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
#include "gpu_scheduler.h"
#include "amdgpu_virt.h"
@ -92,6 +95,7 @@ extern int amdgpu_vm_size;
extern int amdgpu_vm_block_size;
extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
extern int amdgpu_no_evict;
@ -109,6 +113,15 @@ extern int amdgpu_prim_buf_per_se;
extern int amdgpu_pos_buf_per_se;
extern int amdgpu_cntl_sb_buf_per_se;
extern int amdgpu_param_buf_per_se;
extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
extern int amdgpu_cik_support;
#endif
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
@ -305,8 +318,8 @@ struct amdgpu_gart_funcs {
/* set pte flags based per asic */
uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
uint32_t flags);
/* adjust mc addr in fb for APU case */
u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
/* get the pde for a given mc addr */
u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
uint32_t (*get_invalidate_req)(unsigned int vm_id);
};
@ -554,7 +567,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
int amdgpu_gart_init(struct amdgpu_device *adev);
void amdgpu_gart_fini(struct amdgpu_device *adev);
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages);
int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, struct page **pagelist,
@ -602,6 +615,7 @@ struct amdgpu_mc {
uint32_t srbm_soft_reset;
struct amdgpu_mode_mc_save save;
bool prt_warning;
uint64_t stolen_size;
/* apertures */
u64 shared_aperture_start;
u64 shared_aperture_end;
@ -771,6 +785,29 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct amd_sched_entity *entity, void *owner,
struct dma_fence **f);
/*
* Queue manager
*/
struct amdgpu_queue_mapper {
int hw_ip;
struct mutex lock;
/* protected by lock */
struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
};
struct amdgpu_queue_mgr {
struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
};
int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr);
int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr);
int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr,
int hw_ip, int instance, int ring,
struct amdgpu_ring **out_ring);
/*
* context related structures
*/
@ -784,6 +821,7 @@ struct amdgpu_ctx_ring {
struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device *adev;
struct amdgpu_queue_mgr queue_mgr;
unsigned reset_counter;
spinlock_t ring_lock;
struct dma_fence **fences;
@ -822,6 +860,7 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
u32 vram_lost_counter;
};
/*
@ -830,6 +869,8 @@ struct amdgpu_fpriv {
struct amdgpu_bo_list {
struct mutex lock;
struct rcu_head rhead;
struct kref refcount;
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
@ -893,20 +934,26 @@ struct amdgpu_rlc {
u32 *register_restore;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
u32 num_pipe;
u32 num_mec;
u32 num_queue;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
struct mutex ring_mutex;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
};
@ -983,7 +1030,10 @@ struct amdgpu_gfx_config {
struct amdgpu_cu_info {
uint32_t number; /* total active CU number */
uint32_t ao_cu_mask;
uint32_t max_waves_per_simd;
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
uint32_t bitmap[4][4];
};
@ -1061,6 +1111,8 @@ struct amdgpu_gfx {
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
bool in_reset;
/* s3/s4 mask */
bool in_suspend;
/* NGG */
struct amdgpu_ngg ngg;
};
@ -1109,12 +1161,14 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
unsigned num_post_dep_syncobjs;
struct drm_syncobj **post_dep_syncobjs;
};
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
#define AMDGPU_VM_DOMAIN (1 << 3) /* bit set means in virtual memory context */
struct amdgpu_job {
struct amd_sched_job base;
@ -1122,6 +1176,8 @@ struct amdgpu_job {
struct amdgpu_vm *vm;
struct amdgpu_ring *ring;
struct amdgpu_sync sync;
struct amdgpu_sync dep_sync;
struct amdgpu_sync sched_sync;
struct amdgpu_ib *ibs;
struct dma_fence *fence; /* the hw fence */
uint32_t preamble_status;
@ -1129,7 +1185,6 @@ struct amdgpu_job {
void *owner;
uint64_t fence_ctx; /* the fence_context this job uses */
bool vm_needs_flush;
bool need_pipeline_sync;
unsigned vm_id;
uint64_t vm_pd_addr;
uint32_t gds_base, gds_size;
@ -1221,6 +1276,9 @@ struct amdgpu_firmware {
const struct amdgpu_psp_funcs *funcs;
struct amdgpu_bo *rbuf;
struct mutex mutex;
/* gpu info firmware data pointer */
const struct firmware *gpu_info_fw;
};
/*
@ -1296,7 +1354,6 @@ struct amdgpu_smumgr {
*/
struct amdgpu_allowed_register_entry {
uint32_t reg_offset;
bool untouched;
bool grbm_indexed;
};
@ -1424,6 +1481,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
#define AMDGPU_RESET_MAGIC_NUM 64
struct amdgpu_device {
struct device *dev;
struct drm_device *ddev;
@ -1523,7 +1581,9 @@ struct amdgpu_device {
atomic64_t gtt_usage;
atomic64_t num_bytes_moved;
atomic64_t num_evictions;
atomic64_t num_vram_cpu_page_faults;
atomic_t gpu_reset_counter;
atomic_t vram_lost_counter;
/* data for buffer migration throttling */
struct {
@ -1570,11 +1630,18 @@ struct amdgpu_device {
/* sdma */
struct amdgpu_sdma sdma;
/* uvd */
struct amdgpu_uvd uvd;
union {
struct {
/* uvd */
struct amdgpu_uvd uvd;
/* vce */
struct amdgpu_vce vce;
/* vce */
struct amdgpu_vce vce;
};
/* vcn */
struct amdgpu_vcn vcn;
};
/* firmwares */
struct amdgpu_firmware firmware;
@ -1598,6 +1665,9 @@ struct amdgpu_device {
/* amdkfd interface */
struct kfd_dev *kfd;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
struct amdgpu_virt virt;
/* link all shadow bo */
@ -1606,9 +1676,13 @@ struct amdgpu_device {
/* link all gtt */
spinlock_t gtt_list_lock;
struct list_head gtt_list;
/* keep an lru list of rings by HW IP */
struct list_head ring_lru_list;
spinlock_t ring_lru_list_lock;
/* record hw reset is performed */
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
};
@ -1617,7 +1691,6 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
return container_of(bdev, struct amdgpu_device, mman.bdev);
}
bool amdgpu_device_is_px(struct drm_device *dev);
int amdgpu_device_init(struct amdgpu_device *adev,
struct drm_device *ddev,
struct pci_dev *pdev,
@ -1733,30 +1806,31 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
unsigned occupied, chunk1, chunk2;
void *dst;
if (ring->count_dw < count_dw) {
if (unlikely(ring->count_dw < count_dw)) {
DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
} else {
occupied = ring->wptr & ring->buf_mask;
dst = (void *)&ring->ring[occupied];
chunk1 = ring->buf_mask + 1 - occupied;
chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
chunk2 = count_dw - chunk1;
chunk1 <<= 2;
chunk2 <<= 2;
if (chunk1)
memcpy(dst, src, chunk1);
if (chunk2) {
src += chunk1;
dst = (void *)ring->ring;
memcpy(dst, src, chunk2);
}
ring->wptr += count_dw;
ring->wptr &= ring->ptr_mask;
ring->count_dw -= count_dw;
return;
}
occupied = ring->wptr & ring->buf_mask;
dst = (void *)&ring->ring[occupied];
chunk1 = ring->buf_mask + 1 - occupied;
chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
chunk2 = count_dw - chunk1;
chunk1 <<= 2;
chunk2 <<= 2;
if (chunk1)
memcpy(dst, src, chunk1);
if (chunk2) {
src += chunk1;
dst = (void *)ring->ring;
memcpy(dst, src, chunk2);
}
ring->wptr += count_dw;
ring->wptr &= ring->ptr_mask;
ring->count_dw -= count_dw;
}
static inline struct amdgpu_sdma_instance *
@ -1792,6 +1866,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@ -1813,6 +1888,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
@ -1849,9 +1925,6 @@ bool amdgpu_need_post(struct amdgpu_device *adev);
void amdgpu_update_display_priority(struct amdgpu_device *adev);
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
u32 ip_instance, u32 ring,
struct amdgpu_ring **out_ring);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
@ -1900,6 +1973,8 @@ static inline bool amdgpu_has_atpx(void) { return false; }
extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
extern const int amdgpu_max_kms_ioctl;
bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv);
int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
void amdgpu_driver_unload_kms(struct drm_device *dev);
void amdgpu_driver_lastclose_kms(struct drm_device *dev);
@ -1912,10 +1987,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
int *max_error,
struct timeval *vblank_time,
unsigned flags);
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);

View File

@ -24,6 +24,7 @@
#include "amd_shared.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include <linux/module.h>
const struct kfd2kgd_calls *kfd2kgd;
@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void)
return ret;
}
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
{
switch (rdev->asic_type) {
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
@ -86,59 +87,83 @@ void amdgpu_amdkfd_fini(void)
}
}
void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
if (kgd2kfd)
rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
rdev->pdev, kfd2kgd);
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
}
void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
if (rdev->kfd) {
int i;
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
.first_compute_pipe = 1,
.compute_pipe_count = 4 - 1,
.num_mec = adev->gfx.mec.num_mec,
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
amdgpu_doorbell_get_kfd_info(rdev,
/* this is going to have a few of the MSBs set that we need to
* clear */
bitmap_complement(gpu_resources.queue_bitmap,
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
/* remove the KIQ bit as well */
if (adev->gfx.kiq.ring.ready)
clear_bit(amdgpu_gfx_queue_to_bit(adev,
adev->gfx.kiq.ring.me - 1,
adev->gfx.kiq.ring.pipe,
adev->gfx.kiq.ring.queue),
gpu_resources.queue_bitmap);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant */
last_valid_bit = adev->gfx.mec.num_mec
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
clear_bit(i, gpu_resources.queue_bitmap);
amdgpu_doorbell_get_kfd_info(adev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
kgd2kfd->device_init(rdev->kfd, &gpu_resources);
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
}
void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
{
if (rdev->kfd) {
kgd2kfd->device_exit(rdev->kfd);
rdev->kfd = NULL;
if (adev->kfd) {
kgd2kfd->device_exit(adev->kfd);
adev->kfd = NULL;
}
}
void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry)
{
if (rdev->kfd)
kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
if (adev->kfd)
kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
}
void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
{
if (rdev->kfd)
kgd2kfd->suspend(rdev->kfd);
if (adev->kfd)
kgd2kfd->suspend(adev->kfd);
}
int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
{
int r = 0;
if (rdev->kfd)
r = kgd2kfd->resume(rdev->kfd);
if (adev->kfd)
r = kgd2kfd->resume(adev->kfd);
return r;
}
@ -147,7 +172,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
{
struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
int r;
@ -159,10 +184,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
if ((*mem) == NULL)
return -ENOMEM;
r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
if (r) {
dev_err(rdev->dev,
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
return r;
}
@ -170,21 +195,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
/* map the buffer */
r = amdgpu_bo_reserve((*mem)->bo, true);
if (r) {
dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
goto allocate_mem_reserve_bo_failed;
}
r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
&(*mem)->gpu_addr);
if (r) {
dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
*gpu_addr = (*mem)->gpu_addr;
r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
if (r) {
dev_err(rdev->dev,
dev_err(adev->dev,
"(%d) failed to map bo to kernel for amdkfd\n", r);
goto allocate_mem_kmap_bo_failed;
}
@ -220,27 +245,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
uint64_t get_vmem_size(struct kgd_dev *kgd)
{
struct amdgpu_device *rdev =
struct amdgpu_device *adev =
(struct amdgpu_device *)kgd;
BUG_ON(kgd == NULL);
return rdev->mc.real_vram_size;
return adev->mc.real_vram_size;
}
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
if (rdev->gfx.funcs->get_gpu_clock_counter)
return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
if (adev->gfx.funcs->get_gpu_clock_counter)
return adev->gfx.funcs->get_gpu_clock_counter(adev);
return 0;
}
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
/* The sclk is in quantas of 10kHz */
return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}

View File

@ -39,15 +39,15 @@ struct kgd_mem {
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev);
void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);

View File

@ -29,6 +29,7 @@
#include "cikd.h"
#include "cik_sdma.h"
#include "amdgpu_ucode.h"
#include "gfx_v7_0.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
@ -38,8 +39,6 @@
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"
#define CIK_PIPE_PER_MEC (4)
enum {
MAX_TRAPID = 8, /* 3 bits in the bitfield. */
MAX_WATCH_ADDRESSES = 4
@ -185,8 +184,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t queue_id)
{
uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
@ -243,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
lock_srbm(kgd, mec, pipe, 0, 0);
WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
WREG32(mmCP_HPD_EOP_VMID, 0);
WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
unlock_srbm(kgd);
/* amdgpu owns the per-pipe state */
return 0;
}
@ -264,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
pipe = (pipe_id % CIK_PIPE_PER_MEC);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, 0, 0);
@ -309,55 +299,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
if (is_wptr_shadow_valid)
m->cp_hqd_pq_wptr = wptr_shadow;
acquire_queue(kgd, pipe_id, queue_id);
WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
m->cp_hqd_pq_rptr_report_addr_hi);
WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
if (is_wptr_shadow_valid)
WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
gfx_v7_0_mqd_commit(adev, m);
release_queue(kgd);
return 0;

View File

@ -28,6 +28,7 @@
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ucode.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
@ -38,8 +39,6 @@
#include "vi_structs.h"
#include "vid.h"
#define VI_PIPE_PER_MEC (4)
struct cik_sdma_rlc_registers;
/*
@ -146,8 +145,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t queue_id)
{
uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
@ -205,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
/* amdgpu owns the per-pipe state */
return 0;
}
@ -214,8 +216,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
uint32_t mec;
uint32_t pipe;
mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
pipe = (pipe_id % VI_PIPE_PER_MEC);
mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, 0, 0);
@ -251,53 +253,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
m = get_mqd(mqd);
valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
if (valid_wptr == 0)
m->cp_hqd_pq_wptr = shadow_wptr;
acquire_queue(kgd, pipe_id, queue_id);
WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
m->cp_hqd_pq_rptr_report_addr_hi);
if (valid_wptr > 0)
WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
gfx_v8_0_mqd_commit(adev, mqd);
release_queue(kgd);
return 0;

View File

@ -35,33 +35,59 @@
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
struct amdgpu_bo_list **result,
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_bo_list *list,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries);
static void amdgpu_bo_list_release_rcu(struct kref *ref)
{
unsigned i;
struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
refcount);
for (i = 0; i < list->num_entries; ++i)
amdgpu_bo_unref(&list->array[i].robj);
mutex_destroy(&list->lock);
kvfree(list->array);
kfree_rcu(list, rhead);
}
static int amdgpu_bo_list_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries,
int *id)
{
int r;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo_list *list;
*result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
if (!*result)
list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
if (!list)
return -ENOMEM;
/* initialize bo list*/
mutex_init(&list->lock);
kref_init(&list->refcount);
r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
if (r) {
kfree(list);
return r;
}
/* idr alloc should be called only after initialization of bo list. */
mutex_lock(&fpriv->bo_list_lock);
r = idr_alloc(&fpriv->bo_list_handles, *result,
1, 0, GFP_KERNEL);
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
mutex_unlock(&fpriv->bo_list_lock);
kfree(*result);
kfree(list);
return r;
}
*id = r;
mutex_init(&(*result)->lock);
(*result)->num_entries = 0;
(*result)->array = NULL;
mutex_lock(&(*result)->lock);
mutex_unlock(&fpriv->bo_list_lock);
return 0;
}
@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
mutex_lock(&fpriv->bo_list_lock);
list = idr_remove(&fpriv->bo_list_handles, id);
if (list) {
/* Another user may have a reference to this list still */
mutex_lock(&list->lock);
mutex_unlock(&list->lock);
amdgpu_bo_list_free(list);
}
mutex_unlock(&fpriv->bo_list_lock);
if (list)
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
}
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@ -96,7 +118,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
int r;
unsigned long total_size = 0;
array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL);
if (!array)
return -ENOMEM;
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
@ -148,7 +170,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
for (i = 0; i < list->num_entries; ++i)
amdgpu_bo_unref(&list->array[i].robj);
drm_free_large(list->array);
kvfree(list->array);
list->gds_obj = gds_obj;
list->gws_obj = gws_obj;
@ -163,7 +185,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
error_free:
while (i--)
amdgpu_bo_unref(&array[i].robj);
drm_free_large(array);
kvfree(array);
return r;
}
@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
{
struct amdgpu_bo_list *result;
mutex_lock(&fpriv->bo_list_lock);
rcu_read_lock();
result = idr_find(&fpriv->bo_list_handles, id);
if (result)
mutex_lock(&result->lock);
mutex_unlock(&fpriv->bo_list_lock);
if (result) {
if (kref_get_unless_zero(&result->refcount))
mutex_lock(&result->lock);
else
result = NULL;
}
rcu_read_unlock();
return result;
}
@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
mutex_unlock(&list->lock);
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
}
void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
@ -224,7 +253,7 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
amdgpu_bo_unref(&list->array[i].robj);
mutex_destroy(&list->lock);
drm_free_large(list->array);
kvfree(list->array);
kfree(list);
}
@ -244,8 +273,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
int r;
info = drm_malloc_ab(args->in.bo_number,
sizeof(struct drm_amdgpu_bo_list_entry));
info = kvmalloc_array(args->in.bo_number,
sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
if (!info)
return -ENOMEM;
@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
r = amdgpu_bo_list_create(fpriv, &list, &handle);
r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
&handle);
if (r)
goto error_free;
r = amdgpu_bo_list_set(adev, filp, list, info,
args->in.bo_number);
amdgpu_bo_list_put(list);
if (r)
goto error_free;
break;
case AMDGPU_BO_LIST_OP_DESTROY:
@ -311,11 +334,11 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
memset(args, 0, sizeof(*args));
args->out.list_handle = handle;
drm_free_large(info);
kvfree(info);
return 0;
error_free:
drm_free_large(info);
kvfree(info);
return r;
}

View File

@ -27,81 +27,10 @@
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
u32 ip_instance, u32 ring,
struct amdgpu_ring **out_ring)
{
/* Right now all IPs have only one instance - multiple rings. */
if (ip_instance != 0) {
DRM_ERROR("invalid ip instance: %d\n", ip_instance);
return -EINVAL;
}
switch (ip_type) {
default:
DRM_ERROR("unknown ip type: %d\n", ip_type);
return -EINVAL;
case AMDGPU_HW_IP_GFX:
if (ring < adev->gfx.num_gfx_rings) {
*out_ring = &adev->gfx.gfx_ring[ring];
} else {
DRM_ERROR("only %d gfx rings are supported now\n",
adev->gfx.num_gfx_rings);
return -EINVAL;
}
break;
case AMDGPU_HW_IP_COMPUTE:
if (ring < adev->gfx.num_compute_rings) {
*out_ring = &adev->gfx.compute_ring[ring];
} else {
DRM_ERROR("only %d compute rings are supported now\n",
adev->gfx.num_compute_rings);
return -EINVAL;
}
break;
case AMDGPU_HW_IP_DMA:
if (ring < adev->sdma.num_instances) {
*out_ring = &adev->sdma.instance[ring].ring;
} else {
DRM_ERROR("only %d SDMA rings are supported\n",
adev->sdma.num_instances);
return -EINVAL;
}
break;
case AMDGPU_HW_IP_UVD:
*out_ring = &adev->uvd.ring;
break;
case AMDGPU_HW_IP_VCE:
if (ring < adev->vce.num_rings){
*out_ring = &adev->vce.ring[ring];
} else {
DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings);
return -EINVAL;
}
break;
case AMDGPU_HW_IP_UVD_ENC:
if (ring < adev->uvd.num_enc_rings){
*out_ring = &adev->uvd.ring_enc[ring];
} else {
DRM_ERROR("only %d UVD ENC rings are supported\n",
adev->uvd.num_enc_rings);
return -EINVAL;
}
break;
}
if (!(*out_ring && (*out_ring)->adev)) {
DRM_ERROR("Ring %d is not initialized on IP %d\n",
ring, ip_type);
return -EINVAL;
}
return 0;
}
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
uint32_t *offset)
@ -194,7 +123,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
size = p->chunks[i].length_dw;
cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
if (p->chunks[i].kdata == NULL) {
ret = -ENOMEM;
i--;
@ -226,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
break;
default:
@ -247,7 +178,7 @@ free_all_kdata:
i = p->nchunks - 1;
free_partial_kdata:
for (; i >= 0; i--)
drm_free_large(p->chunks[i].kdata);
kvfree(p->chunks[i].kdata);
kfree(p->chunks);
p->chunks = NULL;
p->nchunks = 0;
@ -505,7 +436,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
return r;
if (binding_userptr) {
drm_free_large(lobj->user_pages);
kvfree(lobj->user_pages);
lobj->user_pages = NULL;
}
}
@ -571,7 +502,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
release_pages(e->user_pages,
e->robj->tbo.ttm->num_pages,
false);
drm_free_large(e->user_pages);
kvfree(e->user_pages);
e->user_pages = NULL;
}
@ -597,12 +528,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_free_pages;
}
/* Fill the page arrays for all useptrs. */
/* Fill the page arrays for all userptrs. */
list_for_each_entry(e, &need_pages, tv.head) {
struct ttm_tt *ttm = e->robj->tbo.ttm;
e->user_pages = drm_calloc_large(ttm->num_pages,
sizeof(struct page*));
e->user_pages = kvmalloc_array(ttm->num_pages,
sizeof(struct page*),
GFP_KERNEL | __GFP_ZERO);
if (!e->user_pages) {
r = -ENOMEM;
DRM_ERROR("calloc failure in %s\n", __func__);
@ -612,7 +544,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
if (r) {
DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
drm_free_large(e->user_pages);
kvfree(e->user_pages);
e->user_pages = NULL;
goto error_free_pages;
}
@ -708,7 +640,7 @@ error_free_pages:
release_pages(e->user_pages,
e->robj->tbo.ttm->num_pages,
false);
drm_free_large(e->user_pages);
kvfree(e->user_pages);
}
}
@ -753,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
}
for (i = 0; i < parser->num_post_dep_syncobjs; i++)
drm_syncobj_put(parser->post_dep_syncobjs[i]);
kfree(parser->post_dep_syncobjs);
dma_fence_put(parser->fence);
if (parser->ctx)
@ -761,7 +698,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
amdgpu_bo_list_put(parser->bo_list);
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kvfree(parser->chunks[i].kdata);
kfree(parser->chunks);
if (parser->job)
amdgpu_job_free(parser->job);
@ -916,9 +853,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return -EINVAL;
}
r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring,
&ring);
r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring, &ring);
if (r)
return r;
@ -995,62 +931,150 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return 0;
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
unsigned num_deps;
int i, r;
struct drm_amdgpu_cs_chunk_dep *deps;
deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (i = 0; i < num_deps; ++i) {
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
if (ctx == NULL)
return -EINVAL;
r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
deps[i].ip_type,
deps[i].ip_instance,
deps[i].ring, &ring);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
fence = amdgpu_ctx_get_fence(ctx, ring,
deps[i].handle);
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
return r;
} else if (fence) {
r = amdgpu_sync_fence(p->adev, &p->job->sync,
fence);
dma_fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
return r;
}
}
return 0;
}
static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
uint32_t handle)
{
int r;
struct dma_fence *fence;
r = drm_syncobj_fence_get(p->filp, handle, &fence);
if (r)
return r;
r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
dma_fence_put(fence);
return r;
}
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
unsigned num_deps;
int i, r;
struct drm_amdgpu_cs_chunk_sem *deps;
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
for (i = 0; i < num_deps; ++i) {
r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
if (r)
return r;
}
return 0;
}
static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
unsigned num_deps;
int i;
struct drm_amdgpu_cs_chunk_sem *deps;
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
p->post_dep_syncobjs = kmalloc_array(num_deps,
sizeof(struct drm_syncobj *),
GFP_KERNEL);
p->num_post_dep_syncobjs = 0;
for (i = 0; i < num_deps; ++i) {
p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
if (!p->post_dep_syncobjs[i])
return -EINVAL;
p->num_post_dep_syncobjs++;
}
return 0;
}
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
int i, j, r;
int i, r;
for (i = 0; i < p->nchunks; ++i) {
struct drm_amdgpu_cs_chunk_dep *deps;
struct amdgpu_cs_chunk *chunk;
unsigned num_deps;
chunk = &p->chunks[i];
if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
continue;
deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (j = 0; j < num_deps; ++j) {
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance,
deps[j].ring, &ring);
if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
return r;
ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
if (ctx == NULL)
return -EINVAL;
fence = amdgpu_ctx_get_fence(ctx, ring,
deps[j].handle);
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
if (r)
return r;
} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
if (r)
return r;
} else if (fence) {
r = amdgpu_sync_fence(adev, &p->job->sync,
fence);
dma_fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
return r;
}
}
}
return 0;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
int i;
for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
drm_syncobj_replace_fence(p->filp, p->post_dep_syncobjs[i],
p->fence);
}
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
@ -1071,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->owner = p->filp;
job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_cs_post_dependencies(p);
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job);
@ -1078,13 +1105,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base);
return 0;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
@ -1092,6 +1119,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working)
return -EBUSY;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
parser.adev = adev;
parser.filp = filp;
@ -1153,21 +1182,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
{
union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
long r;
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring);
if (r)
return r;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
if (ctx == NULL)
return -EINVAL;
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
if (IS_ERR(fence))
r = PTR_ERR(fence);
@ -1203,15 +1239,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
struct dma_fence *fence;
int r;
r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
user->ring, &ring);
if (r)
return ERR_PTR(r);
ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
if (ctx == NULL)
return ERR_PTR(-EINVAL);
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
user->ip_instance, user->ring, &ring);
if (r) {
amdgpu_ctx_put(ctx);
return ERR_PTR(r);
}
fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
amdgpu_ctx_put(ctx);
@ -1332,12 +1370,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_wait_fences *wait = data;
uint32_t fence_count = wait->in.fence_count;
struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences;
int r;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
/* Get the fences from userspace */
fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
GFP_KERNEL);

View File

@ -52,12 +52,20 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
struct amd_sched_rq *rq;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
if (ring == &adev->gfx.kiq.ring)
continue;
r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
rq, amdgpu_sched_jobs);
if (r)
goto failed;
}
r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
if (r)
goto failed;
return 0;
failed:
@ -86,6 +94,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
for (i = 0; i < adev->num_rings; i++)
amd_sched_entity_fini(&adev->rings[i]->sched,
&ctx->rings[i].entity);
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,

View File

@ -54,8 +54,14 @@
#include <linux/pci.h>
#include <linux/firmware.h>
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev);
static const char *amdgpu_asic_name[] = {
"TAHITI",
@ -77,6 +83,7 @@ static const char *amdgpu_asic_name[] = {
"POLARIS11",
"POLARIS12",
"VEGA10",
"RAVEN",
"LAST",
};
@ -478,9 +485,8 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
/*
* amdgpu_wb_*()
* Writeback is the the method by which the the GPU updates special pages
* in memory with the status of certain GPU events (fences, ring pointers,
* etc.).
* Writeback is the method by which the GPU updates special pages in memory
* with the status of certain GPU events (fences, ring pointers,etc.).
*/
/**
@ -506,7 +512,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
* Disables Writeback and frees the Writeback memory (all asics).
* Initializes writeback and allocates writeback memory (all asics).
* Used at driver startup.
* Returns 0 on success or an -error on failure.
*/
@ -614,7 +620,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
* @mc: memory controller structure holding memory informations
* @base: base address at which to put VRAM
*
* Function will place try to place VRAM at base address provided
* Function will try to place VRAM at base address provided
* as parameter (which is so far either PCI aperture address or
* for IGP TOM base address).
*
@ -636,7 +642,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
* ones)
*
* Note: IGP TOM addr should be the same as the aperture addr, we don't
* explicitly check for that thought.
* explicitly check for that though.
*
* FIXME: when reducing VRAM size align new size on power of 2.
*/
@ -1067,6 +1073,10 @@ def_value:
static void amdgpu_check_vm_size(struct amdgpu_device *adev)
{
/* no need to check the default value */
if (amdgpu_vm_size == -1)
return;
if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
amdgpu_vm_size);
@ -1342,6 +1352,9 @@ int amdgpu_ip_block_add(struct amdgpu_device *adev,
if (!ip_block_version)
return -EINVAL;
DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks,
ip_block_version->funcs->name);
adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
return 0;
@ -1392,6 +1405,104 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
}
}
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
const char *chip_name;
char fw_name[30];
int err;
const struct gpu_info_firmware_header_v1_0 *hdr;
adev->firmware.gpu_info_fw = NULL;
switch (adev->asic_type) {
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS11:
case CHIP_POLARIS10:
case CHIP_POLARIS12:
case CHIP_CARRIZO:
case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_OLAND:
case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
case CHIP_HAWAII:
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
#endif
default:
return 0;
case CHIP_VEGA10:
chip_name = "vega10";
break;
case CHIP_RAVEN:
chip_name = "raven";
break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
if (err) {
dev_err(adev->dev,
"Failed to load gpu_info firmware \"%s\"\n",
fw_name);
goto out;
}
err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
if (err) {
dev_err(adev->dev,
"Failed to validate gpu_info firmware \"%s\"\n",
fw_name);
goto out;
}
hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
switch (hdr->version_major) {
case 1:
{
const struct gpu_info_firmware_v1_0 *gpu_info_fw =
(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches =
le32_to_cpu(gpu_info_fw->gc_num_tccs);
adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf =
le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd =
le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu =
le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
break;
}
default:
dev_err(adev->dev,
"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
err = -EINVAL;
goto out;
}
out:
return err;
}
static int amdgpu_early_init(struct amdgpu_device *adev)
{
int i, r;
@ -1444,8 +1555,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
return r;
break;
#endif
case CHIP_VEGA10:
adev->family = AMDGPU_FAMILY_AI;
case CHIP_VEGA10:
case CHIP_RAVEN:
if (adev->asic_type == CHIP_RAVEN)
adev->family = AMDGPU_FAMILY_RV;
else
adev->family = AMDGPU_FAMILY_AI;
r = soc15_set_ip_blocks(adev);
if (r)
@ -1456,6 +1571,10 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
return -EINVAL;
}
r = amdgpu_device_parse_gpu_info_fw(adev);
if (r)
return r;
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
@ -1464,7 +1583,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d\n", i);
DRM_ERROR("disabled ip block: %d <%s>\n",
i, adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false;
} else {
if (adev->ip_blocks[i].version->funcs->early_init) {
@ -1552,6 +1672,40 @@ static int amdgpu_init(struct amdgpu_device *adev)
return 0;
}
static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
{
memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}
static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
{
return !!memcmp(adev->gart.ptr, adev->reset_magic,
AMDGPU_RESET_MAGIC_NUM);
}
static int amdgpu_late_set_cg_state(struct amdgpu_device *adev)
{
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_GATE);
if (r) {
DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
}
return 0;
}
static int amdgpu_late_init(struct amdgpu_device *adev)
{
int i = 0, r;
@ -1568,20 +1722,13 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
}
adev->ip_blocks[i].status.late_initialized = true;
}
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_GATE);
if (r) {
DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
}
mod_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
amdgpu_fill_reset_magic(adev);
return 0;
}
@ -1672,6 +1819,13 @@ static int amdgpu_fini(struct amdgpu_device *adev)
return 0;
}
static void amdgpu_late_init_func_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, late_init_work.work);
amdgpu_late_set_cg_state(adev);
}
int amdgpu_suspend(struct amdgpu_device *adev)
{
int i, r;
@ -1717,19 +1871,25 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
{
int i, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_COMMON,
AMD_IP_BLOCK_TYPE_IH,
};
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
int j;
struct amdgpu_ip_block *block;
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
for (j = 0; j < adev->num_ip_blocks; j++) {
block = &adev->ip_blocks[j];
if (block->version->type != ip_order[i] ||
!block->status.valid)
continue;
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
}
}
@ -1740,16 +1900,68 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
{
int i, r;
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC,
AMD_IP_BLOCK_TYPE_DCE,
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
AMD_IP_BLOCK_TYPE_VCE,
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
int j;
struct amdgpu_ip_block *block;
for (j = 0; j < adev->num_ip_blocks; j++) {
block = &adev->ip_blocks[j];
if (block->version->type != ip_order[i] ||
!block->status.valid)
continue;
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
}
}
return 0;
}
static int amdgpu_resume_phase1(struct amdgpu_device *adev)
{
int i, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
}
return 0;
}
static int amdgpu_resume_phase2(struct amdgpu_device *adev)
{
int i, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
continue;
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@ -1762,20 +1974,14 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
static int amdgpu_resume(struct amdgpu_device *adev)
{
int i, r;
int r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
r = amdgpu_resume_phase1(adev);
if (r)
return r;
r = amdgpu_resume_phase2(adev);
return 0;
return r;
}
static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
@ -1860,8 +2066,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_check_arguments(adev);
/* Registers mapping */
/* TODO: block userspace mapping of io register */
spin_lock_init(&adev->mmio_idx_lock);
spin_lock_init(&adev->smc_idx_lock);
spin_lock_init(&adev->pcie_idx_lock);
@ -1877,6 +2081,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->gtt_list);
spin_lock_init(&adev->gtt_list_lock);
INIT_LIST_HEAD(&adev->ring_lru_list);
spin_lock_init(&adev->ring_lru_list_lock);
INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler);
/* Registers mapping */
/* TODO: block userspace mapping of io register */
if (adev->asic_type >= CHIP_BONAIRE) {
adev->rmmio_base = pci_resource_start(adev->pdev, 5);
adev->rmmio_size = pci_resource_len(adev->pdev, 5);
@ -1989,6 +2200,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->accel_working = true;
amdgpu_vm_check_compute_bug(adev);
/* Initialize the buffer migration limit. */
if (amdgpu_moverate >= 0)
max_MBps = amdgpu_moverate;
@ -2017,6 +2230,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
DRM_ERROR("registering register debugfs failed (%d).\n", r);
r = amdgpu_debugfs_test_ib_ring_init(adev);
if (r)
DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r);
r = amdgpu_debugfs_firmware_init(adev);
if (r)
DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
@ -2073,7 +2290,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_fini(adev);
if (adev->firmware.gpu_info_fw) {
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
}
adev->accel_working = false;
cancel_delayed_work_sync(&adev->late_init_work);
/* free i2c buses */
amdgpu_i2c_fini(adev);
amdgpu_atombios_fini(adev);
@ -2458,16 +2680,15 @@ err:
* amdgpu_sriov_gpu_reset - reset the asic
*
* @adev: amdgpu device pointer
* @voluntary: if this reset is requested by guest.
* (true means by guest and false means by HYPERVISOR )
* @job: which job trigger hang
*
* Attempt the reset the GPU if it has hung (all asics).
* for SRIOV case.
* Returns 0 for success or an error on failure.
*/
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
{
int i, r = 0;
int i, j, r = 0;
int resched;
struct amdgpu_bo *bo, *tmp;
struct amdgpu_ring *ring;
@ -2480,22 +2701,39 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
/* block scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ring = adev->rings[i];
/* we start from the ring trigger GPU hang */
j = job ? job->ring->idx : 0;
/* block scheduler */
for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
ring = adev->rings[i % AMDGPU_MAX_RINGS];
if (!ring || !ring->sched.thread)
continue;
kthread_park(ring->sched.thread);
if (job && j != i)
continue;
/* here give the last chance to check if job removed from mirror-list
* since we already pay some time on kthread_park */
if (job && list_empty(&job->base.node)) {
kthread_unpark(ring->sched.thread);
goto give_up_reset;
}
if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit))
amd_sched_job_kickout(&job->base);
/* only do job_reset on the hang ring if @job not NULL */
amd_sched_hw_job_reset(&ring->sched);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion_ring(ring);
}
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(adev);
/* request to take full control of GPU before re-initialization */
if (voluntary)
if (job)
amdgpu_virt_reset_gpu(adev);
else
amdgpu_virt_request_full_gpu(adev, true);
@ -2545,20 +2783,28 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
}
dma_fence_put(fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
ring = adev->rings[i % AMDGPU_MAX_RINGS];
if (!ring || !ring->sched.thread)
continue;
if (job && j != i) {
kthread_unpark(ring->sched.thread);
continue;
}
amd_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
}
drm_helper_resume_force_mode(adev->ddev);
give_up_reset:
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
if (r) {
/* bad news, how to tell it to userspace ? */
dev_info(adev->dev, "GPU reset failed\n");
} else {
dev_info(adev->dev, "GPU reset successed!\n");
}
adev->gfx.in_reset = false;
@ -2578,10 +2824,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
{
int i, r;
int resched;
bool need_full_reset;
if (amdgpu_sriov_vf(adev))
return amdgpu_sriov_gpu_reset(adev, true);
bool need_full_reset, vram_lost = false;
if (!amdgpu_check_soft_reset(adev)) {
DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@ -2641,16 +2884,27 @@ retry:
if (!r) {
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_resume(adev);
}
}
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(adev);
if (need_full_reset && amdgpu_need_backup(adev)) {
r = amdgpu_resume_phase1(adev);
if (r)
goto out;
vram_lost = amdgpu_check_vram_lost(adev);
if (vram_lost) {
DRM_ERROR("VRAM is lost!\n");
atomic_inc(&adev->vram_lost_counter);
}
r = amdgpu_ttm_recover_gart(adev);
if (r)
DRM_ERROR("gart recovery failed!!!\n");
goto out;
r = amdgpu_resume_phase2(adev);
if (r)
goto out;
if (vram_lost)
amdgpu_fill_reset_magic(adev);
}
}
out:
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
if (r) {
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
@ -2712,10 +2966,11 @@ retry:
drm_helper_resume_force_mode(adev->ddev);
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
if (r) {
if (r)
/* bad news, how to tell it to userspace ? */
dev_info(adev->dev, "GPU reset failed\n");
}
else
dev_info(adev->dev, "GPU reset successed!\n");
return r;
}
@ -3499,11 +3754,60 @@ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
}
}
static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
int r = 0, i;
/* hold on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
kthread_park(ring->sched.thread);
}
seq_printf(m, "run ib test:\n");
r = amdgpu_ib_ring_tests(adev);
if (r)
seq_printf(m, "ib ring tests failed (%d).\n", r);
else
seq_printf(m, "ib ring tests passed.\n");
/* go on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
kthread_unpark(ring->sched.thread);
}
return 0;
}
static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = {
{"amdgpu_test_ib", &amdgpu_debugfs_test_ib}
};
static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
{
return amdgpu_debugfs_add_files(adev,
amdgpu_debugfs_test_ib_ring_list, 1);
}
int amdgpu_debugfs_init(struct drm_minor *minor)
{
return 0;
}
#else
static int amdgpu_debugfs_test_ib_init(struct amdgpu_device *adev)
{
return 0;
}
static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;

View File

@ -22,7 +22,7 @@
* Authors: Alex Deucher
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_i2c.h"

View File

@ -39,7 +39,7 @@
#include <linux/module.h>
#include <linux/pm_runtime.h>
#include <linux/vga_switcheroo.h>
#include "drm_crtc_helper.h"
#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
@ -65,9 +65,11 @@
* - 3.13.0 - Add PRT support
* - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
* - 3.15.0 - Export more gpu info for gfx9
* - 3.16.0 - Add reserved vmid support
* - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 15
#define KMS_DRIVER_MINOR 17
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@ -92,7 +94,8 @@ int amdgpu_vm_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop = 0;
int amdgpu_vm_debug = 0;
int amdgpu_vram_page_split = 1024;
int amdgpu_vram_page_split = 512;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support = 0;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
@ -110,6 +113,8 @@ int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
int amdgpu_cntl_sb_buf_per_se = 0;
int amdgpu_param_buf_per_se = 0;
int amdgpu_job_hang_limit = 0;
int amdgpu_lbpw = -1;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@ -177,6 +182,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");
module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)");
module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
@ -232,6 +240,24 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(lbpw, amdgpu_lbpw, int, 0444);
#ifdef CONFIG_DRM_AMDGPU_SI
int amdgpu_si_support = 0;
MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
module_param_named(si_support, amdgpu_si_support, int, 0444);
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
int amdgpu_cik_support = 0;
MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
@ -460,6 +486,9 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
@ -491,6 +520,7 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct drm_device *dev;
unsigned long flags = ent->driver_data;
int ret;
@ -513,7 +543,29 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
if (ret)
return ret;
return drm_get_pci_dev(pdev, ent, &kms_driver);
dev = drm_dev_alloc(&kms_driver, &pdev->dev);
if (IS_ERR(dev))
return PTR_ERR(dev);
ret = pci_enable_device(pdev);
if (ret)
goto err_free;
dev->pdev = pdev;
pci_set_drvdata(pdev, dev);
ret = drm_dev_register(dev, ent->driver_data);
if (ret)
goto err_pci;
return 0;
err_pci:
pci_disable_device(pdev);
err_free:
drm_dev_unref(dev);
return ret;
}
static void
@ -521,7 +573,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
drm_put_dev(dev);
drm_dev_unregister(dev);
drm_dev_unref(dev);
}
static void
@ -715,11 +768,21 @@ static const struct file_operations amdgpu_driver_kms_fops = {
#endif
};
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,
ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode)
{
return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
stime, etime, mode);
}
static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP |
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
@ -729,8 +792,8 @@ static struct drm_driver kms_driver = {
.get_vblank_counter = amdgpu_get_vblank_counter_kms,
.enable_vblank = amdgpu_enable_vblank_kms,
.disable_vblank = amdgpu_disable_vblank_kms,
.get_vblank_timestamp = amdgpu_get_vblank_timestamp_kms,
.get_scanout_position = amdgpu_get_crtc_scanoutpos,
.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
.get_scanout_position = amdgpu_get_crtc_scanout_position,
#if defined(CONFIG_DEBUG_FS)
.debugfs_init = amdgpu_debugfs_init,
#endif
@ -807,7 +870,7 @@ static int __init amdgpu_init(void)
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
/* let modprobe override vga console setting */
return drm_pci_init(driver, pdriver);
return pci_register_driver(pdriver);
error_sched:
amdgpu_fence_slab_fini();
@ -822,7 +885,7 @@ error_sync:
static void __exit amdgpu_exit(void)
{
amdgpu_amdkfd_fini();
drm_pci_exit(driver, pdriver);
pci_unregister_driver(pdriver);
amdgpu_unregister_atpx_handler();
amdgpu_sync_fini();
amd_sched_fence_slab_fini();

View File

@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
}
}
void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring)
{
if (ring)
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
}
/*
* Common fence implementation
*/
@ -660,11 +666,17 @@ static const struct drm_info_list amdgpu_debugfs_fence_list[] = {
{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
};
static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = {
{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
};
#endif
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
if (amdgpu_sriov_vf(adev))
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
#else
return 0;

View File

@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
*
* Unbinds the requested pages from the gart page table and
* replaces them with the dummy page (all asics).
* Returns 0 for success, -EINVAL for failure.
*/
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages)
{
unsigned t;
@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n");
return;
return -EINVAL;
}
t = offset / AMDGPU_GPU_PAGE_SIZE;
@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
mb();
amdgpu_gart_flush_gpu_tlb(adev, 0);
return 0;
}
/**

View File

@ -219,16 +219,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
ttm_eu_backoff_reservation(&ticket, &list);
}
static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
{
if (r == -EDEADLK) {
r = amdgpu_gpu_reset(adev);
if (!r)
r = -EAGAIN;
}
return r;
}
/*
* GEM ioctls.
*/
@ -249,20 +239,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_VRAM_CLEARED|
AMDGPU_GEM_CREATE_SHADOW |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
r = -EINVAL;
goto error_unlock;
}
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
return -EINVAL;
/* reject invalid gem domains */
if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA)) {
r = -EINVAL;
goto error_unlock;
}
AMDGPU_GEM_DOMAIN_OA))
return -EINVAL;
/* create a gem object to contain this object in */
if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
@ -274,10 +261,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
size = size << AMDGPU_GWS_SHIFT;
else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
size = size << AMDGPU_OA_SHIFT;
else {
r = -EINVAL;
goto error_unlock;
}
else
return -EINVAL;
}
size = roundup(size, PAGE_SIZE);
@ -286,21 +271,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
args->in.domain_flags,
kernel, &gobj);
if (r)
goto error_unlock;
return r;
r = drm_gem_handle_create(filp, gobj, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(gobj);
if (r)
goto error_unlock;
return r;
memset(args, 0, sizeof(*args));
args->out.handle = handle;
return 0;
error_unlock:
r = amdgpu_gem_handle_lockup(adev, r);
return r;
}
int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
@ -334,7 +315,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_DOMAIN_CPU, 0,
0, &gobj);
if (r)
goto handle_lockup;
return r;
bo = gem_to_amdgpu_bo(gobj);
bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
@ -374,7 +355,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(gobj);
if (r)
goto handle_lockup;
return r;
args->handle = handle;
return 0;
@ -388,9 +369,6 @@ unlock_mmap_sem:
release_object:
drm_gem_object_unreference_unlocked(gobj);
handle_lockup:
r = amdgpu_gem_handle_lockup(adev, r);
return r;
}
@ -456,7 +434,6 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_gem_wait_idle *args = data;
struct drm_gem_object *gobj;
struct amdgpu_bo *robj;
@ -484,7 +461,6 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
r = ret;
drm_gem_object_unreference_unlocked(gobj);
r = amdgpu_gem_handle_lockup(adev, r);
return r;
}
@ -593,9 +569,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
uint64_t va_flags;
int r = 0;
if (!adev->vm_manager.enabled)
return -ENOTTY;
if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
dev_err(&dev->pdev->dev,
"va_address 0x%lX is in reserved area 0x%X\n",
@ -621,6 +594,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->operation);
return -EINVAL;
}
if ((args->operation == AMDGPU_VA_OP_MAP) ||
(args->operation == AMDGPU_VA_OP_REPLACE)) {
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
}
INIT_LIST_HEAD(&list);
if ((args->operation != AMDGPU_VA_OP_CLEAR) &&

View File

@ -108,3 +108,209 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
p = next + 1;
}
}
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, pipe, mec;
/* policy for amdgpu compute queue ownership */
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
queue = i % adev->gfx.mec.num_queue_per_pipe;
pipe = (i / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
mec = (i / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
/* we've run out of HW */
if (mec >= adev->gfx.mec.num_mec)
break;
if (adev->gfx.mec.num_mec > 1) {
/* policy: amdgpu owns the first two queues of the first MEC */
if (mec == 0 && queue < 2)
set_bit(i, adev->gfx.mec.queue_bitmap);
} else {
/* policy: amdgpu owns all queues in the first pipe */
if (mec == 0 && pipe == 0)
set_bit(i, adev->gfx.mec.queue_bitmap);
}
}
/* update the number of active compute rings */
adev->gfx.num_compute_rings =
bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* If you hit this case and edited the policy, you probably just
* need to increase AMDGPU_MAX_COMPUTE_RINGS */
if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
}
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
int queue_bit;
int mec, pipe, queue;
queue_bit = adev->gfx.mec.num_mec
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
while (queue_bit-- >= 0) {
if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
continue;
amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
/* Using pipes 2/3 from MEC 2 seems cause problems */
if (mec == 1 && pipe > 1)
continue;
ring->me = mec + 1;
ring->pipe = pipe;
ring->queue = queue;
return 0;
}
dev_err(adev->dev, "Failed to find a queue for KIQ\n");
return -EINVAL;
}
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
int r = 0;
mutex_init(&kiq->ring_mutex);
r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
if (r)
return r;
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
r = amdgpu_gfx_kiq_acquire(adev, ring);
if (r)
return r;
ring->eop_gpu_addr = kiq->eop_gpu_addr;
sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024,
irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
return r;
}
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq)
{
amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
amdgpu_ring_fini(ring);
}
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
unsigned hpd_size)
{
int r;
u32 *hpd;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
&kiq->eop_gpu_addr, (void **)&hpd);
if (r) {
dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
return r;
}
memset(hpd, 0, hpd_size);
r = amdgpu_bo_reserve(kiq->eop_obj, true);
if (unlikely(r != 0))
dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
amdgpu_bo_kunmap(kiq->eop_obj);
amdgpu_bo_unreserve(kiq->eop_obj);
return 0;
}
/* create MQD for each compute queue */
int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size)
{
struct amdgpu_ring *ring = NULL;
int r, i;
/* create MQD for KIQ */
ring = &adev->gfx.kiq.ring;
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
return r;
}
/* prepare MQD backup */
adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
/* create MQD for each KCQ */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
return r;
}
/* prepare MQD backup */
adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.mec.mqd_backup[i])
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
}
return 0;
}
void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = NULL;
int i;
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
kfree(adev->gfx.mec.mqd_backup[i]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
ring = &adev->gfx.kiq.ring;
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}

View File

@ -30,4 +30,64 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq);
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
unsigned hpd_size);
int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size);
void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
/**
* amdgpu_gfx_create_bitmask - create a bitmask
*
* @bit_width: length of the mask
*
* create a variable length bit mask.
* Returns the bitmask.
*/
static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
{
return (u32)((1ULL << bit_width) - 1);
}
static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
int bit = 0;
bit += mec * adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
bit += pipe * adev->gfx.mec.num_queue_per_pipe;
bit += queue;
return bit;
}
static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
}
static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
#endif

View File

@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
bool skip_preamble, need_ctx_switch;
unsigned patch_offset = ~0;
struct amdgpu_vm *vm;
@ -160,8 +161,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
}
if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
amdgpu_ring_emit_pipeline_sync(ring);
dma_fence_put(tmp);
}
if (ring->funcs->insert_start)
ring->funcs->insert_start(ring);
if (vm) {
r = amdgpu_vm_flush(ring, job);
@ -188,8 +197,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
status |= AMDGPU_HAVE_CTX_SWITCH;
status |= job->preamble_status;
if (vm)
status |= AMDGPU_VM_DOMAIN;
amdgpu_ring_emit_cntxcntl(ring, status);
}
@ -208,6 +215,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
need_ctx_switch = false;
}
if (ring->funcs->emit_tmz)
amdgpu_ring_emit_tmz(ring, false);
if (ring->funcs->emit_hdp_invalidate
#ifdef CONFIG_X86_64
&& !(adev->flags & AMD_IS_APU)

View File

@ -62,8 +62,9 @@ enum amdgpu_ih_clientid
AMDGPU_IH_CLIENTID_MP0 = 0x1e,
AMDGPU_IH_CLIENTID_MP1 = 0x1f,
AMDGPU_IH_CLIENTID_MAX
AMDGPU_IH_CLIENTID_MAX,
AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD
};
#define AMDGPU_IH_CLIENTID_LEGACY 0

View File

@ -83,7 +83,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
amdgpu_gpu_reset(adev);
if (!amdgpu_sriov_vf(adev))
amdgpu_gpu_reset(adev);
}
/* Disable *all* interrupts */

View File

@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
job->base.sched->name,
atomic_read(&job->ring->fence_drv.last_seq),
job->ring->fence_drv.sync_seq);
amdgpu_gpu_reset(job->adev);
if (amdgpu_sriov_vf(job->adev))
amdgpu_sriov_gpu_reset(job->adev, job);
else
amdgpu_gpu_reset(job->adev);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@ -57,9 +61,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
(*job)->vm = vm;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
(*job)->need_pipeline_sync = false;
amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->dep_sync);
amdgpu_sync_create(&(*job)->sched_sync);
return 0;
}
@ -98,6 +103,8 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->dep_sync);
amdgpu_sync_free(&job->sched_sync);
kfree(job);
}
@ -107,6 +114,8 @@ void amdgpu_job_free(struct amdgpu_job *job)
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->dep_sync);
amdgpu_sync_free(&job->sched_sync);
kfree(job);
}
@ -138,11 +147,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);
struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync);
int r;
if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) {
r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
if (r)
DRM_ERROR("Error adding fence to sync (%d)\n", r);
}
if (!fence)
fence = amdgpu_sync_get_fence(&job->sync);
while (fence == NULL && vm && !job->vm_id) {
struct amdgpu_ring *ring = job->ring;
int r;
r = amdgpu_vm_grab_id(vm, ring, &job->sync,
&job->base.s_fence->finished,
@ -153,9 +169,6 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
fence = amdgpu_sync_get_fence(&job->sync);
}
if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
job->need_pipeline_sync = true;
return fence;
}
@ -163,6 +176,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
{
struct dma_fence *fence = NULL;
struct amdgpu_job *job;
struct amdgpu_fpriv *fpriv = NULL;
int r;
if (!sched_job) {
@ -174,10 +188,16 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
trace_amdgpu_sched_run_job(job);
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
if (job->vm)
fpriv = container_of(job->vm, struct amdgpu_fpriv, vm);
/* skip ib schedule when vram is lost */
if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv))
DRM_ERROR("Skip scheduling IBs!\n");
else {
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
/* if gpu reset, hw fence will be replaced here */
dma_fence_put(job->fence);
job->fence = dma_fence_get(fence);

View File

@ -87,6 +87,41 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
struct amdgpu_device *adev;
int r, acpi_status;
#ifdef CONFIG_DRM_AMDGPU_SI
if (!amdgpu_si_support) {
switch (flags & AMD_ASIC_MASK) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
case CHIP_HAINAN:
dev_info(dev->dev,
"SI support provided by radeon.\n");
dev_info(dev->dev,
"Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
);
return -ENODEV;
}
}
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
if (!amdgpu_cik_support) {
switch (flags & AMD_ASIC_MASK) {
case CHIP_KAVERI:
case CHIP_BONAIRE:
case CHIP_HAWAII:
case CHIP_KABINI:
case CHIP_MULLINS:
dev_info(dev->dev,
"CIK support provided by radeon.\n");
dev_info(dev->dev,
"Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
);
return -ENODEV;
}
}
#endif
adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
if (adev == NULL) {
return -ENOMEM;
@ -235,6 +270,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_amdgpu_info *info = data;
struct amdgpu_mode_info *minfo = &adev->mode_info;
void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@ -247,6 +283,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
@ -319,6 +357,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_dec.ready ? 1 : 0;
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
break;
default:
return -EINVAL;
}
@ -361,6 +412,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
break;
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
break;
default:
return -EINVAL;
}
@ -397,6 +452,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_INFO_NUM_EVICTIONS:
ui64 = atomic64_read(&adev->num_evictions);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS:
ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VRAM_USAGE:
ui64 = atomic64_read(&adev->vram_usage);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
@ -730,6 +788,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
vga_switcheroo_process_delayed_switch();
}
bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
}
/**
* amdgpu_driver_open_kms - drm callback for open
*
@ -757,7 +821,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto out_suspend;
}
r = amdgpu_vm_init(adev, &fpriv->vm);
r = amdgpu_vm_init(adev, &fpriv->vm,
AMDGPU_VM_CONTEXT_GFX);
if (r) {
kfree(fpriv);
goto out_suspend;
@ -782,6 +847,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
file_priv->driver_priv = fpriv;
out_suspend:
@ -814,8 +880,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_uvd_free_handles(adev, file_priv);
amdgpu_vce_free_handles(adev, file_priv);
if (adev->asic_type != CHIP_RAVEN) {
amdgpu_uvd_free_handles(adev, file_priv);
amdgpu_vce_free_handles(adev, file_priv);
}
amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
@ -945,50 +1013,10 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
amdgpu_irq_put(adev, &adev->crtc_irq, idx);
}
/**
* amdgpu_get_vblank_timestamp_kms - get vblank timestamp
*
* @dev: drm dev pointer
* @crtc: crtc to get the timestamp for
* @max_error: max error
* @vblank_time: time value
* @flags: flags passed to the driver
*
* Gets the timestamp on the requested crtc based on the
* scanout position. (all asics).
* Returns postive status flags on success, negative error on failure.
*/
int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
int *max_error,
struct timeval *vblank_time,
unsigned flags)
{
struct drm_crtc *crtc;
struct amdgpu_device *adev = dev->dev_private;
if (pipe >= dev->num_crtcs) {
DRM_ERROR("Invalid crtc %u\n", pipe);
return -EINVAL;
}
/* Get associated drm_crtc: */
crtc = &adev->mode_info.crtcs[pipe]->base;
if (!crtc) {
/* This can occur on driver load if some component fails to
* initialize completely and driver is unloaded */
DRM_ERROR("Uninitialized crtc %d\n", pipe);
return -EINVAL;
}
/* Helper routine in DRM core does all the work: */
return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
vblank_time, flags,
&crtc->hwmode);
}
const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
/* KMS */
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),

View File

@ -534,6 +534,9 @@ struct amdgpu_framebuffer {
((em) == ATOM_ENCODER_MODE_DP_MST))
/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
#define DRM_SCANOUTPOS_VALID (1 << 0)
#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1)
#define DRM_SCANOUTPOS_ACCURATE (1 << 2)
#define USE_REAL_VBLANKSTART (1 << 30)
#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)

View File

@ -960,6 +960,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
return -EINVAL;
/* hurrah the memory is not visible ! */
atomic64_inc(&adev->num_vram_cpu_page_faults);
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
for (i = 0; i < abo->placement.num_placement; i++) {

View File

@ -72,6 +72,7 @@ static int amdgpu_pp_early_init(void *handle)
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_VEGA10:
case CHIP_RAVEN:
adev->pp_enabled = true;
if (amdgpu_create_pp_handle(adev))
return -EINVAL;

View File

@ -24,12 +24,13 @@
*/
#include <linux/firmware.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
#include "soc15_common.h"
#include "psp_v3_1.h"
#include "psp_v10_0.h"
static void psp_set_funcs(struct amdgpu_device *adev);
@ -61,6 +62,12 @@ static int psp_sw_init(void *handle)
psp->compare_sram_data = psp_v3_1_compare_sram_data;
psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
break;
case CHIP_RAVEN:
psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
psp->ring_init = psp_v10_0_ring_init;
psp->cmd_submit = psp_v10_0_cmd_submit;
psp->compare_sram_data = psp_v10_0_compare_sram_data;
break;
default:
return -EINVAL;
}
@ -230,6 +237,13 @@ static int psp_asd_load(struct psp_context *psp)
int ret;
struct psp_gfx_cmd_resp *cmd;
/* If PSP version doesn't match ASD version, asd loading will be failed.
* add workaround to bypass it for sriov now.
* TODO: add version check to make it common
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@ -542,3 +556,12 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
.rev = 0,
.funcs = &psp_ip_funcs,
};
const struct amdgpu_ip_block_version psp_v10_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 10,
.minor = 0,
.rev = 0,
.funcs = &psp_ip_funcs,
};

View File

@ -138,4 +138,6 @@ extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
uint32_t field_val, uint32_t mask, bool check_changed);
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
#endif

View File

@ -0,0 +1,299 @@
/*
* Copyright 2017 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Andres Rodriguez
*/
#include "amdgpu.h"
#include "amdgpu_ring.h"
static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
int hw_ip)
{
if (!mapper)
return -EINVAL;
if (hw_ip > AMDGPU_MAX_IP_NUM)
return -EINVAL;
mapper->hw_ip = hw_ip;
mutex_init(&mapper->lock);
memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
return 0;
}
static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
int ring)
{
return mapper->queue_map[ring];
}
static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
int ring, struct amdgpu_ring *pring)
{
if (WARN_ON(mapper->queue_map[ring])) {
DRM_ERROR("Un-expected ring re-map\n");
return -EINVAL;
}
mapper->queue_map[ring] = pring;
return 0;
}
static int amdgpu_identity_map(struct amdgpu_device *adev,
struct amdgpu_queue_mapper *mapper,
int ring,
struct amdgpu_ring **out_ring)
{
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
*out_ring = &adev->gfx.gfx_ring[ring];
break;
case AMDGPU_HW_IP_COMPUTE:
*out_ring = &adev->gfx.compute_ring[ring];
break;
case AMDGPU_HW_IP_DMA:
*out_ring = &adev->sdma.instance[ring].ring;
break;
case AMDGPU_HW_IP_UVD:
*out_ring = &adev->uvd.ring;
break;
case AMDGPU_HW_IP_VCE:
*out_ring = &adev->vce.ring[ring];
break;
case AMDGPU_HW_IP_UVD_ENC:
*out_ring = &adev->uvd.ring_enc[ring];
break;
case AMDGPU_HW_IP_VCN_DEC:
*out_ring = &adev->vcn.ring_dec;
break;
case AMDGPU_HW_IP_VCN_ENC:
*out_ring = &adev->vcn.ring_enc[ring];
break;
default:
*out_ring = NULL;
DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
return -EINVAL;
}
return amdgpu_update_cached_map(mapper, ring, *out_ring);
}
static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
{
switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
return AMDGPU_RING_TYPE_GFX;
case AMDGPU_HW_IP_COMPUTE:
return AMDGPU_RING_TYPE_COMPUTE;
case AMDGPU_HW_IP_DMA:
return AMDGPU_RING_TYPE_SDMA;
case AMDGPU_HW_IP_UVD:
return AMDGPU_RING_TYPE_UVD;
case AMDGPU_HW_IP_VCE:
return AMDGPU_RING_TYPE_VCE;
default:
DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
return -1;
}
}
static int amdgpu_lru_map(struct amdgpu_device *adev,
struct amdgpu_queue_mapper *mapper,
int user_ring,
struct amdgpu_ring **out_ring)
{
int r, i, j;
int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
int ring_blacklist[AMDGPU_MAX_RINGS];
struct amdgpu_ring *ring;
/* 0 is a valid ring index, so initialize to -1 */
memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
ring = mapper->queue_map[i];
if (ring)
ring_blacklist[j++] = ring->idx;
}
r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
j, out_ring);
if (r)
return r;
return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
}
/**
* amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
*
* Initialize the the selected @mgr (all asics).
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr)
{
int i, r;
if (!adev || !mgr)
return -EINVAL;
memset(mgr, 0, sizeof(*mgr));
for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
if (r)
return r;
}
return 0;
}
/**
* amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
*
* De-initialize the the selected @mgr (all asics).
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr)
{
return 0;
}
/**
* amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
* @hw_ip: HW IP enum
* @instance: HW instance
* @ring: user ring id
* @our_ring: pointer to mapped amdgpu_ring
*
* Map a userspace ring id to an appropriate kernel ring. Different
* policies are configurable at a HW IP level.
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr,
int hw_ip, int instance, int ring,
struct amdgpu_ring **out_ring)
{
int r, ip_num_rings;
struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
if (!adev || !mgr || !out_ring)
return -EINVAL;
if (hw_ip >= AMDGPU_MAX_IP_NUM)
return -EINVAL;
if (ring >= AMDGPU_MAX_RINGS)
return -EINVAL;
/* Right now all IPs have only one instance - multiple rings. */
if (instance != 0) {
DRM_ERROR("invalid ip instance: %d\n", instance);
return -EINVAL;
}
switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
ip_num_rings = adev->gfx.num_gfx_rings;
break;
case AMDGPU_HW_IP_COMPUTE:
ip_num_rings = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
ip_num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
ip_num_rings = 1;
break;
case AMDGPU_HW_IP_VCE:
ip_num_rings = adev->vce.num_rings;
break;
case AMDGPU_HW_IP_UVD_ENC:
ip_num_rings = adev->uvd.num_enc_rings;
break;
case AMDGPU_HW_IP_VCN_DEC:
ip_num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_ENC:
ip_num_rings = adev->vcn.num_enc_rings;
break;
default:
DRM_ERROR("unknown ip type: %d\n", hw_ip);
return -EINVAL;
}
if (ring >= ip_num_rings) {
DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
ring, ip_num_rings, hw_ip);
return -EINVAL;
}
mutex_lock(&mapper->lock);
*out_ring = amdgpu_get_cached_map(mapper, ring);
if (*out_ring) {
/* cache hit */
r = 0;
goto out_unlock;
}
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
case AMDGPU_HW_IP_UVD:
case AMDGPU_HW_IP_VCE:
case AMDGPU_HW_IP_UVD_ENC:
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
r = amdgpu_identity_map(adev, mapper, ring, out_ring);
break;
case AMDGPU_HW_IP_DMA:
case AMDGPU_HW_IP_COMPUTE:
r = amdgpu_lru_map(adev, mapper, ring, out_ring);
break;
default:
*out_ring = NULL;
r = -EINVAL;
DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
}
out_unlock:
mutex_unlock(&mapper->lock);
return r;
}

View File

@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
if (ring->funcs->end_use)
ring->funcs->end_use(ring);
amdgpu_ring_lru_touch(ring->adev, ring);
}
/**
@ -253,10 +255,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
}
ring->max_dw = max_dw;
INIT_LIST_HEAD(&ring->lru_list);
amdgpu_ring_lru_touch(adev, ring);
if (amdgpu_debugfs_ring_init(adev, ring)) {
DRM_ERROR("Failed to register debugfs file for rings !\n");
}
return 0;
}
@ -294,6 +299,84 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
ring->adev->rings[ring->idx] = NULL;
}
static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
/* list_move_tail handles the case where ring isn't part of the list */
list_move_tail(&ring->lru_list, &adev->ring_lru_list);
}
static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
int *blacklist, int num_blacklist)
{
int i;
for (i = 0; i < num_blacklist; i++) {
if (ring->idx == blacklist[i])
return true;
}
return false;
}
/**
* amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
*
* @adev: amdgpu_device pointer
* @type: amdgpu_ring_type enum
* @blacklist: blacklisted ring ids array
* @num_blacklist: number of entries in @blacklist
* @ring: output ring
*
* Retrieve the amdgpu_ring structure for the least recently used ring of
* a specific IP block (all asics).
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
int num_blacklist, struct amdgpu_ring **ring)
{
struct amdgpu_ring *entry;
/* List is sorted in LRU order, find first entry corresponding
* to the desired HW IP */
*ring = NULL;
spin_lock(&adev->ring_lru_list_lock);
list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
if (entry->funcs->type != type)
continue;
if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
continue;
*ring = entry;
amdgpu_ring_lru_touch_locked(adev, *ring);
break;
}
spin_unlock(&adev->ring_lru_list_lock);
if (!*ring) {
DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
return -EINVAL;
}
return 0;
}
/**
* amdgpu_ring_lru_touch - mark a ring as recently being used
*
* @adev: amdgpu_device pointer
* @ring: ring to touch
*
* Move @ring to the tail of the lru list
*/
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
spin_lock(&adev->ring_lru_list_lock);
amdgpu_ring_lru_touch_locked(adev, ring);
spin_unlock(&adev->ring_lru_list_lock);
}
/*
* Debugfs info
*/

View File

@ -47,7 +47,9 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_UVD,
AMDGPU_RING_TYPE_VCE,
AMDGPU_RING_TYPE_KIQ,
AMDGPU_RING_TYPE_UVD_ENC
AMDGPU_RING_TYPE_UVD_ENC,
AMDGPU_RING_TYPE_VCN_DEC,
AMDGPU_RING_TYPE_VCN_ENC
};
struct amdgpu_device;
@ -76,6 +78,7 @@ struct amdgpu_fence_driver {
int amdgpu_fence_driver_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission);
@ -130,6 +133,7 @@ struct amdgpu_ring_funcs {
int (*test_ib)(struct amdgpu_ring *ring, long timeout);
/* insert NOP packets */
void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
void (*insert_start)(struct amdgpu_ring *ring);
void (*insert_end)(struct amdgpu_ring *ring);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
@ -142,6 +146,7 @@ struct amdgpu_ring_funcs {
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
};
struct amdgpu_ring {
@ -149,6 +154,7 @@ struct amdgpu_ring {
const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv;
struct amd_gpu_scheduler sched;
struct list_head lru_list;
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
@ -180,6 +186,7 @@ struct amdgpu_ring {
u64 cond_exe_gpu_addr;
volatile u32 *cond_exe_cpu_addr;
unsigned vm_inv_eng;
bool has_compute_vm_bug;
#if defined(CONFIG_DEBUG_FS)
struct dentry *ent;
#endif
@ -194,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
int num_blacklist, struct amdgpu_ring **ring);
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
int i = 0;

View File

@ -298,6 +298,25 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
return NULL;
}
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
int i, r;
hash_for_each_safe(sync->fences, i, tmp, e, node) {
r = dma_fence_wait(e->fence, intr);
if (r)
return r;
hash_del(&e->node);
dma_fence_put(e->fence);
kmem_cache_free(amdgpu_sync_slab, e);
}
return 0;
}
/**
* amdgpu_sync_free - free the sync object
*

View File

@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int amdgpu_sync_init(void);
void amdgpu_sync_fini(void);

View File

@ -29,11 +29,11 @@
* Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
* Dave Airlie
*/
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
#include <ttm/ttm_placement.h>
#include <ttm/ttm_module.h>
#include <ttm/ttm_page_alloc.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_page_alloc.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <linux/seq_file.h>
@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
return r;
}
spin_lock(&gtt->adev->gtt_list_lock);
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
if (r) {
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
ttm->num_pages, gtt->offset);
return r;
goto error_gart_bind;
}
spin_lock(&gtt->adev->gtt_list_lock);
list_add_tail(&gtt->list, &gtt->adev->gtt_list);
error_gart_bind:
spin_unlock(&gtt->adev->gtt_list_lock);
return 0;
return r;
}
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
if (gtt->userptr)
amdgpu_ttm_tt_unpin_userptr(ttm);
@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
return 0;
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
if (gtt->adev->gart.ready)
amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
spin_lock(&gtt->adev->gtt_list_lock);
r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
if (r) {
DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
gtt->ttm.ttm.num_pages, gtt->offset);
goto error_unbind;
}
list_del_init(&gtt->list);
error_unbind:
spin_unlock(&gtt->adev->gtt_list_lock);
return 0;
return r;
}
static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
@ -1115,7 +1121,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Change the size here instead of the init above so only lpfn is affected */
amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
r = amdgpu_bo_create(adev, adev->mc.stolen_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
@ -1462,6 +1468,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
if (*pos >= adev->mc.mc_vram_size)
return -ENXIO;
while (size) {
unsigned long flags;
uint32_t value;

View File

@ -197,6 +197,27 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
}
}
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
{
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
DRM_DEBUG("GPU_INFO\n");
amdgpu_ucode_print_common_hdr(hdr);
if (version_major == 1) {
const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr =
container_of(hdr, struct gpu_info_firmware_header_v1_0, header);
DRM_DEBUG("version_major: %u\n",
le16_to_cpu(gpu_info_hdr->version_major));
DRM_DEBUG("version_minor: %u\n",
le16_to_cpu(gpu_info_hdr->version_minor));
} else {
DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor);
}
}
int amdgpu_ucode_validate(const struct firmware *fw)
{
const struct common_firmware_header *hdr =
@ -253,6 +274,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
case CHIP_RAVEN:
#if 0
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
#else
return AMDGPU_FW_LOAD_DIRECT;
#endif
default:
DRM_ERROR("Unknow firmware load type\n");
}
@ -349,7 +379,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
0, NULL, NULL, bo);
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, bo);
if (err) {
dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
goto failed;

View File

@ -113,6 +113,32 @@ struct sdma_firmware_header_v1_1 {
uint32_t digest_size;
};
/* gpu info payload */
struct gpu_info_firmware_v1_0 {
uint32_t gc_num_se;
uint32_t gc_num_cu_per_sh;
uint32_t gc_num_sh_per_se;
uint32_t gc_num_rb_per_se;
uint32_t gc_num_tccs;
uint32_t gc_num_gprs;
uint32_t gc_num_max_gs_thds;
uint32_t gc_gs_table_depth;
uint32_t gc_gsprim_buff_depth;
uint32_t gc_parameter_cache_depth;
uint32_t gc_double_offchip_lds_buffer;
uint32_t gc_wave_size;
uint32_t gc_max_waves_per_simd;
uint32_t gc_max_scratch_slots_per_cu;
uint32_t gc_lds_size;
};
/* version_major=1, version_minor=0 */
struct gpu_info_firmware_header_v1_0 {
struct common_firmware_header header;
uint16_t version_major; /* version */
uint16_t version_minor; /* version */
};
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@ -124,6 +150,7 @@ union amdgpu_firmware_header {
struct rlc_firmware_header_v2_0 rlc_v2_0;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
uint8_t raw[0x100];
};
@ -184,6 +211,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
int amdgpu_ucode_validate(const struct firmware *fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);

View File

@ -165,35 +165,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
(binary_id << 8));
/* allocate firmware, stack and heap BO */
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, &adev->vce.vcpu_bo);
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
&adev->vce.gpu_addr, &adev->vce.cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
return r;
}
r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
if (r) {
amdgpu_bo_unref(&adev->vce.vcpu_bo);
dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
return r;
}
r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
&adev->vce.gpu_addr);
amdgpu_bo_unreserve(adev->vce.vcpu_bo);
if (r) {
amdgpu_bo_unref(&adev->vce.vcpu_bo);
dev_err(adev->dev, "(%d) VCE bo pin failed\n", r);
return r;
}
ring = &adev->vce.ring[0];
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
@ -230,7 +209,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
amdgpu_bo_unref(&adev->vce.vcpu_bo);
amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
(void **)&adev->vce.cpu_addr);
for (i = 0; i < adev->vce.num_rings; i++)
amdgpu_ring_fini(&adev->vce.ring[i]);

View File

@ -33,6 +33,8 @@
struct amdgpu_vce {
struct amdgpu_bo *vcpu_bo;
uint64_t gpu_addr;
void *cpu_addr;
void *saved_bo;
unsigned fw_version;
unsigned fb_version;
atomic_t handles[AMDGPU_MAX_VCE_HANDLES];

View File

@ -0,0 +1,654 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "vega10/soc15ip.h"
#include "raven1/VCN/vcn_1_0_offset.h"
/* 1 second timeout */
#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
struct amd_sched_rq *rq;
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned version_major, version_minor, family_id;
int r;
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
switch (adev->asic_type) {
case CHIP_RAVEN:
fw_name = FIRMWARE_RAVEN;
break;
default:
return -EINVAL;
}
r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
if (r) {
dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
fw_name);
return r;
}
r = amdgpu_ucode_validate(adev->vcn.fw);
if (r) {
dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
fw_name);
release_firmware(adev->vcn.fw);
adev->vcn.fw = NULL;
return r;
}
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
version_major, version_minor, family_id);
bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+ AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
+ AMDGPU_VCN_SESSION_SIZE * 40;
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
&adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
return r;
}
ring = &adev->vcn.ring_dec;
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
rq, amdgpu_sched_jobs);
if (r != 0) {
DRM_ERROR("Failed setting up VCN dec run queue.\n");
return r;
}
ring = &adev->vcn.ring_enc[0];
rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
rq, amdgpu_sched_jobs);
if (r != 0) {
DRM_ERROR("Failed setting up VCN enc run queue.\n");
return r;
}
return 0;
}
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
int i;
kfree(adev->vcn.saved_bo);
amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
&adev->vcn.gpu_addr,
(void **)&adev->vcn.cpu_addr);
amdgpu_ring_fini(&adev->vcn.ring_dec);
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
release_firmware(adev->vcn.fw);
return 0;
}
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
if (adev->vcn.vcpu_bo == NULL)
return 0;
cancel_delayed_work_sync(&adev->vcn.idle_work);
size = amdgpu_bo_size(adev->vcn.vcpu_bo);
ptr = adev->vcn.cpu_addr;
adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
if (!adev->vcn.saved_bo)
return -ENOMEM;
memcpy_fromio(adev->vcn.saved_bo, ptr, size);
return 0;
}
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
if (adev->vcn.vcpu_bo == NULL)
return -EINVAL;
size = amdgpu_bo_size(adev->vcn.vcpu_bo);
ptr = adev->vcn.cpu_addr;
if (adev->vcn.saved_bo != NULL) {
memcpy_toio(ptr, adev->vcn.saved_bo, size);
kfree(adev->vcn.saved_bo);
adev->vcn.saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
unsigned offset;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
size -= le32_to_cpu(hdr->ucode_size_bytes);
ptr += le32_to_cpu(hdr->ucode_size_bytes);
memset_io(ptr, 0, size);
}
return 0;
}
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vcn.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
if (fences == 0) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, false);
} else {
amdgpu_asic_set_uvd_clocks(adev, 0, 0);
}
} else {
schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
}
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
if (set_clocks) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, true);
} else {
amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
}
}
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
unsigned i;
int r;
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
return r;
}
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_INFO("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
return r;
}
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
bool direct, struct dma_fence **fence)
{
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct list_head head;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
struct amdgpu_device *adev = ring->adev;
uint64_t addr;
int i, r;
memset(&tv, 0, sizeof(tv));
tv.bo = &bo->tbo;
INIT_LIST_HEAD(&head);
list_add(&tv.head, &head);
r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
if (r)
return r;
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
goto err;
r = amdgpu_job_alloc_with_ib(adev, 64, &job);
if (r)
goto err;
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
ib->ptr[1] = addr;
ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
ib->ptr[3] = addr >> 32;
ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
if (direct) {
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
if (r)
goto err_free;
amdgpu_job_free(job);
} else {
r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err_free;
}
ttm_eu_fence_buffer_objects(&ticket, &head, f);
if (fence)
*fence = dma_fence_get(f);
amdgpu_bo_unref(&bo);
dma_fence_put(f);
return 0;
err_free:
amdgpu_job_free(job);
err:
ttm_eu_backoff_reservation(&ticket, &head);
return r;
}
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_bo *bo;
uint32_t *msg;
int r, i;
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, &bo);
if (r)
return r;
r = amdgpu_bo_reserve(bo, false);
if (r) {
amdgpu_bo_unref(&bo);
return r;
}
r = amdgpu_bo_kmap(bo, (void **)&msg);
if (r) {
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&bo);
return r;
}
msg[0] = cpu_to_le32(0x00000028);
msg[1] = cpu_to_le32(0x00000038);
msg[2] = cpu_to_le32(0x00000001);
msg[3] = cpu_to_le32(0x00000000);
msg[4] = cpu_to_le32(handle);
msg[5] = cpu_to_le32(0x00000000);
msg[6] = cpu_to_le32(0x00000001);
msg[7] = cpu_to_le32(0x00000028);
msg[8] = cpu_to_le32(0x00000010);
msg[9] = cpu_to_le32(0x00000000);
msg[10] = cpu_to_le32(0x00000007);
msg[11] = cpu_to_le32(0x00000000);
msg[12] = cpu_to_le32(0x00000780);
msg[13] = cpu_to_le32(0x00000440);
for (i = 14; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unreserve(bo);
return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
}
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_bo *bo;
uint32_t *msg;
int r, i;
r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL, &bo);
if (r)
return r;
r = amdgpu_bo_reserve(bo, false);
if (r) {
amdgpu_bo_unref(&bo);
return r;
}
r = amdgpu_bo_kmap(bo, (void **)&msg);
if (r) {
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&bo);
return r;
}
msg[0] = cpu_to_le32(0x00000028);
msg[1] = cpu_to_le32(0x00000018);
msg[2] = cpu_to_le32(0x00000000);
msg[3] = cpu_to_le32(0x00000002);
msg[4] = cpu_to_le32(handle);
msg[5] = cpu_to_le32(0x00000000);
for (i = 6; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unreserve(bo);
return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
}
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence;
long r;
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
goto error;
}
r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
r = 0;
}
dma_fence_put(fence);
error:
return r;
}
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t rptr = amdgpu_ring_get_rptr(ring);
unsigned i;
int r;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
ring->idx, r);
return r;
}
amdgpu_ring_write(ring, VCN_ENC_CMD_END);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
if (amdgpu_ring_get_rptr(ring) != rptr)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_INFO("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed\n",
ring->idx);
r = -ETIMEDOUT;
}
return r;
}
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
uint64_t dummy;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
if (r)
return r;
ib = &job->ibs[0];
dummy = ib->gpu_addr + 1024;
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
ib->ptr[ib->length_dw++] = dummy;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
ib->ptr[ib->length_dw++] = 0x0000001c;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000008;
ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
if (r)
goto err;
amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
return 0;
err:
amdgpu_job_free(job);
return r;
}
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
uint64_t dummy;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
if (r)
return r;
ib = &job->ibs[0];
dummy = ib->gpu_addr + 1024;
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
ib->ptr[ib->length_dw++] = dummy;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
ib->ptr[ib->length_dw++] = 0x0000001c;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000008;
ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
if (r)
goto err;
amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
return 0;
err:
amdgpu_job_free(job);
return r;
}
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
long r;
r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
goto error;
}
r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
r = 0;
}
error:
dma_fence_put(fence);
return r;
}

View File

@ -0,0 +1,77 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__
#define AMDGPU_VCN_STACK_SIZE (200*1024)
#define AMDGPU_VCN_HEAP_SIZE (256*1024)
#define AMDGPU_VCN_SESSION_SIZE (50*1024)
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
#define VCN_DEC_CMD_FENCE 0x00000000
#define VCN_DEC_CMD_TRAP 0x00000001
#define VCN_DEC_CMD_WRITE_REG 0x00000004
#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006
#define VCN_DEC_CMD_PACKET_START 0x0000000a
#define VCN_DEC_CMD_PACKET_END 0x0000000b
#define VCN_ENC_CMD_NO_OP 0x00000000
#define VCN_ENC_CMD_END 0x00000001
#define VCN_ENC_CMD_IB 0x00000002
#define VCN_ENC_CMD_FENCE 0x00000003
#define VCN_ENC_CMD_TRAP 0x00000004
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
struct amdgpu_vcn {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
unsigned fw_version;
void *saved_bo;
struct delayed_work idle_work;
const struct firmware *fw; /* VCN firmware */
struct amdgpu_ring ring_dec;
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
struct amdgpu_irq_src irq;
struct amd_sched_entity entity_dec;
struct amd_sched_entity entity_enc;
unsigned num_enc_rings;
};
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
int amdgpu_vcn_suspend(struct amdgpu_device *adev);
int amdgpu_vcn_resume(struct amdgpu_device *adev);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
#endif

View File

@ -22,6 +22,7 @@
*/
#include "amdgpu.h"
#define MAX_KIQ_REG_WAIT 100000
int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
{
@ -105,8 +106,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
/* enable virtual display */
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
adev->cg_flags = 0;
adev->pg_flags = 0;
mutex_init(&adev->virt.lock_kiq);
mutex_init(&adev->virt.lock_reset);
}
@ -120,17 +122,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
mutex_lock(&adev->virt.lock_kiq);
mutex_lock(&kiq->ring_mutex);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg);
amdgpu_fence_emit(ring, &f);
amdgpu_ring_commit(ring);
mutex_unlock(&adev->virt.lock_kiq);
mutex_unlock(&kiq->ring_mutex);
r = dma_fence_wait(f, false);
if (r)
DRM_ERROR("wait for kiq fence error: %ld.\n", r);
r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
dma_fence_put(f);
if (r < 1) {
DRM_ERROR("wait for kiq fence error: %ld.\n", r);
return ~0;
}
val = adev->wb.wb[adev->virt.reg_val_offs];
@ -146,15 +150,15 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
mutex_lock(&adev->virt.lock_kiq);
mutex_lock(&kiq->ring_mutex);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_fence_emit(ring, &f);
amdgpu_ring_commit(ring);
mutex_unlock(&adev->virt.lock_kiq);
mutex_unlock(&kiq->ring_mutex);
r = dma_fence_wait(f, false);
if (r)
r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
if (r < 1)
DRM_ERROR("wait for kiq fence error: %ld.\n", r);
dma_fence_put(f);
}

View File

@ -52,7 +52,6 @@ struct amdgpu_virt {
uint64_t csa_vmid0_addr;
bool chained_ib_support;
uint32_t reg_val_offs;
struct mutex lock_kiq;
struct mutex lock_reset;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
@ -97,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);

View File

@ -79,6 +79,12 @@ struct amdgpu_pte_update_params {
uint64_t flags);
/* indicate update pt or its shadow */
bool shadow;
/* The next two are used during VM update by CPU
* DMA addresses to use for mapping
* Kernel pointer of PD/PT BO that needs to be updated
*/
dma_addr_t *pages_addr;
void *kptr;
};
/* Helper to disable partial resident texture feature from a fence callback */
@ -275,12 +281,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
adev->vm_manager.block_size;
unsigned pt_idx, from, to;
int r;
u64 flags;
if (!parent->entries) {
unsigned num_entries = amdgpu_vm_num_entries(adev, level);
parent->entries = drm_calloc_large(num_entries,
sizeof(struct amdgpu_vm_pt));
parent->entries = kvmalloc_array(num_entries,
sizeof(struct amdgpu_vm_pt),
GFP_KERNEL | __GFP_ZERO);
if (!parent->entries)
return -ENOMEM;
memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
@ -299,6 +307,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
saddr = saddr & ((1 << shift) - 1);
eaddr = eaddr & ((1 << shift) - 1);
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW);
/* walk over the address space and allocate the page tables */
for (pt_idx = from; pt_idx <= to; ++pt_idx) {
struct reservation_object *resv = vm->root.bo->tbo.resv;
@ -310,10 +326,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
amdgpu_vm_bo_size(adev, level),
AMDGPU_GPU_PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
flags,
NULL, resv, &pt);
if (r)
return r;
@ -391,6 +404,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
atomic_read(&adev->gpu_reset_counter);
}
static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
{
return !!vm->reserved_vmid[vmhub];
}
/* idr_mgr->lock must be held */
static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
struct amdgpu_sync *sync,
struct dma_fence *fence,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
uint64_t fence_context = adev->fence_context + ring->idx;
struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct dma_fence *updates = sync->last_vm_update;
int r = 0;
struct dma_fence *flushed, *tmp;
bool needs_flush = false;
flushed = id->flushed_updates;
if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
(atomic64_read(&id->owner) != vm->client_id) ||
(job->vm_pd_addr != id->pd_gpu_addr) ||
(updates && (!flushed || updates->context != flushed->context ||
dma_fence_is_later(updates, flushed))) ||
(!id->last_flush || (id->last_flush->context != fence_context &&
!dma_fence_is_signaled(id->last_flush)))) {
needs_flush = true;
/* to prevent one context starved by another context */
id->pd_gpu_addr = 0;
tmp = amdgpu_sync_peek_fence(&id->active, ring);
if (tmp) {
r = amdgpu_sync_fence(adev, sync, tmp);
return r;
}
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(ring->adev, &id->active, fence);
if (r)
goto out;
if (updates && (!flushed || updates->context != flushed->context ||
dma_fence_is_later(updates, flushed))) {
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
}
id->pd_gpu_addr = job->vm_pd_addr;
atomic64_set(&id->owner, vm->client_id);
job->vm_needs_flush = needs_flush;
if (needs_flush) {
dma_fence_put(id->last_flush);
id->last_flush = NULL;
}
job->vm_id = id - id_mgr->ids;
trace_amdgpu_vm_grab_id(vm, ring, job);
out:
return r;
}
/**
* amdgpu_vm_grab_id - allocate the next free VMID
*
@ -415,12 +493,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
unsigned i;
int r = 0;
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences)
return -ENOMEM;
mutex_lock(&id_mgr->lock);
if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
mutex_unlock(&id_mgr->lock);
return r;
}
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences) {
mutex_unlock(&id_mgr->lock);
return -ENOMEM;
}
/* Check if we have an idle VMID */
i = 0;
list_for_each_entry(idle, &id_mgr->ids_lru, list) {
@ -521,7 +604,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
id->pd_gpu_addr = job->vm_pd_addr;
dma_fence_put(id->flushed_updates);
id->flushed_updates = dma_fence_get(updates);
id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
atomic64_set(&id->owner, vm->client_id);
needs_flush:
@ -540,40 +622,118 @@ error:
return r;
}
static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_device *adev = ring->adev;
const struct amdgpu_ip_block *ip_block;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
/* only compute rings */
return false;
ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
if (!ip_block)
return false;
if (ip_block->version->major <= 7) {
/* gfx7 has no workaround */
return true;
} else if (ip_block->version->major == 8) {
if (adev->gfx.mec_fw_version >= 673)
/* gfx8 is fixed in MEC firmware 673 */
return false;
else
return true;
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub]) {
list_add(&vm->reserved_vmid[vmhub]->list,
&id_mgr->ids_lru);
vm->reserved_vmid[vmhub] = NULL;
atomic_dec(&id_mgr->reserved_vmid_num);
}
return false;
mutex_unlock(&id_mgr->lock);
}
static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub)
{
u64 addr = mc_addr;
struct amdgpu_vm_id_manager *id_mgr;
struct amdgpu_vm_id *idle;
int r = 0;
if (adev->gart.gart_funcs->adjust_mc_addr)
addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr);
id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
AMDGPU_VM_MAX_RESERVED_VMID) {
DRM_ERROR("Over limitation of reserved vmid\n");
atomic_dec(&id_mgr->reserved_vmid_num);
r = -EINVAL;
goto unlock;
}
/* Select the first entry VMID */
idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
list_del_init(&idle->list);
vm->reserved_vmid[vmhub] = idle;
mutex_unlock(&id_mgr->lock);
return addr;
return 0;
unlock:
mutex_unlock(&id_mgr->lock);
return r;
}
/**
* amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
*
* @adev: amdgpu_device pointer
*/
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
{
const struct amdgpu_ip_block *ip_block;
bool has_compute_vm_bug;
struct amdgpu_ring *ring;
int i;
has_compute_vm_bug = false;
ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
if (ip_block) {
/* Compute has a VM bug for GFX version < 7.
Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
if (ip_block->version->major <= 7)
has_compute_vm_bug = true;
else if (ip_block->version->major == 8)
if (adev->gfx.mec_fw_version < 673)
has_compute_vm_bug = true;
}
for (i = 0; i < adev->num_rings; i++) {
ring = adev->rings[i];
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
/* only compute rings */
ring->has_compute_vm_bug = has_compute_vm_bug;
else
ring->has_compute_vm_bug = false;
}
}
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vm_id *id;
bool gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
if (job->vm_id == 0)
return false;
id = &id_mgr->ids[job->vm_id];
gds_switch_needed = ring->funcs->emit_gds_switch && (
id->gds_base != job->gds_base ||
id->gds_size != job->gds_size ||
id->gws_base != job->gws_base ||
id->gws_size != job->gws_size ||
id->oa_base != job->oa_base ||
id->oa_size != job->oa_size);
if (amdgpu_vm_had_gpu_reset(adev, id))
return true;
return vm_flush_needed || gds_switch_needed;
}
static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
{
return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
}
/**
@ -598,8 +758,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
id->gws_size != job->gws_size ||
id->oa_base != job->oa_base ||
id->oa_size != job->oa_size);
bool vm_flush_needed = job->vm_needs_flush ||
amdgpu_vm_ring_has_compute_vm_bug(ring);
bool vm_flush_needed = job->vm_needs_flush;
unsigned patch_offset = 0;
int r;
@ -614,15 +773,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
amdgpu_ring_emit_pipeline_sync(ring);
if (ring->funcs->emit_vm_flush && vm_flush_needed) {
u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
struct dma_fence *fence;
trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
r = amdgpu_fence_emit(ring, &fence);
if (r)
@ -631,6 +786,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
mutex_lock(&id_mgr->lock);
dma_fence_put(id->last_flush);
id->last_flush = fence;
id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
mutex_unlock(&id_mgr->lock);
}
@ -805,6 +961,53 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
return result;
}
/**
* amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
*
* @params: see amdgpu_pte_update_params definition
* @pe: kmap addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
* @flags: hw access flags
*
* Write count number of PT/PD entries directly.
*/
static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
uint64_t flags)
{
unsigned int i;
uint64_t value;
for (i = 0; i < count; i++) {
value = params->pages_addr ?
amdgpu_vm_map_gart(params->pages_addr, addr) :
addr;
amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
i, value, flags);
addr += incr;
}
/* Flush HDP */
mb();
amdgpu_gart_flush_gpu_tlb(params->adev, 0);
}
static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
{
struct amdgpu_sync sync;
int r;
amdgpu_sync_create(&sync);
amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
r = amdgpu_sync_wait(&sync, true);
amdgpu_sync_free(&sync);
return r;
}
/*
* amdgpu_vm_update_level - update a single level in the hierarchy
*
@ -821,11 +1024,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
unsigned level)
{
struct amdgpu_bo *shadow;
struct amdgpu_ring *ring;
uint64_t pd_addr, shadow_addr;
struct amdgpu_ring *ring = NULL;
uint64_t pd_addr, shadow_addr = 0;
uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
unsigned count = 0, pt_idx, ndw;
unsigned count = 0, pt_idx, ndw = 0;
struct amdgpu_job *job;
struct amdgpu_pte_update_params params;
struct dma_fence *fence = NULL;
@ -834,34 +1037,54 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
if (!parent->entries)
return 0;
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
/* padding, etc. */
ndw = 64;
/* assume the worst case */
ndw += parent->last_entry_used * 6;
pd_addr = amdgpu_bo_gpu_offset(parent->bo);
shadow = parent->bo->shadow;
if (shadow) {
r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
if (r)
return r;
shadow_addr = amdgpu_bo_gpu_offset(shadow);
ndw *= 2;
} else {
shadow_addr = 0;
}
r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
if (r)
return r;
memset(&params, 0, sizeof(params));
params.adev = adev;
params.ib = &job->ibs[0];
shadow = parent->bo->shadow;
WARN_ON(vm->use_cpu_for_update && shadow);
if (vm->use_cpu_for_update && !shadow) {
r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
if (r)
return r;
r = amdgpu_vm_bo_wait(adev, parent->bo);
if (unlikely(r)) {
amdgpu_bo_kunmap(parent->bo);
return r;
}
params.func = amdgpu_vm_cpu_set_ptes;
} else {
if (shadow) {
r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
if (r)
return r;
}
ring = container_of(vm->entity.sched, struct amdgpu_ring,
sched);
/* padding, etc. */
ndw = 64;
/* assume the worst case */
ndw += parent->last_entry_used * 6;
pd_addr = amdgpu_bo_gpu_offset(parent->bo);
if (shadow) {
shadow_addr = amdgpu_bo_gpu_offset(shadow);
ndw *= 2;
} else {
shadow_addr = 0;
}
r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
if (r)
return r;
params.ib = &job->ibs[0];
params.func = amdgpu_vm_do_set_ptes;
}
/* walk over the address space and update the directory */
for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
@ -881,6 +1104,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
}
pt = amdgpu_bo_gpu_offset(bo);
pt = amdgpu_gart_get_vm_pde(adev, pt);
if (parent->entries[pt_idx].addr == pt)
continue;
@ -892,19 +1116,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
(count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
if (count) {
uint64_t pt_addr =
amdgpu_vm_adjust_mc_addr(adev, last_pt);
if (shadow)
amdgpu_vm_do_set_ptes(&params,
last_shadow,
pt_addr, count,
incr,
AMDGPU_PTE_VALID);
params.func(&params,
last_shadow,
last_pt, count,
incr,
AMDGPU_PTE_VALID);
amdgpu_vm_do_set_ptes(&params, last_pde,
pt_addr, count, incr,
AMDGPU_PTE_VALID);
params.func(&params, last_pde,
last_pt, count, incr,
AMDGPU_PTE_VALID);
}
count = 1;
@ -917,17 +1138,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
}
if (count) {
uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
if (vm->root.bo->shadow)
amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
count, incr, AMDGPU_PTE_VALID);
params.func(&params, last_shadow, last_pt,
count, incr, AMDGPU_PTE_VALID);
amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
count, incr, AMDGPU_PTE_VALID);
params.func(&params, last_pde, last_pt,
count, incr, AMDGPU_PTE_VALID);
}
if (params.ib->length_dw == 0) {
if (params.func == amdgpu_vm_cpu_set_ptes)
amdgpu_bo_kunmap(parent->bo);
else if (params.ib->length_dw == 0) {
amdgpu_job_free(job);
} else {
amdgpu_ring_pad_ib(ring, params.ib);
@ -970,6 +1191,32 @@ error_free:
return r;
}
/*
* amdgpu_vm_invalidate_level - mark all PD levels as invalid
*
* @parent: parent PD
*
* Mark all PD level as invalid after an error.
*/
static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
{
unsigned pt_idx;
/*
* Recurse into the subdirectories. This recursion is harmless because
* we only have a maximum of 5 layers.
*/
for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
if (!entry->bo)
continue;
entry->addr = ~0ULL;
amdgpu_vm_invalidate_level(entry);
}
}
/*
* amdgpu_vm_update_directories - make sure that all directories are valid
*
@ -982,7 +1229,13 @@ error_free:
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
return amdgpu_vm_update_level(adev, vm, &vm->root, 0);
int r;
r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
if (r)
amdgpu_vm_invalidate_level(&vm->root);
return r;
}
/**
@ -1022,58 +1275,37 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
* @flags: mapping flags
*
* Update the page tables in the range @start - @end.
* Returns 0 for success, -EINVAL for failure.
*/
static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags)
{
struct amdgpu_device *adev = params->adev;
const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
uint64_t cur_pe_start, cur_nptes, cur_dst;
uint64_t addr; /* next GPU address to be updated */
uint64_t addr, pe_start;
struct amdgpu_bo *pt;
unsigned nptes; /* next number of ptes to be updated */
uint64_t next_pe_start;
unsigned nptes;
int r;
bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
/* initialize the variables */
addr = start;
pt = amdgpu_vm_get_pt(params, addr);
if (!pt) {
pr_err("PT not found, aborting update_ptes\n");
return;
}
if (params->shadow) {
if (!pt->shadow)
return;
pt = pt->shadow;
}
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
cur_pe_start = amdgpu_bo_gpu_offset(pt);
cur_pe_start += (addr & mask) * 8;
cur_nptes = nptes;
cur_dst = dst;
/* for next ptb*/
addr += nptes;
dst += nptes * AMDGPU_GPU_PAGE_SIZE;
/* walk over the address space and update the page tables */
while (addr < end) {
for (addr = start; addr < end; addr += nptes) {
pt = amdgpu_vm_get_pt(params, addr);
if (!pt) {
pr_err("PT not found, aborting update_ptes\n");
return;
return -EINVAL;
}
if (params->shadow) {
if (WARN_ONCE(use_cpu_update,
"CPU VM update doesn't suuport shadow pages"))
return 0;
if (!pt->shadow)
return;
return 0;
pt = pt->shadow;
}
@ -1082,32 +1314,25 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
else
nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
next_pe_start = amdgpu_bo_gpu_offset(pt);
next_pe_start += (addr & mask) * 8;
if (use_cpu_update) {
r = amdgpu_bo_kmap(pt, (void *)&pe_start);
if (r)
return r;
} else
pe_start = amdgpu_bo_gpu_offset(pt);
if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
/* The next ptb is consecutive to current ptb.
* Don't call the update function now.
* Will update two ptbs together in future.
*/
cur_nptes += nptes;
} else {
params->func(params, cur_pe_start, cur_dst, cur_nptes,
AMDGPU_GPU_PAGE_SIZE, flags);
pe_start += (addr & mask) * 8;
cur_pe_start = next_pe_start;
cur_nptes = nptes;
cur_dst = dst;
}
params->func(params, pe_start, dst, nptes,
AMDGPU_GPU_PAGE_SIZE, flags);
/* for next ptb*/
addr += nptes;
dst += nptes * AMDGPU_GPU_PAGE_SIZE;
if (use_cpu_update)
amdgpu_bo_kunmap(pt);
}
params->func(params, cur_pe_start, cur_dst, cur_nptes,
AMDGPU_GPU_PAGE_SIZE, flags);
return 0;
}
/*
@ -1119,11 +1344,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
* @end: last PTE to handle
* @dst: addr those PTEs should point to
* @flags: hw mapping flags
* Returns 0 for success, -EINVAL for failure.
*/
static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags)
{
int r;
/**
* The MC L1 TLB supports variable sized pages, based on a fragment
* field in the PTE. When this field is set to a non-zero value, page
@ -1152,28 +1380,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
/* system pages are non continuously */
if (params->src || !(flags & AMDGPU_PTE_VALID) ||
(frag_start >= frag_end)) {
amdgpu_vm_update_ptes(params, start, end, dst, flags);
return;
}
(frag_start >= frag_end))
return amdgpu_vm_update_ptes(params, start, end, dst, flags);
/* handle the 4K area at the beginning */
if (start != frag_start) {
amdgpu_vm_update_ptes(params, start, frag_start,
dst, flags);
r = amdgpu_vm_update_ptes(params, start, frag_start,
dst, flags);
if (r)
return r;
dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
}
/* handle the area in the middle */
amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
flags | frag_flags);
r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
flags | frag_flags);
if (r)
return r;
/* handle the 4K area at the end */
if (frag_end != end) {
dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
}
return r;
}
/**
@ -1215,6 +1445,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.vm = vm;
params.src = src;
if (vm->use_cpu_for_update) {
/* params.src is used as flag to indicate system Memory */
if (pages_addr)
params.src = ~0;
/* Wait for PT BOs to be free. PTs share the same resv. object
* as the root PD BO
*/
r = amdgpu_vm_bo_wait(adev, vm->root.bo);
if (unlikely(r))
return r;
params.func = amdgpu_vm_cpu_set_ptes;
params.pages_addr = pages_addr;
params.shadow = false;
return amdgpu_vm_frag_ptes(&params, start, last + 1,
addr, flags);
}
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
/* sync to everything on unmapping */
@ -1294,9 +1543,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
goto error_free;
params.shadow = true;
amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
if (r)
goto error_free;
params.shadow = false;
amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
if (r)
goto error_free;
amdgpu_ring_pad_ib(ring, params.ib);
WARN_ON(params.ib->length_dw > ndw);
@ -2137,20 +2390,25 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
*
* @adev: amdgpu_device pointer
* @vm: requested vm
* @vm_context: Indicates if it GFX or Compute context
*
* Init @vm fields.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context)
{
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT(adev) * 8);
unsigned ring_instance;
struct amdgpu_ring *ring;
struct amd_sched_rq *rq;
int r;
int r, i;
u64 flags;
vm->va = RB_ROOT;
vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->cleared);
@ -2167,14 +2425,29 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
if (r)
return r;
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
else
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
"CPU update of VM recommended only for large BAR system\n");
vm->last_dir_update = NULL;
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW);
r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
flags,
NULL, NULL, &vm->root.bo);
if (r)
goto error_free_sched_entity;
@ -2219,7 +2492,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
for (i = 0; i <= level->last_entry_used; i++)
amdgpu_vm_free_levels(&level->entries[i]);
drm_free_large(level->entries);
kvfree(level->entries);
}
/**
@ -2235,6 +2508,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
int i;
amd_sched_entity_fini(vm->entity.sched, &vm->entity);
@ -2258,6 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_free_levels(&vm->root);
dma_fence_put(vm->last_dir_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
amdgpu_vm_free_reserved_vmid(adev, vm, i);
}
/**
@ -2277,6 +2553,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
atomic_set(&id_mgr->reserved_vmid_num, 0);
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
@ -2295,6 +2572,23 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
atomic64_set(&adev->vm_manager.client_counter, 0);
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
/* If not overridden by the user, by default, only in large BAR systems
* Compute VM tables will be updated by CPU
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
if (amdgpu_vm_is_large_bar(adev))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
adev->vm_manager.vm_update_mode = 0;
} else
adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
#else
adev->vm_manager.vm_update_mode = 0;
#endif
}
/**
@ -2322,3 +2616,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
}
}
}
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
int r;
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* current, we only have requirement to reserve vmid from gfxhub */
r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
AMDGPU_GFXHUB);
if (r)
return r;
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
break;
default:
return -EINVAL;
}
return 0;
}

View File

@ -84,6 +84,16 @@ struct amdgpu_bo_list_entry;
/* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (8 << 20)
/* max vmids dedicated for process */
#define AMDGPU_VM_MAX_RESERVED_VMID 1
#define AMDGPU_VM_CONTEXT_GFX 0
#define AMDGPU_VM_CONTEXT_COMPUTE 1
/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
struct amdgpu_vm_pt {
struct amdgpu_bo *bo;
@ -123,8 +133,13 @@ struct amdgpu_vm {
/* client id */
u64 client_id;
/* dedicated to vm */
struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* each VM will map on CSA */
struct amdgpu_bo_va *csa_bo_va;
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
bool use_cpu_for_update;
};
struct amdgpu_vm_id {
@ -152,6 +167,7 @@ struct amdgpu_vm_id_manager {
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vm_id ids[AMDGPU_NUM_VM];
atomic_t reserved_vmid_num;
};
struct amdgpu_vm_manager {
@ -168,8 +184,6 @@ struct amdgpu_vm_manager {
uint32_t block_size;
/* vram base address for page table entry */
u64 vram_base_offset;
/* is vm enabled? */
bool enabled;
/* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
@ -181,11 +195,18 @@ struct amdgpu_vm_manager {
/* partial resident texture handling */
spinlock_t prt_lock;
atomic_t num_prt_users;
/* controls how VM page tables are updated for Graphics and Compute.
* BIT0[= 0] Graphics updated by SDMA [= 1] by CPU
* BIT1[= 0] Compute updated by SDMA [= 1] by CPU
*/
int vm_update_mode;
};
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated,
@ -239,5 +260,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
#endif

View File

@ -22,7 +22,7 @@
*/
#include <linux/firmware.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_ucode.h"

View File

@ -23,7 +23,7 @@
*/
#include <linux/firmware.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "cikd.h"
#include "ppsmc.h"

View File

@ -24,7 +24,7 @@
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@ -964,62 +964,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
}
static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
{mmGRBM_STATUS, false},
{mmGB_ADDR_CONFIG, false},
{mmMC_ARB_RAMCFG, false},
{mmGB_TILE_MODE0, false},
{mmGB_TILE_MODE1, false},
{mmGB_TILE_MODE2, false},
{mmGB_TILE_MODE3, false},
{mmGB_TILE_MODE4, false},
{mmGB_TILE_MODE5, false},
{mmGB_TILE_MODE6, false},
{mmGB_TILE_MODE7, false},
{mmGB_TILE_MODE8, false},
{mmGB_TILE_MODE9, false},
{mmGB_TILE_MODE10, false},
{mmGB_TILE_MODE11, false},
{mmGB_TILE_MODE12, false},
{mmGB_TILE_MODE13, false},
{mmGB_TILE_MODE14, false},
{mmGB_TILE_MODE15, false},
{mmGB_TILE_MODE16, false},
{mmGB_TILE_MODE17, false},
{mmGB_TILE_MODE18, false},
{mmGB_TILE_MODE19, false},
{mmGB_TILE_MODE20, false},
{mmGB_TILE_MODE21, false},
{mmGB_TILE_MODE22, false},
{mmGB_TILE_MODE23, false},
{mmGB_TILE_MODE24, false},
{mmGB_TILE_MODE25, false},
{mmGB_TILE_MODE26, false},
{mmGB_TILE_MODE27, false},
{mmGB_TILE_MODE28, false},
{mmGB_TILE_MODE29, false},
{mmGB_TILE_MODE30, false},
{mmGB_TILE_MODE31, false},
{mmGB_MACROTILE_MODE0, false},
{mmGB_MACROTILE_MODE1, false},
{mmGB_MACROTILE_MODE2, false},
{mmGB_MACROTILE_MODE3, false},
{mmGB_MACROTILE_MODE4, false},
{mmGB_MACROTILE_MODE5, false},
{mmGB_MACROTILE_MODE6, false},
{mmGB_MACROTILE_MODE7, false},
{mmGB_MACROTILE_MODE8, false},
{mmGB_MACROTILE_MODE9, false},
{mmGB_MACROTILE_MODE10, false},
{mmGB_MACROTILE_MODE11, false},
{mmGB_MACROTILE_MODE12, false},
{mmGB_MACROTILE_MODE13, false},
{mmGB_MACROTILE_MODE14, false},
{mmGB_MACROTILE_MODE15, false},
{mmCC_RB_BACKEND_DISABLE, false, true},
{mmGC_USER_RB_BACKEND_DISABLE, false, true},
{mmGB_BACKEND_MAP, false, false},
{mmPA_SC_RASTER_CONFIG, false, true},
{mmPA_SC_RASTER_CONFIG_1, false, true},
{mmGRBM_STATUS},
{mmGB_ADDR_CONFIG},
{mmMC_ARB_RAMCFG},
{mmGB_TILE_MODE0},
{mmGB_TILE_MODE1},
{mmGB_TILE_MODE2},
{mmGB_TILE_MODE3},
{mmGB_TILE_MODE4},
{mmGB_TILE_MODE5},
{mmGB_TILE_MODE6},
{mmGB_TILE_MODE7},
{mmGB_TILE_MODE8},
{mmGB_TILE_MODE9},
{mmGB_TILE_MODE10},
{mmGB_TILE_MODE11},
{mmGB_TILE_MODE12},
{mmGB_TILE_MODE13},
{mmGB_TILE_MODE14},
{mmGB_TILE_MODE15},
{mmGB_TILE_MODE16},
{mmGB_TILE_MODE17},
{mmGB_TILE_MODE18},
{mmGB_TILE_MODE19},
{mmGB_TILE_MODE20},
{mmGB_TILE_MODE21},
{mmGB_TILE_MODE22},
{mmGB_TILE_MODE23},
{mmGB_TILE_MODE24},
{mmGB_TILE_MODE25},
{mmGB_TILE_MODE26},
{mmGB_TILE_MODE27},
{mmGB_TILE_MODE28},
{mmGB_TILE_MODE29},
{mmGB_TILE_MODE30},
{mmGB_TILE_MODE31},
{mmGB_MACROTILE_MODE0},
{mmGB_MACROTILE_MODE1},
{mmGB_MACROTILE_MODE2},
{mmGB_MACROTILE_MODE3},
{mmGB_MACROTILE_MODE4},
{mmGB_MACROTILE_MODE5},
{mmGB_MACROTILE_MODE6},
{mmGB_MACROTILE_MODE7},
{mmGB_MACROTILE_MODE8},
{mmGB_MACROTILE_MODE9},
{mmGB_MACROTILE_MODE10},
{mmGB_MACROTILE_MODE11},
{mmGB_MACROTILE_MODE12},
{mmGB_MACROTILE_MODE13},
{mmGB_MACROTILE_MODE14},
{mmGB_MACROTILE_MODE15},
{mmCC_RB_BACKEND_DISABLE, true},
{mmGC_USER_RB_BACKEND_DISABLE, true},
{mmGB_BACKEND_MAP, false},
{mmPA_SC_RASTER_CONFIG, true},
{mmPA_SC_RASTER_CONFIG_1, true},
};
static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
@ -1050,11 +1050,10 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
if (reg_offset != cik_allowed_read_registers[i].reg_offset)
continue;
if (!cik_allowed_read_registers[i].untouched)
*value = cik_allowed_read_registers[i].grbm_indexed ?
cik_read_indexed_register(adev, se_num,
sh_num, reg_offset) :
RREG32(reg_offset);
*value = cik_allowed_read_registers[i].grbm_indexed ?
cik_read_indexed_register(adev, se_num,
sh_num, reg_offset) :
RREG32(reg_offset);
return 0;
}
return -EINVAL;

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "cikd.h"

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "vid.h"

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"
@ -118,14 +118,27 @@ static const struct {
static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev,
u32 block_offset, u32 reg)
{
DRM_INFO("xxxx: dce_v6_0_audio_endpt_rreg ----no impl!!!!\n");
return 0;
unsigned long flags;
u32 r;
spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
return r;
}
static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
u32 block_offset, u32 reg, u32 v)
{
DRM_INFO("xxxx: dce_v6_0_audio_endpt_wreg ----no impl!!!!\n");
unsigned long flags;
spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset,
reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
}
static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
@ -501,21 +514,16 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
{
int num_crtc = 0;
switch (adev->asic_type) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_VERDE:
num_crtc = 6;
break;
return 6;
case CHIP_OLAND:
num_crtc = 2;
break;
return 2;
default:
num_crtc = 0;
return 0;
}
return num_crtc;
}
void dce_v6_0_disable_dce(struct amdgpu_device *adev)
@ -1225,17 +1233,17 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads);
}
}
/*
static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev)
{
int i;
u32 offset, tmp;
u32 tmp;
for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
offset = adev->mode_info.audio.pin[i].offset;
tmp = RREG32_AUDIO_ENDPT(offset,
AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1)
tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset,
ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT,
PORT_CONNECTIVITY))
adev->mode_info.audio.pin[i].connected = false;
else
adev->mode_info.audio.pin[i].connected = true;
@ -1257,45 +1265,206 @@ static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *ade
return NULL;
}
static void dce_v6_0_afmt_audio_select_pin(struct drm_encoder *encoder)
static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
{
struct amdgpu_device *adev = encoder->dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 offset;
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
offset = dig->afmt->offset;
WREG32(AFMT_AUDIO_SRC_CONTROL + offset,
AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id));
WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset,
REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT,
dig->afmt->pin->id));
}
static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
DRM_INFO("xxxx: dce_v6_0_audio_write_latency_fields---no imp!!!!!\n");
struct amdgpu_device *adev = encoder->dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
struct amdgpu_connector *amdgpu_connector = NULL;
int interlace = 0;
u32 tmp;
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
return;
}
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
interlace = 1;
if (connector->latency_present[interlace]) {
tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
VIDEO_LIPSYNC, connector->video_latency[interlace]);
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
AUDIO_LIPSYNC, connector->audio_latency[interlace]);
} else {
tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
VIDEO_LIPSYNC, 0);
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
AUDIO_LIPSYNC, 0);
}
WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
}
static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
DRM_INFO("xxxx: dce_v6_0_audio_write_speaker_allocation---no imp!!!!!\n");
struct amdgpu_device *adev = encoder->dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
struct amdgpu_connector *amdgpu_connector = NULL;
u8 *sadb = NULL;
int sad_count;
u32 tmp;
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
return;
}
sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
}
/* program the speaker allocation */
tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
HDMI_CONNECTION, 0);
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
DP_CONNECTION, 0);
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
DP_CONNECTION, 1);
else
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
HDMI_CONNECTION, 1);
if (sad_count)
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
SPEAKER_ALLOCATION, sadb[0]);
else
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
SPEAKER_ALLOCATION, 5); /* stereo */
WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
kfree(sadb);
}
static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
DRM_INFO("xxxx: dce_v6_0_audio_write_sad_regs---no imp!!!!!\n");
struct amdgpu_device *adev = encoder->dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
static const u16 eld_reg_to_type[][2] = {
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
};
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
return;
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
if (sad_count <= 0) {
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
return;
}
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
u32 tmp = 0;
u8 stereo_freqs = 0;
int max_channels = -1;
int j;
for (j = 0; j < sad_count; j++) {
struct cea_sad *sad = &sads[j];
if (sad->format == eld_reg_to_type[i][1]) {
if (sad->channels > max_channels) {
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
MAX_CHANNELS, sad->channels);
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
DESCRIPTOR_BYTE_2, sad->byte2);
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
SUPPORTED_FREQUENCIES, sad->freq);
max_channels = sad->channels;
}
if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
stereo_freqs |= sad->freq;
else
break;
}
}
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
}
kfree(sads);
}
*/
static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
struct amdgpu_audio_pin *pin,
bool enable)
{
DRM_INFO("xxxx: dce_v6_0_audio_enable---no imp!!!!!\n");
if (!pin)
return;
WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
}
static const u32 pin_offsets[7] =
@ -1311,42 +1480,372 @@ static const u32 pin_offsets[7] =
static int dce_v6_0_audio_init(struct amdgpu_device *adev)
{
int i;
if (!amdgpu_audio)
return 0;
adev->mode_info.audio.enabled = true;
switch (adev->asic_type) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_VERDE:
default:
adev->mode_info.audio.num_pins = 6;
break;
case CHIP_OLAND:
adev->mode_info.audio.num_pins = 2;
break;
}
for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
adev->mode_info.audio.pin[i].channels = -1;
adev->mode_info.audio.pin[i].rate = -1;
adev->mode_info.audio.pin[i].bits_per_sample = -1;
adev->mode_info.audio.pin[i].status_bits = 0;
adev->mode_info.audio.pin[i].category_code = 0;
adev->mode_info.audio.pin[i].connected = false;
adev->mode_info.audio.pin[i].offset = pin_offsets[i];
adev->mode_info.audio.pin[i].id = i;
dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
return 0;
}
static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
{
int i;
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
for (i = 0; i < adev->mode_info.audio.num_pins; i++)
dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
adev->mode_info.audio.enabled = false;
}
/*
static void dce_v6_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder)
{
DRM_INFO("xxxx: dce_v6_0_afmt_update_ACR---no imp!!!!!\n");
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 tmp;
tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1);
tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1);
WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
}
*/
/*
* build a HDMI Video Info Frame
*/
/*
static void dce_v6_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
void *buffer, size_t size)
static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder,
uint32_t clock, int bpc)
{
DRM_INFO("xxxx: dce_v6_0_afmt_update_avi_infoframe---no imp!!!!!\n");
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 tmp;
tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE,
bpc > 8 ? 0 : 1);
WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
}
static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct hdmi_avi_infoframe frame;
u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
uint8_t *payload = buffer + 3;
uint8_t *header = buffer;
ssize_t err;
u32 tmp;
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
if (err < 0) {
DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
return;
}
err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
if (err < 0) {
DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
return;
}
WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24));
WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24));
WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24));
WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
payload[0xC] | (payload[0xD] << 8) | (header[1] << 24));
tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
/* anything other than 0 */
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1,
HDMI_AUDIO_INFO_LINE, 2);
WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
}
static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
{
DRM_INFO("xxxx: dce_v6_0_audio_set_dto---no imp!!!!!\n");
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
u32 tmp;
/*
* Two dtos: generally use dto0 for hdmi, dto1 for dp.
* Express [24MHz / target pixel clock] as an exact rational
* number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
* is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
*/
tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id);
if (em == ATOM_ENCODER_MODE_HDMI) {
tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
DCCG_AUDIO_DTO_SEL, 0);
} else if (ENCODER_MODE_IS_DP(em)) {
tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
DCCG_AUDIO_DTO_SEL, 1);
}
WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
if (em == ATOM_ENCODER_MODE_HDMI) {
WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000);
WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock);
} else if (ENCODER_MODE_IS_DP(em)) {
WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000);
WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock);
}
}
*/
/*
* update the info frames with the data from the current display mode
*/
static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 tmp;
tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff);
WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1);
tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
}
static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 tmp;
tmp = RREG32(mmHDMI_GC + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 1 : 0);
WREG32(mmHDMI_GC + dig->afmt->offset, tmp);
}
static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 tmp;
if (enable) {
tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
} else {
tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0);
tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0);
WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0);
WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
}
}
static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 tmp;
if (enable) {
tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1);
WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp);
tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset);
tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1);
tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1);
tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp);
} else {
WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0);
}
}
static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
DRM_INFO("xxxx: dce_v6_0_afmt_setmode ----no impl !!!!!!!!\n");
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
struct amdgpu_connector *amdgpu_connector = NULL;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
int bpc = 8;
if (!dig || !dig->afmt)
return;
list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
return;
}
if (!dig->afmt->enabled)
return;
dig->afmt->pin = dce_v6_0_audio_get_pin(adev);
if (!dig->afmt->pin)
return;
if (encoder->crtc) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
bpc = amdgpu_crtc->bpc;
}
/* disable audio before setting up hw */
dce_v6_0_audio_enable(adev, dig->afmt->pin, false);
dce_v6_0_audio_set_mute(encoder, true);
dce_v6_0_audio_write_speaker_allocation(encoder);
dce_v6_0_audio_write_sad_regs(encoder);
dce_v6_0_audio_write_latency_fields(encoder, mode);
if (em == ATOM_ENCODER_MODE_HDMI) {
dce_v6_0_audio_set_dto(encoder, mode->clock);
dce_v6_0_audio_set_vbi_packet(encoder);
dce_v6_0_audio_set_acr(encoder, mode->clock, bpc);
} else if (ENCODER_MODE_IS_DP(em)) {
dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10);
}
dce_v6_0_audio_set_packet(encoder);
dce_v6_0_audio_select_pin(encoder);
dce_v6_0_audio_set_avi_infoframe(encoder, mode);
dce_v6_0_audio_set_mute(encoder, false);
if (em == ATOM_ENCODER_MODE_HDMI) {
dce_v6_0_audio_hdmi_enable(encoder, 1);
} else if (ENCODER_MODE_IS_DP(em)) {
dce_v6_0_audio_dp_enable(encoder, 1);
}
/* enable audio after setting up hw */
dce_v6_0_audio_enable(adev, dig->afmt->pin, true);
}
static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
@ -1362,6 +1861,7 @@ static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
/* Silent, r600_hdmi_enable will raise WARN for us */
if (enable && dig->afmt->enabled)
return;
if (!enable && !dig->afmt->enabled)
return;
@ -2756,6 +3256,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
amdgpu_encoder->pixel_clock = adjusted_mode->clock;
@ -2765,7 +3266,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
/* set scaler clears this on some chips */
dce_v6_0_set_interleave(encoder->crtc, mode);
if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) {
dce_v6_0_afmt_enable(encoder, true);
dce_v6_0_afmt_setmode(encoder, adjusted_mode);
}
@ -2827,11 +3328,12 @@ static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
if (amdgpu_atombios_encoder_is_digital(encoder)) {
if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em))
dce_v6_0_afmt_enable(encoder, false);
dig = amdgpu_encoder->enc_priv;
dig->dig_encoder = -1;

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"

File diff suppressed because it is too large Load Diff

View File

@ -21,12 +21,13 @@
*
*/
#include <linux/firmware.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "amdgpu_gfx.h"
#include "cikd.h"
#include "cik.h"
#include "cik_structs.h"
#include "atom.h"
#include "amdgpu_ucode.h"
#include "clearstate_ci.h"
@ -48,7 +49,7 @@
#include "oss/oss_2_0_sh_mask.h"
#define GFX7_NUM_GFX_RINGS 1
#define GFX7_NUM_COMPUTE_RINGS 8
#define GFX7_MEC_HPD_SIZE 2048
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@ -1606,19 +1607,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
WREG32(mmGRBM_GFX_INDEX, data);
}
/**
* gfx_v7_0_create_bitmask - create a bitmask
*
* @bit_width: length of the mask
*
* create a variable length bit mask (CIK).
* Returns the bitmask.
*/
static u32 gfx_v7_0_create_bitmask(u32 bit_width)
{
return (u32)((1ULL << bit_width) - 1);
}
/**
* gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
*
@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se);
mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se);
return (~data) & mask;
}
@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
/**
* gmc_v7_0_init_compute_vmid - gart enable
*
* @rdev: amdgpu_device pointer
* @adev: amdgpu_device pointer
*
* Initialize compute vmid sh_mem registers
*
@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
}
}
#define MEC_HPD_SIZE 2048
static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
{
int r;
u32 *hpd;
size_t mec_hpd_size;
/*
* KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
* CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
* Nonetheless, we assign only 1 pipe because all other pipes will
* be handled by KFD
*/
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe = 1;
adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
/* take ownership of the relevant compute queues */
amdgpu_gfx_compute_queue_acquire(adev);
/* allocate space for ALL pipes (even the ones we don't own) */
mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
* GFX7_MEC_HPD_SIZE * 2;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
mec_hpd_size,
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->gfx.mec.hpd_eop_obj);
@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
}
/* clear memory. Not sure if this is required or not */
memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
memset(hpd, 0, mec_hpd_size);
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@ -2917,33 +2902,256 @@ struct hqd_registers
u32 cp_mqd_control;
};
struct bonaire_mqd
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
int mec, int pipe)
{
u32 header;
u32 dispatch_initiator;
u32 dimensions[3];
u32 start_idx[3];
u32 num_threads[3];
u32 pipeline_stat_enable;
u32 perf_counter_enable;
u32 pgm[2];
u32 tba[2];
u32 tma[2];
u32 pgm_rsrc[2];
u32 vmid;
u32 resource_limits;
u32 static_thread_mgmt01[2];
u32 tmp_ring_size;
u32 static_thread_mgmt23[2];
u32 restart[3];
u32 thread_trace_enable;
u32 reserved1;
u32 user_data[16];
u32 vgtcs_invoke_count[2];
struct hqd_registers queue_state;
u32 dequeue_cntr;
u32 interrupt_queue[64];
};
u64 eop_gpu_addr;
u32 tmp;
size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
* GFX7_MEC_HPD_SIZE * 2;
mutex_lock(&adev->srbm_mutex);
eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
cik_srbm_select(adev, mec + 1, pipe, 0, 0);
/* write the EOP addr */
WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
/* set the VMID assigned */
WREG32(mmCP_HPD_EOP_VMID, 0);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
tmp = RREG32(mmCP_HPD_EOP_CONTROL);
tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
WREG32(mmCP_HPD_EOP_CONTROL, tmp);
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
{
int i;
/* disable the queue if it's active */
if (RREG32(mmCP_HQD_ACTIVE) & 1) {
WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
if (i == adev->usec_timeout)
return -ETIMEDOUT;
WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
WREG32(mmCP_HQD_PQ_RPTR, 0);
WREG32(mmCP_HQD_PQ_WPTR, 0);
}
return 0;
}
static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
struct cik_mqd *mqd,
uint64_t mqd_gpu_addr,
struct amdgpu_ring *ring)
{
u64 hqd_gpu_addr;
u64 wb_gpu_addr;
/* init the mqd struct */
memset(mqd, 0, sizeof(struct cik_mqd));
mqd->header = 0xC0310800;
mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
/* enable doorbell? */
mqd->cp_hqd_pq_doorbell_control =
RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
if (ring->use_doorbell)
mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
else
mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
/* set the pointer to the MQD */
mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
/* set MQD vmid to 0 */
mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
hqd_gpu_addr = ring->gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
mqd->cp_hqd_pq_control &=
~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
mqd->cp_hqd_pq_control |=
order_base_2(ring->ring_size / 8);
mqd->cp_hqd_pq_control |=
(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
mqd->cp_hqd_pq_control |=
2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
#endif
mqd->cp_hqd_pq_control &=
~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
mqd->cp_hqd_pq_control |=
CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set the wb address wether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* enable the doorbell if requested */
if (ring->use_doorbell) {
mqd->cp_hqd_pq_doorbell_control =
RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
mqd->cp_hqd_pq_doorbell_control &=
~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
mqd->cp_hqd_pq_doorbell_control |=
(ring->doorbell_index <<
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
mqd->cp_hqd_pq_doorbell_control |=
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
mqd->cp_hqd_pq_doorbell_control &=
~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
} else {
mqd->cp_hqd_pq_doorbell_control = 0;
}
/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */
mqd->cp_hqd_vmid = 0;
/* defaults */
mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
/* activate the queue */
mqd->cp_hqd_active = 1;
}
int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
uint32_t tmp;
uint32_t mqd_reg;
uint32_t *mqd_data;
/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
mqd_data = &mqd->cp_mqd_base_addr_lo;
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
/* program all HQD registers */
for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
/* activate the HQD */
for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
return 0;
}
static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
{
int r;
u64 mqd_gpu_addr;
struct cik_mqd *mqd;
struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
if (ring->mqd_obj == NULL) {
r = amdgpu_bo_create(adev,
sizeof(struct cik_mqd),
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&ring->mqd_obj);
if (r) {
dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
return r;
}
}
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
goto out;
r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
&mqd_gpu_addr);
if (r) {
dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
goto out_unreserve;
}
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
if (r) {
dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
goto out_unreserve;
}
mutex_lock(&adev->srbm_mutex);
cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
gfx_v7_0_mqd_deactivate(adev);
gfx_v7_0_mqd_commit(adev, mqd);
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
amdgpu_bo_kunmap(ring->mqd_obj);
out_unreserve:
amdgpu_bo_unreserve(ring->mqd_obj);
out:
return 0;
}
/**
* gfx_v7_0_cp_compute_resume - setup the compute queue registers
@ -2958,13 +3166,6 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
{
int r, i, j;
u32 tmp;
bool use_doorbell = true;
u64 hqd_gpu_addr;
u64 mqd_gpu_addr;
u64 eop_gpu_addr;
u64 wb_gpu_addr;
u32 *buf;
struct bonaire_mqd *mqd;
struct amdgpu_ring *ring;
/* fix up chicken bits */
@ -2972,220 +3173,25 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
tmp |= (1 << 23);
WREG32(mmCP_CPF_DEBUG, tmp);
/* init the pipes */
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
int me = (i < 4) ? 1 : 2;
int pipe = (i < 4) ? i : (i - 4);
/* init all pipes (even the ones we don't own) */
for (i = 0; i < adev->gfx.mec.num_mec; i++)
for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
gfx_v7_0_compute_pipe_init(adev, i, j);
eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
cik_srbm_select(adev, me, pipe, 0, 0);
/* write the EOP addr */
WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
/* set the VMID assigned */
WREG32(mmCP_HPD_EOP_VMID, 0);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
tmp = RREG32(mmCP_HPD_EOP_CONTROL);
tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
tmp |= order_base_2(MEC_HPD_SIZE / 8);
WREG32(mmCP_HPD_EOP_CONTROL, tmp);
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
/* init the queues. Just two for now. */
/* init the queues */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (ring->mqd_obj == NULL) {
r = amdgpu_bo_create(adev,
sizeof(struct bonaire_mqd),
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&ring->mqd_obj);
if (r) {
dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
return r;
}
}
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)) {
gfx_v7_0_cp_compute_fini(adev);
return r;
}
r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
&mqd_gpu_addr);
r = gfx_v7_0_compute_queue_init(adev, i);
if (r) {
dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
gfx_v7_0_cp_compute_fini(adev);
return r;
}
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
if (r) {
dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
gfx_v7_0_cp_compute_fini(adev);
return r;
}
/* init the mqd struct */
memset(buf, 0, sizeof(struct bonaire_mqd));
mqd = (struct bonaire_mqd *)buf;
mqd->header = 0xC0310800;
mqd->static_thread_mgmt01[0] = 0xffffffff;
mqd->static_thread_mgmt01[1] = 0xffffffff;
mqd->static_thread_mgmt23[0] = 0xffffffff;
mqd->static_thread_mgmt23[1] = 0xffffffff;
mutex_lock(&adev->srbm_mutex);
cik_srbm_select(adev, ring->me,
ring->pipe,
ring->queue, 0);
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
/* enable doorbell? */
mqd->queue_state.cp_hqd_pq_doorbell_control =
RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
if (use_doorbell)
mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
else
mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->queue_state.cp_hqd_pq_doorbell_control);
/* disable the queue if it's active */
mqd->queue_state.cp_hqd_dequeue_request = 0;
mqd->queue_state.cp_hqd_pq_rptr = 0;
mqd->queue_state.cp_hqd_pq_wptr= 0;
if (RREG32(mmCP_HQD_ACTIVE) & 1) {
WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
}
/* set the pointer to the MQD */
mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
/* set MQD vmid to 0 */
mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
hqd_gpu_addr = ring->gpu_addr >> 8;
mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
/* set up the HQD, this is similar to CP_RB0_CNTL */
mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
mqd->queue_state.cp_hqd_pq_control &=
~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
mqd->queue_state.cp_hqd_pq_control |=
order_base_2(ring->ring_size / 8);
mqd->queue_state.cp_hqd_pq_control |=
(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
mqd->queue_state.cp_hqd_pq_control |=
2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
#endif
mqd->queue_state.cp_hqd_pq_control &=
~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
mqd->queue_state.cp_hqd_pq_control |=
CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
/* set the wb address wether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->queue_state.cp_hqd_pq_rptr_report_addr);
WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
/* enable the doorbell if requested */
if (use_doorbell) {
mqd->queue_state.cp_hqd_pq_doorbell_control =
RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
mqd->queue_state.cp_hqd_pq_doorbell_control &=
~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
mqd->queue_state.cp_hqd_pq_doorbell_control |=
(ring->doorbell_index <<
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
mqd->queue_state.cp_hqd_pq_doorbell_control |=
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
mqd->queue_state.cp_hqd_pq_doorbell_control &=
~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
} else {
mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
}
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->queue_state.cp_hqd_pq_doorbell_control);
/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */
mqd->queue_state.cp_hqd_vmid = 0;
WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
/* activate the queue */
mqd->queue_state.cp_hqd_active = 1;
WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
amdgpu_bo_kunmap(ring->mqd_obj);
amdgpu_bo_unreserve(ring->mqd_obj);
ring->ready = true;
}
gfx_v7_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
gfx_v7_0_update_rlc(adev, tmp);
data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
if (orig != data)
WREG32(mmRLC_CGCG_CGLS_CTRL, data);
} else {
gfx_v7_0_enable_gui_idle_interrupt(adev, false);
@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
RREG32(mmCB_CGTT_SCLK_CTRL);
data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
if (orig != data)
WREG32(mmRLC_CGCG_CGLS_CTRL, data);
gfx_v7_0_enable_gui_idle_interrupt(adev, true);
}
if (orig != data)
WREG32(mmRLC_CGCG_CGLS_CTRL, data);
}
static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
return (~data) & mask;
}
@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
gfx_v7_0_set_ring_funcs(adev);
@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config = gb_addr_config;
}
static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int mec, int pipe, int queue)
{
int r;
unsigned irq_type;
struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
/* mec0 is me1 */
ring->me = mec + 1;
ring->pipe = pipe;
ring->queue = queue;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, irq_type);
if (r)
return r;
return 0;
}
static int gfx_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r;
int i, j, k, r, ring_id;
switch (adev->asic_type) {
case CHIP_KAVERI:
adev->gfx.mec.num_mec = 2;
break;
case CHIP_BONAIRE:
case CHIP_HAWAII:
case CHIP_KABINI:
case CHIP_MULLINS:
default:
adev->gfx.mec.num_mec = 1;
break;
}
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
/* EOP Event */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
/* set up the compute queues */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
unsigned irq_type;
/* set up the compute queues - allocate horizontally across pipes */
ring_id = 0;
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
continue;
/* max 32 queues per MEC */
if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
DRM_ERROR("Too many (%d) compute rings!\n", i);
break;
r = gfx_v7_0_compute_ring_init(adev,
ring_id,
i, k, j);
if (r)
return r;
ring_id++;
}
}
ring = &adev->gfx.compute_ring[i];
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
ring->me = 1; /* first MEC */
ring->pipe = i / 8;
ring->queue = i % 8;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, irq_type);
if (r)
return r;
}
/* reserve GDS, GWS and OA resource for gfx */
@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
u32 mec_int_cntl, mec_int_cntl_reg;
/*
* amdgpu controls only pipe 0 of MEC1. That's why this function only
* handles the setting of interrupts for this specific pipe. All other
* amdgpu controls only the first MEC. That's why this function only
* handles the setting of interrupts for this specific MEC. All other
* pipes' interrupts are set by amdkfd.
*/
@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
case 0:
mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
break;
case 1:
mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
break;
case 2:
mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
break;
case 3:
mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
break;
default:
DRM_DEBUG("invalid pipe %d\n", pipe);
return;
@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
unsigned disable_masks[4 * 2];
u32 ao_cu_num;
if (adev->flags & AMD_IS_APU)
ao_cu_num = 2;
else
ao_cu_num = adev->gfx.config.max_cu_per_sh;
memset(cu_info, 0, sizeof(*cu_info));
@ -5354,9 +5418,9 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
for (k = 0; k < 16; k ++) {
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
if (counter < 2)
if (counter < ao_cu_num)
ao_bitmap |= mask;
counter ++;
}

View File

@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
struct amdgpu_device;
struct cik_mqd;
int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -27,4 +27,9 @@
extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;
struct amdgpu_device;
struct vi_mqd;
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -31,128 +31,14 @@
#include "soc15_common.h"
int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
{
u32 tmp;
u64 value;
u32 i;
return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
}
/* Program MC. */
/* Update configuration */
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR),
adev->mc.vram_start >> 18);
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR),
adev->mc.vram_end >> 18);
value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
+ adev->vm_manager.vram_base_offset;
WREG32(SOC15_REG_OFFSET(GC, 0,
mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB),
(u32)(value >> 12));
WREG32(SOC15_REG_OFFSET(GC, 0,
mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
(u32)(value >> 44));
if (amdgpu_sriov_vf(adev)) {
/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so
vbios post doesn't program them, for SRIOV driver need to program them */
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE),
adev->mc.vram_start >> 24);
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP),
adev->mc.vram_end >> 24);
}
/* Disable AGP. */
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF);
/* GART Enable. */
/* Setup TLB control */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL));
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
SYSTEM_ACCESS_MODE,
3);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL,
1);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS,
0);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ECO_BITS,
0);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
MTYPE,
MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ATC_EN,
1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
/* Setup L2 cache */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL));
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
ENABLE_L2_FRAGMENT_PROCESSING,
0);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
CONTEXT1_IDENTITY_ACCESS_MODE,
1);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
IDENTITY_MODE_FRAGMENT_SIZE,
0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2));
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp);
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4));
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL4,
VMC_TAP_PDE_REQUEST_PHYSICAL,
0);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL4,
VMC_TAP_PTE_REQUEST_PHYSICAL,
0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp);
/* setup context0 */
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32),
(u32)(adev->mc.gtt_start >> 12));
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32),
(u32)(adev->mc.gtt_start >> 44));
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32),
(u32)(adev->mc.gtt_end >> 12));
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32),
(u32)(adev->mc.gtt_end >> 44));
static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
{
uint64_t value;
BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
value = adev->gart.table_addr - adev->mc.vram_start
@ -160,49 +46,146 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
value &= 0x0000FFFFFFFFF000ULL;
value |= 0x1; /*valid bit*/
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32),
(u32)value);
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32),
(u32)(value >> 32));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32),
(u32)(adev->dummy_page.addr >> 12));
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32),
(u32)((u64)adev->dummy_page.addr >> 44));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
upper_32_bits(value));
}
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2));
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY,
1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp);
static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
gfxhub_v1_0_init_gart_pt_regs(adev);
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->mc.gtt_start >> 12));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->mc.gtt_start >> 44));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->mc.gtt_end >> 12));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->mc.gtt_end >> 44));
}
static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
/* Disable AGP. */
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF);
/* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->mc.vram_start >> 18);
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->mc.vram_end >> 18);
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
+ adev->vm_manager.vram_base_offset;
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page.addr >> 12));
WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page.addr >> 44));
WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
}
static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup L2 cache */
tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp);
}
static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
}
/* Disable identity aperture.*/
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF);
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F);
static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
{
WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0XFFFFFFFF);
WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0);
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0);
WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
0);
WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
0);
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0);
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0);
WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
}
static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i);
tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@ -223,15 +206,52 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
}
}
int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
/*
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
* VF copy registers so vbios post doesn't program them, for
* SRIOV driver need to program them
*/
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
adev->mc.vram_start >> 24);
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
adev->mc.vram_end >> 24);
}
/* GART Enable. */
gfxhub_v1_0_init_gart_aperture_regs(adev);
gfxhub_v1_0_init_system_aperture_regs(adev);
gfxhub_v1_0_init_tlb_regs(adev);
gfxhub_v1_0_init_cache_regs(adev);
gfxhub_v1_0_enable_system_domain(adev);
gfxhub_v1_0_disable_identity_aperture(adev);
gfxhub_v1_0_setup_vmid_config(adev);
gfxhub_v1_0_program_invalidation(adev);
return 0;
}
@ -243,22 +263,20 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0);
/* Setup TLB control */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL));
tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL,
0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL));
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0);
WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0);
}
/**
@ -271,7 +289,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL));
tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
@ -296,22 +314,11 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static int gfxhub_v1_0_early_init(void *handle)
void gfxhub_v1_0_init(struct amdgpu_device *adev)
{
return 0;
}
static int gfxhub_v1_0_late_init(void *handle)
{
return 0;
}
static int gfxhub_v1_0_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
hub->ctx0_ptb_addr_lo32 =
@ -330,96 +337,4 @@ static int gfxhub_v1_0_sw_init(void *handle)
SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
return 0;
}
static int gfxhub_v1_0_sw_fini(void *handle)
{
return 0;
}
static int gfxhub_v1_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) +
2 * i, 0xffffffff);
WREG32(SOC15_REG_OFFSET(GC, 0,
mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) +
2 * i, 0x1f);
}
return 0;
}
static int gfxhub_v1_0_hw_fini(void *handle)
{
return 0;
}
static int gfxhub_v1_0_suspend(void *handle)
{
return 0;
}
static int gfxhub_v1_0_resume(void *handle)
{
return 0;
}
static bool gfxhub_v1_0_is_idle(void *handle)
{
return true;
}
static int gfxhub_v1_0_wait_for_idle(void *handle)
{
return 0;
}
static int gfxhub_v1_0_soft_reset(void *handle)
{
return 0;
}
static int gfxhub_v1_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
return 0;
}
static int gfxhub_v1_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
return 0;
}
const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = {
.name = "gfxhub_v1_0",
.early_init = gfxhub_v1_0_early_init,
.late_init = gfxhub_v1_0_late_init,
.sw_init = gfxhub_v1_0_sw_init,
.sw_fini = gfxhub_v1_0_sw_fini,
.hw_init = gfxhub_v1_0_hw_init,
.hw_fini = gfxhub_v1_0_hw_fini,
.suspend = gfxhub_v1_0_suspend,
.resume = gfxhub_v1_0_resume,
.is_idle = gfxhub_v1_0_is_idle,
.wait_for_idle = gfxhub_v1_0_wait_for_idle,
.soft_reset = gfxhub_v1_0_soft_reset,
.set_clockgating_state = gfxhub_v1_0_set_clockgating_state,
.set_powergating_state = gfxhub_v1_0_set_powergating_state,
};
const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_GFXHUB,
.major = 1,
.minor = 0,
.rev = 0,
.funcs = &gfxhub_v1_0_ip_funcs,
};

View File

@ -28,7 +28,8 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev);
void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev);
void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value);
void gfxhub_v1_0_init(struct amdgpu_device *adev);
u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev);
extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs;
extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block;

View File

@ -21,7 +21,7 @@
*
*/
#include <linux/firmware.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "gmc_v6_0.h"
#include "amdgpu_ucode.h"
@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
return pte_flag;
}
static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
{
BUG_ON(addr & 0xFFFFFF0000000FFFULL);
return addr;
}
static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
@ -614,33 +620,6 @@ static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_fini(adev);
}
static int gmc_v6_0_vm_init(struct amdgpu_device *adev)
{
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
tmp <<= 22;
adev->vm_manager.vram_base_offset = tmp;
} else
adev->vm_manager.vram_base_offset = 0;
return 0;
}
static void gmc_v6_0_vm_fini(struct amdgpu_device *adev)
{
}
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
u32 status, u32 addr, u32 mc_client)
{
@ -855,6 +834,8 @@ static int gmc_v6_0_sw_init(void *handle)
adev->mc.mc_mask = 0xffffffffffULL;
adev->mc.stolen_size = 256 * 1024;
adev->need_dma32 = false;
dma_bits = adev->need_dma32 ? 32 : 40;
r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@ -887,26 +868,34 @@ static int gmc_v6_0_sw_init(void *handle)
if (r)
return r;
if (!adev->vm_manager.enabled) {
r = gmc_v6_0_vm_init(adev);
if (r) {
dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
return r;
}
adev->vm_manager.enabled = true;
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
tmp <<= 22;
adev->vm_manager.vram_base_offset = tmp;
} else {
adev->vm_manager.vram_base_offset = 0;
}
return r;
return 0;
}
static int gmc_v6_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
gmc_v6_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
amdgpu_vm_manager_fini(adev);
gmc_v6_0_gart_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_bo_fini(adev);
@ -984,16 +973,10 @@ static bool gmc_v6_0_is_idle(void *handle)
static int gmc_v6_0_wait_for_idle(void *handle)
{
unsigned i;
u32 tmp;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
SRBM_STATUS__MCC_BUSY_MASK |
SRBM_STATUS__MCD_BUSY_MASK |
SRBM_STATUS__VMC_BUSY_MASK);
if (!tmp)
if (gmc_v6_0_is_idle(handle))
return 0;
udelay(1);
}
@ -1146,6 +1129,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
.flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
.set_pte_pde = gmc_v6_0_gart_set_pte_pde,
.set_prt = gmc_v6_0_set_prt,
.get_vm_pde = gmc_v6_0_get_vm_pde,
.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
};

View File

@ -21,7 +21,7 @@
*
*/
#include <linux/firmware.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "cikd.h"
#include "cik.h"
@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
return pte_flag;
}
static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
{
BUG_ON(addr & 0xFFFFFF0000000FFFULL);
return addr;
}
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@ -724,55 +730,6 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_fini(adev);
}
/*
* vm
* VMID 0 is the physical GPU addresses as used by the kernel.
* VMIDs 1-15 are used for userspace clients and are handled
* by the amdgpu vm/hsa code.
*/
/**
* gmc_v7_0_vm_init - cik vm init callback
*
* @adev: amdgpu_device pointer
*
* Inits cik specific vm parameters (number of VMs, base of vram for
* VMIDs 1-15) (CIK).
* Returns 0 for success.
*/
static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
{
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
tmp <<= 22;
adev->vm_manager.vram_base_offset = tmp;
} else
adev->vm_manager.vram_base_offset = 0;
return 0;
}
/**
* gmc_v7_0_vm_fini - cik vm fini callback
*
* @adev: amdgpu_device pointer
*
* Tear down any asic specific VM setup (CIK).
*/
static void gmc_v7_0_vm_fini(struct amdgpu_device *adev)
{
}
/**
* gmc_v7_0_vm_decode_fault - print human readable fault info
*
@ -1013,6 +970,8 @@ static int gmc_v7_0_sw_init(void *handle)
*/
adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
adev->mc.stolen_size = 256 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@ -1051,27 +1010,34 @@ static int gmc_v7_0_sw_init(void *handle)
if (r)
return r;
if (!adev->vm_manager.enabled) {
r = gmc_v7_0_vm_init(adev);
if (r) {
dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
return r;
}
adev->vm_manager.enabled = true;
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
tmp <<= 22;
adev->vm_manager.vram_base_offset = tmp;
} else {
adev->vm_manager.vram_base_offset = 0;
}
return r;
return 0;
}
static int gmc_v7_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
amdgpu_vm_manager_fini(adev);
gmc_v7_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
amdgpu_vm_manager_fini(adev);
gmc_v7_0_gart_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_bo_fini(adev);
@ -1335,7 +1301,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
.flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
.set_pte_pde = gmc_v7_0_gart_set_pte_pde,
.set_prt = gmc_v7_0_set_prt,
.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
.get_vm_pde = gmc_v7_0_get_vm_pde
};
static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {

View File

@ -21,7 +21,7 @@
*
*/
#include <linux/firmware.h>
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "gmc_v8_0.h"
#include "amdgpu_ucode.h"
@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
return pte_flag;
}
static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
{
BUG_ON(addr & 0xFFFFFF0000000FFFULL);
return addr;
}
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@ -927,55 +933,6 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_fini(adev);
}
/*
* vm
* VMID 0 is the physical GPU addresses as used by the kernel.
* VMIDs 1-15 are used for userspace clients and are handled
* by the amdgpu vm/hsa code.
*/
/**
* gmc_v8_0_vm_init - cik vm init callback
*
* @adev: amdgpu_device pointer
*
* Inits cik specific vm parameters (number of VMs, base of vram for
* VMIDs 1-15) (CIK).
* Returns 0 for success.
*/
static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
{
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
tmp <<= 22;
adev->vm_manager.vram_base_offset = tmp;
} else
adev->vm_manager.vram_base_offset = 0;
return 0;
}
/**
* gmc_v8_0_vm_fini - cik vm fini callback
*
* @adev: amdgpu_device pointer
*
* Tear down any asic specific VM setup (CIK).
*/
static void gmc_v8_0_vm_fini(struct amdgpu_device *adev)
{
}
/**
* gmc_v8_0_vm_decode_fault - print human readable fault info
*
@ -1097,6 +1054,8 @@ static int gmc_v8_0_sw_init(void *handle)
*/
adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
adev->mc.stolen_size = 256 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@ -1135,27 +1094,34 @@ static int gmc_v8_0_sw_init(void *handle)
if (r)
return r;
if (!adev->vm_manager.enabled) {
r = gmc_v8_0_vm_init(adev);
if (r) {
dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
return r;
}
adev->vm_manager.enabled = true;
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.num_level = 1;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
tmp <<= 22;
adev->vm_manager.vram_base_offset = tmp;
} else {
adev->vm_manager.vram_base_offset = 0;
}
return r;
return 0;
}
static int gmc_v8_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
amdgpu_vm_manager_fini(adev);
gmc_v8_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
amdgpu_vm_manager_fini(adev);
gmc_v8_0_gart_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_bo_fini(adev);
@ -1654,7 +1620,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
.flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
.set_pte_pde = gmc_v8_0_gart_set_pte_pde,
.set_prt = gmc_v8_0_set_prt,
.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
.get_vm_pde = gmc_v8_0_get_vm_pde
};
static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {

View File

@ -33,6 +33,7 @@
#include "soc15_common.h"
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
@ -215,7 +216,10 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
unsigned i, j;
/* flush hdp cache */
nbio_v6_1_hdp_flush(adev);
if (adev->flags & AMD_IS_APU)
nbio_v7_0_hdp_flush(adev);
else
nbio_v6_1_hdp_flush(adev);
spin_lock(&adev->mc.invalidate_lock);
@ -354,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
return pte_flag;
}
static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
{
return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
BUG_ON(addr & 0xFFFF00000000003FULL);
return addr;
}
static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
.get_invalidate_req = gmc_v9_0_get_invalidate_req,
.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
.get_vm_pde = gmc_v9_0_get_vm_pde
};
static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
@ -415,6 +421,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_vram_location(adev, &adev->mc, base);
adev->mc.gtt_base_align = 0;
amdgpu_gtt_location(adev, mc);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU)
adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
else
adev->vm_manager.vram_base_offset = 0;
}
/**
@ -434,7 +445,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
/* hbm memory channel size */
chansize = 128;
tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0));
tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
switch (tmp) {
@ -474,7 +485,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
/* size in MB on si */
adev->mc.mc_vram_size =
nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL;
((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) :
nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL;
adev->mc.real_vram_size = adev->mc.mc_vram_size;
adev->mc.visible_vram_size = adev->mc.aper_size;
@ -514,64 +526,15 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
return amdgpu_gart_table_vram_alloc(adev);
}
/*
* vm
* VMID 0 is the physical GPU addresses as used by the kernel.
* VMIDs 1-15 are used for userspace clients and are handled
* by the amdgpu vm/hsa code.
*/
/**
* gmc_v9_0_vm_init - vm init callback
*
* @adev: amdgpu_device pointer
*
* Inits vega10 specific vm parameters (number of VMs, base of vram for
* VMIDs 1-15) (vega10).
* Returns 0 for success.
*/
static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
{
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
/* TODO: fix num_level for APU when updating vm size and block size */
if (adev->flags & AMD_IS_APU)
adev->vm_manager.num_level = 1;
else
adev->vm_manager.num_level = 3;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
/*XXX This value is not zero for APU*/
adev->vm_manager.vram_base_offset = 0;
return 0;
}
/**
* gmc_v9_0_vm_fini - vm fini callback
*
* @adev: amdgpu_device pointer
*
* Tear down any asic specific VM setup.
*/
static void gmc_v9_0_vm_fini(struct amdgpu_device *adev)
{
return;
}
static int gmc_v9_0_sw_init(void *handle)
{
int r;
int dma_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfxhub_v1_0_init(adev);
mmhub_v1_0_init(adev);
spin_lock_init(&adev->mc.invalidate_lock);
if (adev->flags & AMD_IS_APU) {
@ -609,6 +572,12 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
/*
* It needs to reserve 8M stolen memory for vega10
* TODO: Figure out how to avoid that...
*/
adev->mc.stolen_size = 8 * 1024 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 44-bits.
* IGP - can handle 44-bits
@ -641,15 +610,23 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
if (!adev->vm_manager.enabled) {
r = gmc_v9_0_vm_init(adev);
if (r) {
dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
return r;
}
adev->vm_manager.enabled = true;
}
return r;
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
/* TODO: fix num_level for APU when updating vm size and block size */
if (adev->flags & AMD_IS_APU)
adev->vm_manager.num_level = 1;
else
adev->vm_manager.num_level = 3;
amdgpu_vm_manager_init(adev);
return 0;
}
/**
@ -669,11 +646,7 @@ static int gmc_v9_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
amdgpu_vm_manager_fini(adev);
gmc_v9_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
amdgpu_vm_manager_fini(adev);
gmc_v9_0_gart_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_bo_fini(adev);
@ -686,6 +659,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
break;
case CHIP_RAVEN:
break;
default:
break;
}
@ -715,7 +690,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
return r;
/* After HDP is initialized, flush HDP.*/
nbio_v6_1_hdp_flush(adev);
if (adev->flags & AMD_IS_APU)
nbio_v7_0_hdp_flush(adev);
else
nbio_v6_1_hdp_flush(adev);
r = gfxhub_v1_0_gart_enable(adev);
if (r)
@ -725,12 +703,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL));
tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp);
WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL));
WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp);
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
@ -781,6 +759,12 @@ static int gmc_v9_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_sriov_vf(adev)) {
/* full access mode, so don't touch any GMC register */
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
return 0;
}
amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
gmc_v9_0_gart_disable(adev);
@ -831,7 +815,16 @@ static int gmc_v9_0_soft_reset(void *handle)
static int gmc_v9_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
return 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
return mmhub_v1_0_set_clockgating(adev, state);
}
static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
mmhub_v1_0_get_clockgating(adev, flags);
}
static int gmc_v9_0_set_powergating_state(void *handle,
@ -855,6 +848,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
.soft_reset = gmc_v9_0_soft_reset,
.set_clockgating_state = gmc_v9_0_set_clockgating_state,
.set_powergating_state = gmc_v9_0_set_powergating_state,
.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};
const struct amdgpu_ip_block_version gmc_v9_0_ip_block =

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "vid.h"

View File

@ -21,7 +21,7 @@
*
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "cikd.h"

View File

@ -22,7 +22,7 @@
* Authors: Alex Deucher
*/
#include "drmP.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "cikd.h"
#include "kv_dpm.h"

View File

@ -34,9 +34,12 @@
#include "soc15_common.h"
#define mmDAGB0_CNTL_MISC2_RV 0x008f
#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0
u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE));
u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
@ -44,129 +47,9 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
return base;
}
int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
{
u32 tmp;
u64 value;
uint64_t addr;
u32 i;
/* Program MC. */
/* Update configuration */
DRM_INFO("%s -- in\n", __func__);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR),
adev->mc.vram_start >> 18);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR),
adev->mc.vram_end >> 18);
value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB),
(u32)(value >> 12));
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
(u32)(value >> 44));
if (amdgpu_sriov_vf(adev)) {
/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so
vbios post doesn't program them, for SRIOV driver need to program them */
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE),
adev->mc.vram_start >> 24);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP),
adev->mc.vram_end >> 24);
}
/* Disable AGP. */
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF);
/* GART Enable. */
/* Setup TLB control */
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL));
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
SYSTEM_ACCESS_MODE,
3);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL,
1);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS,
0);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ECO_BITS,
0);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
MTYPE,
MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ATC_EN,
1);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
/* Setup L2 cache */
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL));
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
ENABLE_L2_FRAGMENT_PROCESSING,
0);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
CONTEXT1_IDENTITY_ACCESS_MODE,
1);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL,
IDENTITY_MODE_FRAGMENT_SIZE,
0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp);
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2));
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2), tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), tmp);
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4));
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL4,
VMC_TAP_PDE_REQUEST_PHYSICAL,
0);
tmp = REG_SET_FIELD(tmp,
VM_L2_CNTL4,
VMC_TAP_PTE_REQUEST_PHYSICAL,
0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp);
/* setup context0 */
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32),
(u32)(adev->mc.gtt_start >> 12));
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32),
(u32)(adev->mc.gtt_start >> 44));
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32),
(u32)(adev->mc.gtt_end >> 12));
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32),
(u32)(adev->mc.gtt_end >> 44));
uint64_t value;
BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
value = adev->gart.table_addr - adev->mc.vram_start +
@ -174,54 +57,150 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
value &= 0x0000FFFFFFFFF000ULL;
value |= 0x1; /* valid bit */
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32),
(u32)value);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32),
(u32)(value >> 32));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32),
(u32)(adev->dummy_page.addr >> 12));
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32),
(u32)((u64)adev->dummy_page.addr >> 44));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
upper_32_bits(value));
}
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2));
static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
mmhub_v1_0_init_gart_pt_regs(adev);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->mc.gtt_start >> 12));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->mc.gtt_start >> 44));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->mc.gtt_end >> 12));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->mc.gtt_end >> 44));
}
static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
uint32_t tmp;
/* Disable AGP. */
WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF);
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->mc.vram_start >> 18);
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->mc.vram_end >> 18);
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page.addr >> 12));
WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page.addr >> 44));
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY,
1);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp);
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}
addr = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL);
tmp = RREG32(addr);
static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL4, tmp);
}
static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL), tmp);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp);
}
tmp = RREG32(addr);
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
{
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0XFFFFFFFF);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
/* Disable identity aperture.*/
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F);
WREG32_SOC15(MMHUB, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
WREG32_SOC15(MMHUB, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
0);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
0);
}
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0);
static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL)
+ i);
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
@ -243,14 +222,52 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
}
}
int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
/*
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
* VF copy registers so vbios post doesn't program them, for
* SRIOV driver need to program them
*/
WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
adev->mc.vram_start >> 24);
WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
adev->mc.vram_end >> 24);
}
/* GART Enable. */
mmhub_v1_0_init_gart_aperture_regs(adev);
mmhub_v1_0_init_system_aperture_regs(adev);
mmhub_v1_0_init_tlb_regs(adev);
mmhub_v1_0_init_cache_regs(adev);
mmhub_v1_0_enable_system_domain(adev);
mmhub_v1_0_disable_identity_aperture(adev);
mmhub_v1_0_setup_vmid_config(adev);
mmhub_v1_0_program_invalidation(adev);
return 0;
}
@ -262,22 +279,22 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0);
/* Setup TLB control */
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL));
tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp,
MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL,
0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL));
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
}
/**
@ -289,7 +306,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL));
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
@ -314,22 +331,11 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static int mmhub_v1_0_early_init(void *handle)
void mmhub_v1_0_init(struct amdgpu_device *adev)
{
return 0;
}
static int mmhub_v1_0_late_init(void *handle)
{
return 0;
}
static int mmhub_v1_0_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
hub->ctx0_ptb_addr_lo32 =
@ -349,69 +355,20 @@ static int mmhub_v1_0_sw_init(void *handle)
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
return 0;
}
static int mmhub_v1_0_sw_fini(void *handle)
{
return 0;
}
static int mmhub_v1_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) +
2 * i, 0xffffffff);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) +
2 * i, 0x1f);
}
return 0;
}
static int mmhub_v1_0_hw_fini(void *handle)
{
return 0;
}
static int mmhub_v1_0_suspend(void *handle)
{
return 0;
}
static int mmhub_v1_0_resume(void *handle)
{
return 0;
}
static bool mmhub_v1_0_is_idle(void *handle)
{
return true;
}
static int mmhub_v1_0_wait_for_idle(void *handle)
{
return 0;
}
static int mmhub_v1_0_soft_reset(void *handle)
{
return 0;
}
static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data, def1, data1, def2, data2;
uint32_t def, data, def1, data1, def2 = 0, data2 = 0;
def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG));
def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2));
def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2));
def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
if (adev->asic_type != CHIP_RAVEN) {
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2);
} else
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
data |= ATC_L2_MISC_CG__ENABLE_MASK;
@ -423,12 +380,13 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
if (adev->asic_type != CHIP_RAVEN)
data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
} else {
data &= ~ATC_L2_MISC_CG__ENABLE_MASK;
@ -439,22 +397,27 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
if (adev->asic_type != CHIP_RAVEN)
data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
}
if (def != data)
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data);
WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
if (def1 != data1)
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1);
if (def1 != data1) {
if (adev->asic_type != CHIP_RAVEN)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
else
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV, data1);
}
if (def2 != data2)
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2);
if (adev->asic_type != CHIP_RAVEN && def2 != data2)
WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2);
}
static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@ -462,7 +425,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL));
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@ -470,7 +433,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
if (def != data)
WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data);
WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
}
static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@ -478,7 +441,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
{
uint32_t def, data;
def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG));
def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
@ -486,7 +449,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
if (def != data)
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data);
WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
}
static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@ -494,7 +457,7 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
{
uint32_t def, data;
def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL));
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
(adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@ -503,19 +466,18 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
if(def != data)
WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data);
WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
}
static int mmhub_v1_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_sriov_vf(adev))
return 0;
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_RAVEN:
mmhub_v1_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
athub_update_medium_grain_clock_gating(adev,
@ -532,54 +494,20 @@ static int mmhub_v1_0_set_clockgating_state(void *handle,
return 0;
}
static void mmhub_v1_0_get_clockgating_state(void *handle, u32 *flags)
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
/* AMD_CG_SUPPORT_MC_MGCG */
data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL));
data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_MGCG;
/* AMD_CG_SUPPORT_MC_LS */
data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG));
data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_LS;
}
static int mmhub_v1_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
return 0;
}
const struct amd_ip_funcs mmhub_v1_0_ip_funcs = {
.name = "mmhub_v1_0",
.early_init = mmhub_v1_0_early_init,
.late_init = mmhub_v1_0_late_init,
.sw_init = mmhub_v1_0_sw_init,
.sw_fini = mmhub_v1_0_sw_fini,
.hw_init = mmhub_v1_0_hw_init,
.hw_fini = mmhub_v1_0_hw_fini,
.suspend = mmhub_v1_0_suspend,
.resume = mmhub_v1_0_resume,
.is_idle = mmhub_v1_0_is_idle,
.wait_for_idle = mmhub_v1_0_wait_for_idle,
.soft_reset = mmhub_v1_0_soft_reset,
.set_clockgating_state = mmhub_v1_0_set_clockgating_state,
.set_powergating_state = mmhub_v1_0_set_powergating_state,
.get_clockgating_state = mmhub_v1_0_get_clockgating_state,
};
const struct amdgpu_ip_block_version mmhub_v1_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_MMHUB,
.major = 1,
.minor = 0,
.rev = 0,
.funcs = &mmhub_v1_0_ip_funcs,
};

View File

@ -28,6 +28,10 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev);
void mmhub_v1_0_gart_disable(struct amdgpu_device *adev);
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value);
void mmhub_v1_0_init(struct amdgpu_device *adev);
int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs;
extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block;

View File

@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
r = -ETIME;
break;
}
msleep(1);
timeout -= 1;
mdelay(5);
timeout -= 5;
reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_CONTROL));
@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
r = xgpu_ai_mailbox_rcv_msg(adev, event);
while (r) {
if (timeout <= 0) {
pr_err("Doesn't get ack from pf.\n");
pr_err("Doesn't get msg:%d from pf.\n", event);
r = -ETIME;
break;
}
msleep(1);
timeout -= 1;
mdelay(5);
timeout -= 5;
r = xgpu_ai_mailbox_rcv_msg(adev, event);
}
@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
/* start to poll ack */
r = xgpu_ai_poll_ack(adev);
if (r)
return r;
pr_err("Doesn't get ack from pf, continue\n");
xgpu_ai_mailbox_set_valid(adev, false);
@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
req == IDH_REQ_GPU_FINI_ACCESS ||
req == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r)
if (r) {
pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
return r;
}
}
return 0;
@ -241,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
amdgpu_sriov_gpu_reset(adev, false);
amdgpu_sriov_gpu_reset(adev, NULL);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@ -264,12 +266,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
{
int r;
/* see what event we get */
r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* trigger gpu-reset by hypervisor only if TDR disbaled */
if (amdgpu_lockup_timeout == 0) {
/* see what event we get */
r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* only handle FLR_NOTIFY now */
if (!r)
schedule_work(&adev->virt.flr_work);
/* only handle FLR_NOTIFY now */
if (!r)
schedule_work(&adev->virt.flr_work);
}
return 0;
}
@ -296,11 +301,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
{
int r;
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
if (r)
return r;
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
if (r) {
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
return r;

View File

@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
r = -ETIME;
break;
}
msleep(1);
timeout -= 1;
mdelay(5);
timeout -= 5;
reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
}
@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
r = -ETIME;
break;
}
msleep(1);
timeout -= 1;
mdelay(5);
timeout -= 5;
r = xgpu_vi_mailbox_rcv_msg(adev, event);
}
@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
request == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r)
return r;
pr_err("Doesn't get ack from pf, continue\n");
}
return 0;
@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
amdgpu_sriov_gpu_reset(adev, false);
amdgpu_sriov_gpu_reset(adev, NULL);
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@ -537,12 +537,15 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
{
int r;
/* see what event we get */
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* trigger gpu-reset by hypervisor only if TDR disbaled */
if (amdgpu_lockup_timeout == 0) {
/* see what event we get */
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* only handle FLR_NOTIFY now */
if (!r)
schedule_work(&adev->virt.flr_work);
/* only handle FLR_NOTIFY now */
if (!r)
schedule_work(&adev->virt.flr_work);
}
return 0;
}

Some files were not shown because too many files have changed in this diff Show More