drm pull for 4.20-rc1

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJbz8FfAAoJEAx081l5xIa+t8AQAJ6KaMM7rYlRDzIr1Vuh0++2
 kFmfEQnSZnOWxO+zpyQpCJQr+/1aAHQ6+QzWq+/fX/bwH01H1Q4+z6t+4QoPqYlw
 rUYXZYRxnqGVPYx8hwNLmRbJcsXMVKly1SemBXIoabKkEtPNX5AZ4FXCR2WEbsV3
 /YLxsYQv2KMd8aoeC9Hupa07Jj9GfOtEo0a9B1hKmo+XiF9HqPadxaofqOpQ6MCh
 54itBgP7Kj4mwwr8KxG2JCNJagG5aG8q3yiEwaU5b0KzWya4o1wrOAJcBaEAIxpj
 JAgPde+f2L6w9dQbBBWeVFYKNn0jJqJdmbPs5Ek3i/NNFyx01Mn/3vlTZoRUqJN7
 TGXwOI/BWz1iTaHyFPqVH6RPQAoUUDeCwgHkXonogFxvQLpiFG+dRNqxue0XVUMX
 9tDSdZefWPoH3n9J/gDhwbV2Qbw/2n6yzCRYCb8HkqX1Y1JTmdYVgKvcnOwyYwsJ
 QzcVkWUJ31UAaZcTLCEW6SVqcUR0mso3LJAPSKp2NJiVLL8mSd/ViUTUbxRNkkXf
 H0abVGDjWAAZaT5uqNVqg4kV1Vc4Kj+/9QtspW4ktGezOz9DsctwJtfhTgOmT8Fx
 zlEwWmAbf1iJP9UgqI7r4+Nq24saqUYmIX0bowEasLIRO+l14Pf9mQJjgKRcMs/j
 SK4W5EreSFosKsxtQU4H
 =3yxi
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2018-10-24' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "This is going to rebuild more than drm as it adds a new helper to
  list.h for doing bulk updates. Seemed like a reasonable addition to
  me.

  Otherwise the usual merge window stuff lots of i915 and amdgpu, not so
  much nouveau, and piles of everything else.

  Core:
   - Adds a new list.h helper for doing bulk list updates for TTM.
   - Don't leak fb address in smem_start to userspace (comes with EXPORT
     workaround for people using mali out of tree hacks)
   - udmabuf device to turn memfd regions into dma-buf
   - Per-plane blend mode property
   - ref/unref replacements with get/put
   - fbdev conflicting framebuffers code cleaned up
   - host-endian format variants
   - panel orientation quirk for Acer One 10

  bridge:
   - TI SN65DSI86 chip support

  vkms:
   - GEM support.
   - Cursor support

  amdgpu:
   - Merge amdkfd and amdgpu into one module
   - CEC over DP AUX support
   - Picasso APU support + VCN dynamic powergating
   - Raven2 APU support
   - Vega20 enablement + kfd support
   - ACP powergating improvements
   - ABGR/XBGR display support
   - VCN jpeg support
   - xGMI support
   - DC i2c/aux cleanup
   - Ycbcr 4:2:0 support
   - GPUVM improvements
   - Powerplay and powerplay endian fixes
   - Display underflow fixes

  vmwgfx:
   - Move vmwgfx specific TTM code to vmwgfx
   - Split out vmwgfx buffer/resource validation code
   - Atomic operation rework

  bochs:
   - use more helpers
   - format/byteorder improvements

  qxl:
   - use more helpers

  i915:
   - GGTT coherency getparam
   - Turn off resource streamer API
   - More Icelake enablement + DMC firmware
   - Full PPGTT for Ivybridge, Haswell and Valleyview
   - DDB distribution based on resolution
   - Limited range DP display support

  nouveau:
   - CEC over DP AUX support
   - Initial HDMI 2.0 support

  virtio-gpu:
   - vmap support for PRIME objects

  tegra:
   - Initial Tegra194 support
   - DMA/IOMMU integration fixes

  msm:
   - a6xx perf improvements + clock prefix
   - GPU preemption optimisations
   - a6xx devfreq support
   - cursor support

  rockchip:
   - PX30 support
   - rgb output interface support

  mediatek:
   - HDMI output support on mt2701 and mt7623

  rcar-du:
   - Interlaced modes on Gen3
   - LVDS on R8A77980
   - D3 and E3 SoC support

  hisilicon:
   - misc fixes

  mxsfb:
   - runtime pm support

  sun4i:
   - R40 TCON support
   - Allwinner A64 support
   - R40 HDMI support

  omapdrm:
   - Driver rework changing display pipeline ordering to use common code
   - DMM memory barrier and irq fixes
   - Errata workarounds

  exynos:
   - out-bridge support for LVDS bridge driver
   - Samsung 16x16 tiled format support
   - Plane alpha and pixel blend mode support

  tilcdc:
   - suspend/resume update

  mali-dp:
   - misc updates"

* tag 'drm-next-2018-10-24' of git://anongit.freedesktop.org/drm/drm: (1382 commits)
  firmware/dmc/icl: Add missing MODULE_FIRMWARE() for Icelake.
  drm/i915/icl: Fix signal_levels
  drm/i915/icl: Fix DDI/TC port clk_off bits
  drm/i915/icl: create function to identify combophy port
  drm/i915/gen9+: Fix initial readout for Y tiled framebuffers
  drm/i915: Large page offsets for pread/pwrite
  drm/i915/selftests: Disable shrinker across mmap-exhaustion
  drm/i915/dp: Link train Fallback on eDP only if fallback link BW can fit panel's native mode
  drm/i915: Fix intel_dp_mst_best_encoder()
  drm/i915: Skip vcpi allocation for MSTB ports that are gone
  drm/i915: Don't unset intel_connector->mst_port
  drm/i915: Only reset seqno if actually idle
  drm/i915: Use the correct crtc when sanitizing plane mapping
  drm/i915: Restore vblank interrupts earlier
  drm/i915: Check fb stride against plane max stride
  drm/amdgpu/vcn:Fix uninitialized symbol error
  drm: panel-orientation-quirks: Add quirk for Acer One 10 (S1003)
  drm/amd/amdgpu: Fix debugfs error handling
  drm/amdgpu: Update gc_9_0 golden settings.
  drm/amd/powerplay: update PPtable with DC BTC and Tvr SocLimit fields
  ...
This commit is contained in:
Linus Torvalds 2018-10-28 17:49:53 -07:00
commit 53b3b6bbfd
893 changed files with 107890 additions and 25795 deletions

View File

@ -15,6 +15,13 @@ Required children nodes:
to external devices using the OF graph reprensentation (see ../graph.txt).
At least one port node is required.
Optional properties in grandchild nodes:
Any endpoint grandchild node may specify a desired video interface
according to ../../media/video-interfaces.txt, specifically
- bus-width: recognized values are <12>, <16>, <18> and <24>, and
override any output mode selection heuristic, forcing "rgb444",
"rgb565", "rgb666" and "rgb888" respectively.
Example:
hlcdc: hlcdc@f0030000 {
@ -50,3 +57,19 @@ Example:
#pwm-cells = <3>;
};
};
Example 2: With a video interface override to force rgb565; as above
but with these changes/additions:
&hlcdc {
hlcdc-display-controller {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb565>;
port@0 {
hlcdc_panel_output: endpoint@0 {
bus-width = <16>;
};
};
};
};

View File

@ -22,7 +22,13 @@ among others.
Required properties:
- compatible: Must be "lvds-encoder"
- compatible: Must be one or more of the following
- "ti,ds90c185" for the TI DS90C185 FPD-Link Serializer
- "lvds-encoder" for a generic LVDS encoder device
When compatible with the generic version, nodes must list the
device-specific version corresponding to the device first
followed by the generic version.
Required nodes:

View File

@ -14,10 +14,22 @@ Required properties:
- "renesas,r8a7795-lvds" for R8A7795 (R-Car H3) compatible LVDS encoders
- "renesas,r8a7796-lvds" for R8A7796 (R-Car M3-W) compatible LVDS encoders
- "renesas,r8a77970-lvds" for R8A77970 (R-Car V3M) compatible LVDS encoders
- "renesas,r8a77980-lvds" for R8A77980 (R-Car V3H) compatible LVDS encoders
- "renesas,r8a77990-lvds" for R8A77990 (R-Car E3) compatible LVDS encoders
- "renesas,r8a77995-lvds" for R8A77995 (R-Car D3) compatible LVDS encoders
- reg: Base address and length for the memory-mapped registers
- clocks: A phandle + clock-specifier pair for the functional clock
- clocks: A list of phandles + clock-specifier pairs, one for each entry in
the clock-names property.
- clock-names: Name of the clocks. This property is model-dependent.
- The functional clock, which mandatory for all models, shall be listed
first, and shall be named "fck".
- On R8A77990 and R8A77995, the LVDS encoder can use the EXTAL or
DU_DOTCLKINx clocks. Those clocks are optional. When supplied they must be
named "extal" and "dclkin.x" respectively, with "x" being the DU_DOTCLKIN
numerical index.
- When the clocks property only contains the functional clock, the
clock-names property may be omitted.
- resets: A phandle + reset specifier for the module reset
Required nodes:

View File

@ -0,0 +1,87 @@
SN65DSI86 DSI to eDP bridge chip
--------------------------------
This is the binding for Texas Instruments SN65DSI86 bridge.
http://www.ti.com/general/docs/lit/getliterature.tsp?genericPartNumber=sn65dsi86&fileType=pdf
Required properties:
- compatible: Must be "ti,sn65dsi86"
- reg: i2c address of the chip, 0x2d as per datasheet
- enable-gpios: gpio specification for bridge_en pin (active high)
- vccio-supply: A 1.8V supply that powers up the digital IOs.
- vpll-supply: A 1.8V supply that powers up the displayport PLL.
- vcca-supply: A 1.2V supply that powers up the analog circuits.
- vcc-supply: A 1.2V supply that powers up the digital core.
Optional properties:
- interrupts-extended: Specifier for the SN65DSI86 interrupt line.
- gpio-controller: Marks the device has a GPIO controller.
- #gpio-cells : Should be two. The first cell is the pin number and
the second cell is used to specify flags.
See ../../gpio/gpio.txt for more information.
- #pwm-cells : Should be one. See ../../pwm/pwm.txt for description of
the cell formats.
- clock-names: should be "refclk"
- clocks: Specification for input reference clock. The reference
clock rate must be 12 MHz, 19.2 MHz, 26 MHz, 27 MHz or 38.4 MHz.
- data-lanes: See ../../media/video-interface.txt
- lane-polarities: See ../../media/video-interface.txt
- suspend-gpios: specification for GPIO1 pin on bridge (active low)
Required nodes:
This device has two video ports. Their connections are modelled using the
OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
- Video port 0 for DSI input
- Video port 1 for eDP output
Example
-------
edp-bridge@2d {
compatible = "ti,sn65dsi86";
#address-cells = <1>;
#size-cells = <0>;
reg = <0x2d>;
enable-gpios = <&msmgpio 33 GPIO_ACTIVE_HIGH>;
suspend-gpios = <&msmgpio 34 GPIO_ACTIVE_LOW>;
interrupts-extended = <&gpio3 4 IRQ_TYPE_EDGE_FALLING>;
vccio-supply = <&pm8916_l17>;
vcca-supply = <&pm8916_l6>;
vpll-supply = <&pm8916_l17>;
vcc-supply = <&pm8916_l6>;
clock-names = "refclk";
clocks = <&input_refclk>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
edp_bridge_in: endpoint {
remote-endpoint = <&dsi_out>;
};
};
port@1 {
reg = <1>;
edp_bridge_out: endpoint {
data-lanes = <2 1 3 0>;
lane-polarities = <0 1 0 1>;
remote-endpoint = <&edp_panel_in>;
};
};
};
}

View File

@ -0,0 +1,35 @@
TC358764 MIPI-DSI to LVDS panel bridge
Required properties:
- compatible: "toshiba,tc358764"
- reg: the virtual channel number of a DSI peripheral
- vddc-supply: core voltage supply, 1.2V
- vddio-supply: I/O voltage supply, 1.8V or 3.3V
- vddlvds-supply: LVDS1/2 voltage supply, 3.3V
- reset-gpios: a GPIO spec for the reset pin
The device node can contain following 'port' child nodes,
according to the OF graph bindings defined in [1]:
0: DSI Input, not required, if the bridge is DSI controlled
1: LVDS Output, mandatory
[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
Example:
bridge@0 {
reg = <0>;
compatible = "toshiba,tc358764";
vddc-supply = <&vcc_1v2_reg>;
vddio-supply = <&vcc_1v8_reg>;
vddlvds-supply = <&vcc_3v3_reg>;
reset-gpios = <&gpd1 6 GPIO_ACTIVE_LOW>;
#address-cells = <1>;
#size-cells = <0>;
port@1 {
reg = <1>;
lvds_ep: endpoint {
remote-endpoint = <&panel_ep>;
};
};
};

View File

@ -21,6 +21,9 @@ Required properties:
- samsung,pll-clock-frequency: specifies frequency of the oscillator clock
- #address-cells, #size-cells: should be set respectively to <1> and <0>
according to DSI host bindings (see MIPI DSI bindings [1])
- samsung,burst-clock-frequency: specifies DSI frequency in high-speed burst
mode
- samsung,esc-clock-frequency: specifies DSI frequency in escape mode
Optional properties:
- power-domains: a phandle to DSIM power domain node
@ -29,25 +32,9 @@ Child nodes:
Should contain DSI peripheral nodes (see MIPI DSI bindings [1]).
Video interfaces:
Device node can contain video interface port nodes according to [2].
The following are properties specific to those nodes:
port node inbound:
- reg: (required) must be 0.
port node outbound:
- reg: (required) must be 1.
endpoint node connected from mic node (reg = 0):
- remote-endpoint: specifies the endpoint in mic node. This node is required
for Exynos5433 mipi dsi. So mic can access to panel node
throughout this dsi node.
endpoint node connected to panel node (reg = 1):
- remote-endpoint: specifies the endpoint in panel node. This node is
required in all kinds of exynos mipi dsi to represent
the connection between mipi dsi and panel.
- samsung,burst-clock-frequency: specifies DSI frequency in high-speed burst
mode
- samsung,esc-clock-frequency: specifies DSI frequency in escape mode
Device node can contain following video interface port nodes according to [2]:
0: RGB input,
1: DSI output
[1]: Documentation/devicetree/bindings/display/mipi-dsi-bus.txt
[2]: Documentation/devicetree/bindings/media/video-interfaces.txt

View File

@ -16,7 +16,7 @@ The following assumes that only a single peripheral is connected to a DSI
host. Experience shows that this is true for the large majority of setups.
DSI host
--------
========
In addition to the standard properties and those defined by the parent bus of
a DSI host, the following properties apply to a node representing a DSI host.
@ -29,12 +29,24 @@ Required properties:
- #size-cells: Should be 0. There are cases where it makes sense to use a
different value here. See below.
DSI peripheral
--------------
Optional properties:
- clock-master: boolean. Should be enabled if the host is being used in
conjunction with another DSI host to drive the same peripheral. Hardware
supporting such a configuration generally requires the data on both the busses
to be driven by the same clock. Only the DSI host instance controlling this
clock should contain this property.
Peripherals are represented as child nodes of the DSI host's node. Properties
described here apply to all DSI peripherals, but individual bindings may want
to define additional, device-specific properties.
DSI peripheral
==============
Peripherals with DSI as control bus, or no control bus
------------------------------------------------------
Peripherals with the DSI bus as the primary control bus, or peripherals with
no control bus but use the DSI bus to transmit pixel data are represented
as child nodes of the DSI host's node. Properties described here apply to all
DSI peripherals, but individual bindings may want to define additional,
device-specific properties.
Required properties:
- reg: The virtual channel number of a DSI peripheral. Must be in the range
@ -49,9 +61,37 @@ case two alternative representations can be chosen:
property is the number of the first virtual channel and the second cell is
the number of consecutive virtual channels.
Example
-------
Peripherals with a different control bus
----------------------------------------
There are peripherals that have I2C/SPI (or some other non-DSI bus) as the
primary control bus, but are also connected to a DSI bus (mostly for the data
path). Connections between such peripherals and a DSI host can be represented
using the graph bindings [1], [2].
Peripherals that support dual channel DSI
-----------------------------------------
Peripherals with higher bandwidth requirements can be connected to 2 DSI
busses. Each DSI bus/channel drives some portion of the pixel data (generally
left/right half of each line of the display, or even/odd lines of the display).
The graph bindings should be used to represent the multiple DSI busses that are
connected to this peripheral. Each DSI host's output endpoint can be linked to
an input endpoint of the DSI peripheral.
[1] Documentation/devicetree/bindings/graph.txt
[2] Documentation/devicetree/bindings/media/video-interfaces.txt
Examples
========
- (1), (2) and (3) are examples of a DSI host and peripheral on the DSI bus
with different virtual channel configurations.
- (4) is an example of a peripheral on a I2C control bus connected to a
DSI host using of-graph bindings.
- (5) is an example of 2 DSI hosts driving a dual-channel DSI peripheral,
which uses I2C as its primary control bus.
1)
dsi-host {
...
@ -67,6 +107,7 @@ Example
...
};
2)
dsi-host {
...
@ -82,6 +123,7 @@ Example
...
};
3)
dsi-host {
...
@ -96,3 +138,98 @@ Example
...
};
4)
i2c-host {
...
dsi-bridge@35 {
compatible = "...";
reg = <0x35>;
ports {
...
port {
bridge_mipi_in: endpoint {
remote-endpoint = <&host_mipi_out>;
};
};
};
};
};
dsi-host {
...
ports {
...
port {
host_mipi_out: endpoint {
remote-endpoint = <&bridge_mipi_in>;
};
};
};
};
5)
i2c-host {
dsi-bridge@35 {
compatible = "...";
reg = <0x35>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dsi0_in: endpoint {
remote-endpoint = <&dsi0_out>;
};
};
port@1 {
reg = <1>;
dsi1_in: endpoint {
remote-endpoint = <&dsi1_out>;
};
};
};
};
};
dsi0-host {
...
/*
* this DSI instance drives the clock for both the host
* controllers
*/
clock-master;
ports {
...
port {
dsi0_out: endpoint {
remote-endpoint = <&dsi0_in>;
};
};
};
};
dsi1-host {
...
ports {
...
port {
dsi1_out: endpoint {
remote-endpoint = <&dsi1_in>;
};
};
};
};

View File

@ -15,6 +15,8 @@ Required Properties:
- "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU
- "renesas,du-r8a77965" for R8A77965 (R-Car M3-N) compatible DU
- "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU
- "renesas,du-r8a77980" for R8A77980 (R-Car V3H) compatible DU
- "renesas,du-r8a77990" for R8A77990 (R-Car E3) compatible DU
- "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU
- reg: the memory-mapped I/O registers base address and length
@ -61,6 +63,8 @@ corresponding to each DU output.
R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 -
R8A77965 (R-Car M3-N) DPAD 0 HDMI 0 LVDS 0 -
R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - -
R8A77980 (R-Car V3H) DPAD 0 LVDS 0 - -
R8A77990 (R-Car E3) DPAD 0 LVDS 0 LVDS 1 -
R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 -

View File

@ -8,6 +8,9 @@ Required properties:
- compatible: value should be one of the following
"rockchip,rk3036-vop";
"rockchip,rk3126-vop";
"rockchip,px30-vop-lit";
"rockchip,px30-vop-big";
"rockchip,rk3188-vop";
"rockchip,rk3288-vop";
"rockchip,rk3368-vop";
"rockchip,rk3366-vop";

View File

@ -78,6 +78,7 @@ Required properties:
- compatible: value must be one of:
* "allwinner,sun8i-a83t-dw-hdmi"
* "allwinner,sun50i-a64-dw-hdmi", "allwinner,sun8i-a83t-dw-hdmi"
- reg: base address and size of memory-mapped region
- reg-io-width: See dw_hdmi.txt. Shall be 1.
- interrupts: HDMI interrupt number
@ -96,6 +97,9 @@ Required properties:
first port should be the input endpoint. The second should be the
output, usually to an HDMI connector.
Optional properties:
- hvcc-supply: the VCC power supply of the controller
DWC HDMI PHY
------------
@ -103,6 +107,7 @@ Required properties:
- compatible: value must be one of:
* allwinner,sun8i-a83t-hdmi-phy
* allwinner,sun8i-h3-hdmi-phy
* allwinner,sun8i-r40-hdmi-phy
* allwinner,sun50i-a64-hdmi-phy
- reg: base address and size of memory-mapped region
- clocks: phandles to the clocks feeding the HDMI PHY
@ -112,9 +117,9 @@ Required properties:
- resets: phandle to the reset controller driving the PHY
- reset-names: must be "phy"
H3 and A64 HDMI PHY require additional clocks:
H3, A64 and R40 HDMI PHY require additional clocks:
- pll-0: parent of phy clock
- pll-1: second possible phy clock parent (A64 only)
- pll-1: second possible phy clock parent (A64/R40 only)
TV Encoder
----------
@ -151,6 +156,8 @@ Required properties:
* allwinner,sun8i-v3s-tcon
* allwinner,sun9i-a80-tcon-lcd
* allwinner,sun9i-a80-tcon-tv
* "allwinner,sun50i-a64-tcon-lcd", "allwinner,sun8i-a83t-tcon-lcd"
* "allwinner,sun50i-a64-tcon-tv", "allwinner,sun8i-a83t-tcon-tv"
- reg: base address and size of memory-mapped region
- interrupts: interrupt associated to this IP
- clocks: phandles to the clocks feeding the TCON.
@ -369,7 +376,11 @@ Required properties:
* allwinner,sun8i-a83t-de2-mixer-0
* allwinner,sun8i-a83t-de2-mixer-1
* allwinner,sun8i-h3-de2-mixer-0
* allwinner,sun8i-r40-de2-mixer-0
* allwinner,sun8i-r40-de2-mixer-1
* allwinner,sun8i-v3s-de2-mixer
* allwinner,sun50i-a64-de2-mixer-0
* allwinner,sun50i-a64-de2-mixer-1
- reg: base address and size of the memory-mapped region.
- clocks: phandles to the clocks feeding the mixer
* bus: the mixer interface clock
@ -403,6 +414,7 @@ Required properties:
* allwinner,sun8i-r40-display-engine
* allwinner,sun8i-v3s-display-engine
* allwinner,sun9i-a80-display-engine
* allwinner,sun50i-a64-display-engine
- allwinner,pipelines: list of phandle to the display engine
frontends (DE 1.0) or mixers (DE 2.0) available.

View File

@ -13,6 +13,7 @@ GPU Driver Documentation
tve200
v3d
vc4
vkms
bridge/dw-hdmi
xen-front

View File

@ -287,8 +287,14 @@ Atomic Mode Setting Function Reference
.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
:export:
.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
:internal:
Atomic Mode Setting IOCTL and UAPI Functions
--------------------------------------------
.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
:doc: overview
.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
:export:
CRTC Abstraction
================
@ -323,6 +329,12 @@ Frame Buffer Functions Reference
DRM Format Handling
===================
.. kernel-doc:: include/uapi/drm/drm_fourcc.h
:doc: overview
Format Functions Reference
--------------------------
.. kernel-doc:: include/drm/drm_fourcc.h
:internal:
@ -560,7 +572,7 @@ Tile Group Property
Explicit Fencing Properties
---------------------------
.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
:doc: explicit fencing properties
Existing KMS Properties

View File

@ -297,7 +297,7 @@ made up of several fields, the more interesting ones being:
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
int (*fault)(struct vm_fault *vmf);
vm_fault_t (*fault)(struct vm_fault *vmf);
};
@ -505,7 +505,7 @@ GPU Scheduler
Overview
--------
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview
Scheduler Function References
@ -514,5 +514,5 @@ Scheduler Function References
.. kernel-doc:: include/drm/gpu_scheduler.h
:internal:
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:export:

View File

@ -127,7 +127,8 @@ interfaces to fix these issues:
the acquire context explicitly on stack and then also pass it down into
drivers explicitly so that the legacy-on-atomic functions can use them.
Except for some driver code this is done.
Except for some driver code this is done. This task should be finished by
adding WARN_ON(!drm_drv_uses_atomic_modeset) in drm_modeset_lock_all().
* A bunch of the vtable hooks are now in the wrong place: DRM has a split
between core vfunc tables (named ``drm_foo_funcs``), which are used to
@ -137,13 +138,6 @@ interfaces to fix these issues:
``_helper_funcs`` since they are not part of the core ABI. There's a
``FIXME`` comment in the kerneldoc for each such case in ``drm_crtc.h``.
* There's a new helper ``drm_atomic_helper_best_encoder()`` which could be
used by all atomic drivers which don't select the encoder for a given
connector at runtime. That's almost all of them, and would allow us to get
rid of a lot of ``best_encoder`` boilerplate in drivers.
This was almost done, but new drivers added a few more cases again.
Contact: Daniel Vetter
Get rid of dev->struct_mutex from GEM drivers
@ -164,9 +158,8 @@ private lock. The tricky part is the BO free functions, since those can't
reliably take that lock any more. Instead state needs to be protected with
suitable subordinate locks or some cleanup work pushed to a worker thread. For
performance-critical drivers it might also be better to go with a more
fine-grained per-buffer object and per-context lockings scheme. Currently the
following drivers still use ``struct_mutex``: ``msm``, ``omapdrm`` and
``udl``.
fine-grained per-buffer object and per-context lockings scheme. Currently only the
``msm`` driver still use ``struct_mutex``.
Contact: Daniel Vetter, respective driver maintainers
@ -190,7 +183,8 @@ Convert drivers to use simple modeset suspend/resume
Most drivers (except i915 and nouveau) that use
drm_atomic_helper_suspend/resume() can probably be converted to use
drm_mode_config_helper_suspend/resume().
drm_mode_config_helper_suspend/resume(). Also there's still open-coded version
of the atomic suspend/resume code in older atomic modeset drivers.
Contact: Maintainer of the driver you plan to convert
@ -246,20 +240,10 @@ Core refactorings
Clean up the DRM header mess
----------------------------
Currently the DRM subsystem has only one global header, ``drmP.h``. This is
used both for functions exported to helper libraries and drivers and functions
only used internally in the ``drm.ko`` module. The goal would be to move all
header declarations not needed outside of ``drm.ko`` into
``drivers/gpu/drm/drm_*_internal.h`` header files. ``EXPORT_SYMBOL`` also
needs to be dropped for these functions.
This would nicely tie in with the below task to create kerneldoc after the API
is cleaned up. Or with the "hide legacy cruft better" task.
Note that this is well in progress, but ``drmP.h`` is still huge. The updated
plan is to switch to per-file driver API headers, which will also structure
the kerneldoc better. This should also allow more fine-grained ``#include``
directives.
The DRM subsystem originally had only one huge global header, ``drmP.h``. This
is now split up, but many source files still include it. The remaining part of
the cleanup work here is to replace any ``#include <drm/drmP.h>`` by only the
headers needed (and fixing up any missing pre-declarations in the headers).
In the end no .c file should need to include ``drmP.h`` anymore.
@ -278,26 +262,6 @@ See https://dri.freedesktop.org/docs/drm/ for what's there already.
Contact: Daniel Vetter
Hide legacy cruft better
------------------------
Way back DRM supported only drivers which shadow-attached to PCI devices with
userspace or fbdev drivers setting up outputs. Modern DRM drivers take charge
of the entire device, you can spot them with the DRIVER_MODESET flag.
Unfortunately there's still large piles of legacy code around which needs to
be hidden so that driver writers don't accidentally end up using it. And to
prevent security issues in those legacy IOCTLs from being exploited on modern
drivers. This has multiple possible subtasks:
* Extract support code for legacy features into a ``drm-legacy.ko`` kernel
module and compile it only when one of the legacy drivers is enabled.
This is mostly done, the only thing left is to split up ``drm_irq.c`` into
legacy cruft and the parts needed by modern KMS drivers.
Contact: Daniel Vetter
Make panic handling work
------------------------
@ -396,17 +360,12 @@ converting things over. For modeset tests we also first need a bit of
infrastructure to use dumb buffers for untiled buffers, to be able to run all
the non-i915 specific modeset tests.
Contact: Daniel Vetter
Extend virtual test driver (VKMS)
---------------------------------
Create a virtual KMS driver for testing (vkms)
----------------------------------------------
With all the latest helpers it should be fairly simple to create a virtual KMS
driver useful for testing, or for running X or similar on headless machines
(to be able to still use the GPU). This would be similar to vgem, but aimed at
the modeset side.
Once the basics are there there's tons of possibilities to extend it.
See the documentation of :ref:`VKMS <vkms>` for more details. This is an ideal
internship task, since it only requires a virtual machine and can be sized to
fit the available time.
Contact: Daniel Vetter

View File

@ -0,0 +1,24 @@
.. _vkms:
==========================================
drm/vkms Virtual Kernel Modesetting
==========================================
.. kernel-doc:: drivers/gpu/drm/vkms/vkms_drv.c
:doc: vkms (Virtual Kernel Modesetting)
TODO
====
CRC API
-------
- Optimize CRC computation ``compute_crc()`` and plane blending ``blend()``
- Use the alpha value to blend vaddr_src with vaddr_dst instead of
overwriting it in ``blend()``.
- Add igt test to check cleared alpha value for XRGB plane format.
- Add igt test to check extreme alpha values i.e. fully opaque and fully
transparent (intermediate values are affected by hw-specific rounding modes).

View File

@ -272,6 +272,7 @@ Code Seq#(hex) Include File Comments
't' 90-91 linux/toshiba.h toshiba and toshiba_acpi SMM
'u' 00-1F linux/smb_fs.h gone
'u' 20-3F linux/uvcvideo.h USB video class host driver
'u' 40-4f linux/udmabuf.h userspace dma-buf misc device
'v' 00-1F linux/ext2_fs.h conflict!
'v' 00-1F linux/fs.h conflict!
'v' 00-0F linux/sonypi.h conflict!

View File

@ -4756,8 +4756,11 @@ F: drivers/gpu/drm/tdfx/
DRM DRIVER FOR USB DISPLAYLINK VIDEO ADAPTERS
M: Dave Airlie <airlied@redhat.com>
R: Sean Paul <sean@poorly.run>
L: dri-devel@lists.freedesktop.org
S: Odd Fixes
F: drivers/gpu/drm/udl/
T: git git://anongit.freedesktop.org/drm/drm-misc
DRM DRIVER FOR VMWARE VIRTUAL GPU
M: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
@ -4787,8 +4790,8 @@ F: include/uapi/drm/
F: include/linux/vga*
DRM DRIVERS AND MISC GPU PATCHES
M: Gustavo Padovan <gustavo@padovan.org>
M: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
M: Maxime Ripard <maxime.ripard@bootlin.com>
M: Sean Paul <sean@poorly.run>
W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
S: Maintained
@ -4857,6 +4860,7 @@ F: drivers/gpu/drm/fsl-dcu/
F: Documentation/devicetree/bindings/display/fsl,dcu.txt
F: Documentation/devicetree/bindings/display/fsl,tcon.txt
F: Documentation/devicetree/bindings/display/panel/nec,nl4827hc19-05b.txt
T: git git://anongit.freedesktop.org/drm/drm-misc
DRM DRIVERS FOR FREESCALE IMX
M: Philipp Zabel <p.zabel@pengutronix.de>
@ -4906,9 +4910,10 @@ F: Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
DRM DRIVERS FOR RENESAS
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
M: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
L: dri-devel@lists.freedesktop.org
L: linux-renesas-soc@vger.kernel.org
T: git git://linuxtv.org/pinchartl/fbdev
T: git git://linuxtv.org/pinchartl/media drm/du/next
S: Supported
F: drivers/gpu/drm/rcar-du/
F: drivers/gpu/drm/shmobile/
@ -10001,9 +10006,12 @@ F: drivers/media/tuners/mxl5007t.*
MXSFB DRM DRIVER
M: Marek Vasut <marex@denx.de>
M: Stefan Agner <stefan@agner.ch>
L: dri-devel@lists.freedesktop.org
S: Supported
F: drivers/gpu/drm/mxsfb/
F: Documentation/devicetree/bindings/display/mxsfb.txt
T: git git://anongit.freedesktop.org/drm/drm-misc
MYLEX DAC960 PCI RAID Controller
M: Hannes Reinecke <hare@kernel.org>
@ -15530,6 +15538,14 @@ S: Maintained
F: lib/iov_iter.c
F: include/linux/uio.h
USERSPACE DMA BUFFER DRIVER
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
L: dri-devel@lists.freedesktop.org
F: drivers/dma-buf/udmabuf.c
F: include/uapi/linux/udmabuf.h
T: git git://anongit.freedesktop.org/drm/drm-misc
USERSPACE I/O (UIO)
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
S: Maintained

View File

@ -30,4 +30,13 @@ config SW_SYNC
WARNING: improper use of this can result in deadlocking kernel
drivers from userspace. Intended for test and debug only.
config UDMABUF
bool "userspace dmabuf misc driver"
default n
depends on DMA_SHARED_BUFFER
depends on MEMFD_CREATE || COMPILE_TEST
help
A driver to let userspace turn memfd regions into dma-bufs.
Qemu can use this to create host dmabufs for guest framebuffers.
endmenu

View File

@ -1,3 +1,4 @@
obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
obj-$(CONFIG_SYNC_FILE) += sync_file.o
obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
obj-$(CONFIG_UDMABUF) += udmabuf.o

View File

@ -405,7 +405,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
|| !exp_info->ops->map_dma_buf
|| !exp_info->ops->unmap_dma_buf
|| !exp_info->ops->release
|| !exp_info->ops->map
|| !exp_info->ops->mmap)) {
return ERR_PTR(-EINVAL);
}

293
drivers/dma-buf/udmabuf.c Normal file
View File

@ -0,0 +1,293 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/cred.h>
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/memfd.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
static const u32 list_limit = 1024; /* udmabuf_create_list->count limit */
static const size_t size_limit_mb = 64; /* total dmabuf size, in megabytes */
struct udmabuf {
pgoff_t pagecount;
struct page **pages;
};
static int udmabuf_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct udmabuf *ubuf = vma->vm_private_data;
vmf->page = ubuf->pages[vmf->pgoff];
get_page(vmf->page);
return 0;
}
static const struct vm_operations_struct udmabuf_vm_ops = {
.fault = udmabuf_vm_fault,
};
static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
{
struct udmabuf *ubuf = buf->priv;
if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
return -EINVAL;
vma->vm_ops = &udmabuf_vm_ops;
vma->vm_private_data = ubuf;
return 0;
}
static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
enum dma_data_direction direction)
{
struct udmabuf *ubuf = at->dmabuf->priv;
struct sg_table *sg;
int ret;
sg = kzalloc(sizeof(*sg), GFP_KERNEL);
if (!sg)
return ERR_PTR(-ENOMEM);
ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
0, ubuf->pagecount << PAGE_SHIFT,
GFP_KERNEL);
if (ret < 0)
goto err;
if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction)) {
ret = -EINVAL;
goto err;
}
return sg;
err:
sg_free_table(sg);
kfree(sg);
return ERR_PTR(ret);
}
static void unmap_udmabuf(struct dma_buf_attachment *at,
struct sg_table *sg,
enum dma_data_direction direction)
{
sg_free_table(sg);
kfree(sg);
}
static void release_udmabuf(struct dma_buf *buf)
{
struct udmabuf *ubuf = buf->priv;
pgoff_t pg;
for (pg = 0; pg < ubuf->pagecount; pg++)
put_page(ubuf->pages[pg]);
kfree(ubuf->pages);
kfree(ubuf);
}
static void *kmap_udmabuf(struct dma_buf *buf, unsigned long page_num)
{
struct udmabuf *ubuf = buf->priv;
struct page *page = ubuf->pages[page_num];
return kmap(page);
}
static void kunmap_udmabuf(struct dma_buf *buf, unsigned long page_num,
void *vaddr)
{
kunmap(vaddr);
}
static const struct dma_buf_ops udmabuf_ops = {
.map_dma_buf = map_udmabuf,
.unmap_dma_buf = unmap_udmabuf,
.release = release_udmabuf,
.map = kmap_udmabuf,
.unmap = kunmap_udmabuf,
.mmap = mmap_udmabuf,
};
#define SEALS_WANTED (F_SEAL_SHRINK)
#define SEALS_DENIED (F_SEAL_WRITE)
static long udmabuf_create(const struct udmabuf_create_list *head,
const struct udmabuf_create_item *list)
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct file *memfd = NULL;
struct udmabuf *ubuf;
struct dma_buf *buf;
pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
struct page *page;
int seals, ret = -EINVAL;
u32 i, flags;
ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
if (!ubuf)
return -ENOMEM;
pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
for (i = 0; i < head->count; i++) {
if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
goto err;
if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
goto err;
ubuf->pagecount += list[i].size >> PAGE_SHIFT;
if (ubuf->pagecount > pglimit)
goto err;
}
ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages),
GFP_KERNEL);
if (!ubuf->pages) {
ret = -ENOMEM;
goto err;
}
pgbuf = 0;
for (i = 0; i < head->count; i++) {
ret = -EBADFD;
memfd = fget(list[i].memfd);
if (!memfd)
goto err;
if (!shmem_mapping(file_inode(memfd)->i_mapping))
goto err;
seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
if (seals == -EINVAL)
goto err;
ret = -EINVAL;
if ((seals & SEALS_WANTED) != SEALS_WANTED ||
(seals & SEALS_DENIED) != 0)
goto err;
pgoff = list[i].offset >> PAGE_SHIFT;
pgcnt = list[i].size >> PAGE_SHIFT;
for (pgidx = 0; pgidx < pgcnt; pgidx++) {
page = shmem_read_mapping_page(
file_inode(memfd)->i_mapping, pgoff + pgidx);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto err;
}
ubuf->pages[pgbuf++] = page;
}
fput(memfd);
memfd = NULL;
}
exp_info.ops = &udmabuf_ops;
exp_info.size = ubuf->pagecount << PAGE_SHIFT;
exp_info.priv = ubuf;
buf = dma_buf_export(&exp_info);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto err;
}
flags = 0;
if (head->flags & UDMABUF_FLAGS_CLOEXEC)
flags |= O_CLOEXEC;
return dma_buf_fd(buf, flags);
err:
while (pgbuf > 0)
put_page(ubuf->pages[--pgbuf]);
if (memfd)
fput(memfd);
kfree(ubuf->pages);
kfree(ubuf);
return ret;
}
static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
{
struct udmabuf_create create;
struct udmabuf_create_list head;
struct udmabuf_create_item list;
if (copy_from_user(&create, (void __user *)arg,
sizeof(create)))
return -EFAULT;
head.flags = create.flags;
head.count = 1;
list.memfd = create.memfd;
list.offset = create.offset;
list.size = create.size;
return udmabuf_create(&head, &list);
}
static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
{
struct udmabuf_create_list head;
struct udmabuf_create_item *list;
int ret = -EINVAL;
u32 lsize;
if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
return -EFAULT;
if (head.count > list_limit)
return -EINVAL;
lsize = sizeof(struct udmabuf_create_item) * head.count;
list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
if (IS_ERR(list))
return PTR_ERR(list);
ret = udmabuf_create(&head, list);
kfree(list);
return ret;
}
static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
long ret;
switch (ioctl) {
case UDMABUF_CREATE:
ret = udmabuf_ioctl_create(filp, arg);
break;
case UDMABUF_CREATE_LIST:
ret = udmabuf_ioctl_create_list(filp, arg);
break;
default:
ret = -ENOTTY;
break;
}
return ret;
}
static const struct file_operations udmabuf_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = udmabuf_ioctl,
};
static struct miscdevice udmabuf_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "udmabuf",
.fops = &udmabuf_fops,
};
static int __init udmabuf_dev_init(void)
{
return misc_register(&udmabuf_misc);
}
static void __exit udmabuf_dev_exit(void)
{
misc_deregister(&udmabuf_misc);
}
module_init(udmabuf_dev_init)
module_exit(udmabuf_dev_exit)
MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
MODULE_LICENSE("GPL v2");

View File

@ -110,6 +110,26 @@ config DRM_FBDEV_OVERALLOC
is 100. Typical values for double buffering will be 200,
triple buffering 300.
config DRM_FBDEV_LEAK_PHYS_SMEM
bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)"
depends on DRM_FBDEV_EMULATION && EXPERT
default n
help
In order to keep user-space compatibility, we want in certain
use-cases to keep leaking the fbdev physical address to the
user-space program handling the fbdev buffer.
This affects, not only, Amlogic, Allwinner or Rockchip devices
with ARM Mali GPUs using an userspace Blob.
This option is not supported by upstream developers and should be
removed as soon as possible and be considered as a broken and
legacy behaviour from a modern fbdev device driver.
Please send any bug reports when using this to your proprietary
software vendor that requires this.
If in doubt, say "N" or spread the word to your closed source
library vendor.
config DRM_LOAD_EDID_FIRMWARE
bool "Allow to specify an EDID data set instead of probing for it"
depends on DRM
@ -285,8 +305,6 @@ source "drivers/gpu/drm/bridge/Kconfig"
source "drivers/gpu/drm/sti/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"
source "drivers/gpu/drm/imx/Kconfig"
source "drivers/gpu/drm/v3d/Kconfig"

View File

@ -18,7 +18,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \
drm_encoder.o drm_mode_object.o drm_property.o \
drm_plane.o drm_color_mgmt.o drm_print.o \
drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o
drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
drm_atomic_uapi.o
drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
drm-$(CONFIG_DRM_VM) += drm_vm.o

View File

@ -42,3 +42,4 @@ config DRM_AMDGPU_GART_DEBUGFS
source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig"
source "drivers/gpu/drm/amd/amdkfd/Kconfig"

View File

@ -35,7 +35,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH) \
-I$(FULL_AMD_DISPLAY_PATH)/include \
-I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
amdgpu-y := amdgpu_drv.o
@ -51,8 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \
amdgpu_ids.o
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_gmc.o amdgpu_xgmi.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o
vega20_reg_init.o nbio_v7_4.o
# add DF block
amdgpu-y += \
@ -73,7 +74,7 @@ amdgpu-y += \
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o
# add IH block
amdgpu-y += \
@ -88,7 +89,8 @@ amdgpu-y += \
amdgpu-y += \
amdgpu_psp.o \
psp_v3_1.o \
psp_v10_0.o
psp_v10_0.o \
psp_v11_0.o
# add SMC block
amdgpu-y += \
@ -108,6 +110,7 @@ amdgpu-y += \
# add async DMA block
amdgpu-y += \
amdgpu_sdma.o \
sdma_v2_4.o \
sdma_v3_0.o \
sdma_v4_0.o
@ -134,6 +137,9 @@ amdgpu-y += \
amdgpu-y += amdgpu_amdkfd.o
ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
include $(FULL_AMD_PATH)/amdkfd/Makefile
amdgpu-y += $(AMDKFD_FILES)
amdgpu-y += \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \

View File

@ -28,6 +28,8 @@
#ifndef __AMDGPU_H__
#define __AMDGPU_H__
#include "amdgpu_ctx.h"
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/list.h>
@ -69,12 +71,32 @@
#include "amdgpu_vcn.h"
#include "amdgpu_mn.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
#include "amdgpu_job.h"
#include "amdgpu_bo_list.h"
#include "amdgpu_gem.h"
#define MAX_GPU_INSTANCE 16
struct amdgpu_gpu_instance
{
struct amdgpu_device *adev;
int mgpu_fan_enabled;
};
struct amdgpu_mgpu_info
{
struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
struct mutex mutex;
uint32_t num_gpu;
uint32_t num_dgpu;
uint32_t num_apu;
};
/*
* Modules parameters.
@ -129,6 +151,7 @@ extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern struct amdgpu_mgpu_info mgpu_info;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@ -148,9 +171,6 @@ extern int amdgpu_cik_support;
#define AMDGPUFB_CONN_LIMIT 4
#define AMDGPU_BIOS_NUM_SCRATCH 16
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
/* hard reset data */
#define AMDGPU_ASIC_RESET_DATA 0x39d5e86b
@ -171,13 +191,6 @@ extern int amdgpu_cik_support;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
#define AMDGPU_GFX_SAFE_MODE 0x00000001L
#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
@ -205,13 +218,6 @@ enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_LAST
};
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_TRAP0 = 0,
AMDGPU_SDMA_IRQ_TRAP1,
AMDGPU_SDMA_IRQ_LAST
};
enum amdgpu_thermal_irq {
AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0,
AMDGPU_THERMAL_IRQ_HIGH_TO_LOW,
@ -224,6 +230,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
};
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 20
int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state);
@ -271,70 +281,6 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
const struct amdgpu_ip_block_version *ip_block_version);
/* provided by hw blocks that can move/clear data. e.g., gfx or sdma */
struct amdgpu_buffer_funcs {
/* maximum bytes in a single operation */
uint32_t copy_max_bytes;
/* number of dw to reserve per operation */
unsigned copy_num_dw;
/* used for buffer migration */
void (*emit_copy_buffer)(struct amdgpu_ib *ib,
/* src addr in bytes */
uint64_t src_offset,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
/* number of dw to reserve per operation */
unsigned fill_num_dw;
/* used for buffer clearing */
void (*emit_fill_buffer)(struct amdgpu_ib *ib,
/* value to write to memory */
uint32_t src_data,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to fill */
uint32_t byte_count);
};
/* provided by hw blocks that can write ptes, e.g., sdma */
struct amdgpu_vm_pte_funcs {
/* number of dw to reserve per operation */
unsigned copy_pte_num_dw;
/* copy pte entries from GART */
void (*copy_pte)(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count);
/* write pte one entry at a time with addr mapping */
void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
uint64_t value, unsigned count,
uint32_t incr);
/* for linear pte/pde updates without addr mapping */
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint64_t flags);
};
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev);
bool (*prescreen_iv)(struct amdgpu_device *adev);
void (*decode_iv)(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
void (*set_rptr)(struct amdgpu_device *adev);
};
/*
* BIOS.
*/
@ -360,34 +306,6 @@ struct amdgpu_clock {
uint32_t max_pixel_clock;
};
/*
* GEM.
*/
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
void amdgpu_gem_object_free(struct drm_gem_object *obj);
int amdgpu_gem_object_open(struct drm_gem_object *obj,
struct drm_file *file_priv);
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
/* sub-allocation manager, it has to be protected by another lock.
* By conception this is an helper for other part of the driver
* like the indirect buffer or semaphore, which both have their
@ -437,22 +355,6 @@ struct amdgpu_sa_bo {
struct dma_fence *fence;
};
/*
* GEM objects.
*/
void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
struct drm_gem_object **obj);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct drm_device *dev,
uint32_t handle, uint64_t *offset_p);
int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void);
@ -525,16 +427,25 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
/*
* Other graphics doorbells can be allocated here: from 0x8c to 0xef
* Other graphics doorbells can be allocated here: from 0x8c to 0xdf
* Graphics voltage island aperture 1
* default non-graphics QWORD index is 0xF0 - 0xFF inclusive
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
*/
/* sDMA engines */
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
/* sDMA engines reserved from 0xe0 -oxef */
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9,
/* For vega10 sriov, the sdma doorbell must be fixed as follow
* to keep the same setting with host driver, or it will
* happen conflicts
*/
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0,
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2,
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
/* Interrupt handler */
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
@ -599,84 +510,6 @@ struct amdgpu_ib {
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
/*
* Queue manager
*/
struct amdgpu_queue_mapper {
int hw_ip;
struct mutex lock;
/* protected by lock */
struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
};
struct amdgpu_queue_mgr {
struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
};
int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr);
int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr);
int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr,
u32 hw_ip, u32 instance, u32 ring,
struct amdgpu_ring **out_ring);
/*
* context related structures
*/
struct amdgpu_ctx_ring {
uint64_t sequence;
struct dma_fence **fences;
struct drm_sched_entity entity;
};
struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device *adev;
struct amdgpu_queue_mgr queue_mgr;
unsigned reset_counter;
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
struct dma_fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
struct mutex lock;
atomic_t guilty;
};
struct amdgpu_ctx_mgr {
struct amdgpu_device *adev;
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
};
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct dma_fence *fence, uint64_t *seq);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq);
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
/*
* file private structure
*/
@ -690,271 +523,6 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr ctx_mgr;
};
/*
* GFX stuff
*/
#include "clearstate_defs.h"
struct amdgpu_rlc_funcs {
void (*enter_safe_mode)(struct amdgpu_device *adev);
void (*exit_safe_mode)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
spinlock_t ring_lock;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
};
/*
* GPU scratch registers structures, functions & helpers
*/
struct amdgpu_scratch {
unsigned num_reg;
uint32_t reg_base;
uint32_t free_mask;
};
/*
* GFX configurations
*/
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
struct amdgpu_rb_config {
uint32_t rb_backend_disable;
uint32_t user_rb_backend_disable;
uint32_t raster_config;
uint32_t raster_config_1;
};
struct gb_addr_config {
uint16_t pipe_interleave_size;
uint8_t num_pipes;
uint8_t max_compress_frags;
uint8_t num_banks;
uint8_t num_se;
uint8_t num_rb_per_se;
};
struct amdgpu_gfx_config {
unsigned max_shader_engines;
unsigned max_tile_pipes;
unsigned max_cu_per_sh;
unsigned max_sh_per_se;
unsigned max_backends_per_se;
unsigned max_texture_channel_caches;
unsigned max_gprs;
unsigned max_gs_threads;
unsigned max_hw_contexts;
unsigned sc_prim_fifo_size_frontend;
unsigned sc_prim_fifo_size_backend;
unsigned sc_hiz_tile_fifo_size;
unsigned sc_earlyz_tile_fifo_size;
unsigned num_tile_pipes;
unsigned backend_enable_mask;
unsigned mem_max_burst_length_bytes;
unsigned mem_row_size_in_kb;
unsigned shader_engine_tile_size;
unsigned num_gpus;
unsigned multi_gpu_tile_size;
unsigned mc_arb_ramcfg;
unsigned gb_addr_config;
unsigned num_rbs;
unsigned gs_vgt_table_depth;
unsigned gs_prim_buffer_depth;
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
/* cached value of DB_DEBUG2 */
uint32_t db_debug2;
};
struct amdgpu_cu_info {
uint32_t simd_per_cu;
uint32_t max_waves_per_simd;
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
/* total active CU number */
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
uint32_t bitmap[4][4];
};
struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields);
void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
};
struct amdgpu_ngg_buf {
struct amdgpu_bo *bo;
uint64_t gpu_addr;
uint32_t size;
uint32_t bo_size;
};
enum {
NGG_PRIM = 0,
NGG_POS,
NGG_CNTL,
NGG_PARAM,
NGG_BUF_MAX
};
struct amdgpu_ngg {
struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
uint32_t gds_reserve_addr;
uint32_t gds_reserve_size;
bool init;
};
struct sq_work {
struct work_struct work;
unsigned ih_data;
};
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
struct amdgpu_scratch scratch;
const struct firmware *me_fw; /* ME firmware */
uint32_t me_fw_version;
const struct firmware *pfp_fw; /* PFP firmware */
uint32_t pfp_fw_version;
const struct firmware *ce_fw; /* CE firmware */
uint32_t ce_fw_version;
const struct firmware *rlc_fw; /* RLC firmware */
uint32_t rlc_fw_version;
const struct firmware *mec_fw; /* MEC firmware */
uint32_t mec_fw_version;
const struct firmware *mec2_fw; /* MEC2 firmware */
uint32_t mec2_fw_version;
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
uint32_t rlc_srlc_fw_version;
uint32_t rlc_srlc_feature_version;
uint32_t rlc_srlg_fw_version;
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
struct sq_work sq_work;
/* gfx status */
uint32_t gfx_current_status;
/* ce ram size*/
unsigned ce_ram_size;
struct amdgpu_cu_info cu_info;
const struct amdgpu_gfx_funcs *funcs;
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
/* s3/s4 mask */
bool in_suspend;
/* NGG */
struct amdgpu_ngg ngg;
/* pipe reservation */
struct mutex pipe_reserve_mutex;
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
@ -986,7 +554,7 @@ struct amdgpu_cs_parser {
/* scheduler job object */
struct amdgpu_job *job;
struct amdgpu_ring *ring;
struct drm_sched_entity *entity;
/* buffer objects */
struct ww_acquire_ctx ticket;
@ -1037,58 +605,6 @@ struct amdgpu_wb {
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
/*
* SDMA
*/
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
uint32_t fw_version;
uint32_t feature_version;
struct amdgpu_ring ring;
bool burst_nop;
};
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
#ifdef CONFIG_DRM_AMDGPU_SI
//SI DMA has a difference trap irq number for the second engine
struct amdgpu_irq_src trap_irq_1;
#endif
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
uint32_t srbm_soft_reset;
};
/*
* Firmware
*/
enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_DIRECT = 0,
AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP,
};
struct amdgpu_firmware {
struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
enum amdgpu_firmware_load_type load_type;
struct amdgpu_bo *fw_buf;
unsigned int fw_size;
unsigned int max_ucodes;
/* firmwares are loaded by psp instead of smu from vega10 */
const struct amdgpu_psp_funcs *funcs;
struct amdgpu_bo *rbuf;
struct mutex mutex;
/* gpu info firmware data pointer */
const struct firmware *gpu_info_fw;
void *fw_buf_ptr;
uint64_t fw_buf_mc;
};
/*
* Benchmarking
*/
@ -1100,31 +616,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
*/
void amdgpu_test_moves(struct amdgpu_device *adev);
/*
* amdgpu smumgr functions
*/
struct amdgpu_smumgr_funcs {
int (*check_fw_load_finish)(struct amdgpu_device *adev, uint32_t fwtype);
int (*request_smu_load_fw)(struct amdgpu_device *adev);
int (*request_smu_specific_fw)(struct amdgpu_device *adev, uint32_t fwtype);
};
/*
* amdgpu smumgr
*/
struct amdgpu_smumgr {
struct amdgpu_bo *toc_buf;
struct amdgpu_bo *smu_buf;
/* asic priv smu data */
void *priv;
spinlock_t smu_lock;
/* smumgr functions */
const struct amdgpu_smumgr_funcs *smumgr_funcs;
/* ucode loading complete flag */
uint32_t fw_flags;
};
/*
* ASIC specific register table accessible by UMD
*/
@ -1166,23 +657,9 @@ struct amdgpu_asic_funcs {
/*
* IOCTL.
*/
int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
@ -1190,9 +667,6 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
/* VRAM scratch page for HDP bug, default vram page */
struct amdgpu_vram_scratch {
struct amdgpu_bo *robj;
@ -1477,9 +951,6 @@ struct amdgpu_device {
u32 cg_flags;
u32 pg_flags;
/* amdgpu smumgr */
struct amdgpu_smumgr smu;
/* gfx */
struct amdgpu_gfx gfx;
@ -1544,6 +1015,9 @@ struct amdgpu_device {
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
/* s3/s4 mask */
bool in_suspend;
/* record last mm index being written through WREG32*/
unsigned long last_mm_index;
bool in_gpu_reset;
@ -1666,22 +1140,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
#define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
static inline struct amdgpu_sdma_instance *
amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
if (&adev->sdma.instance[i].ring == ring)
break;
if (i < AMDGPU_MAX_SDMA_INSTANCES)
return &adev->sdma.instance[i];
else
return NULL;
}
/*
* ASICs macro.
*/
@ -1700,74 +1158,16 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job* job, bool force);
struct amdgpu_job* job);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
void amdgpu_device_vram_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc, u64 base);
void amdgpu_device_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
@ -1818,6 +1218,12 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
/*
* functions used by amdgpu_xgmi.c
*/
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
/*
* functions used by amdgpu_encoder.c
*/

View File

@ -116,136 +116,47 @@ static int acp_sw_fini(void *handle)
return 0;
}
/* power off a tile/block within ACP */
static int acp_suspend_tile(void *cgs_dev, int tile)
{
u32 val = 0;
u32 count = 0;
if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
pr_err("Invalid ACP tile : %d to suspend\n", tile);
return -1;
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
val &= ACP_TILE_ON_MASK;
if (val == 0x0) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
val = val | (1 << tile);
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
0x500 + tile);
count = ACP_TIMEOUT_LOOP;
while (true) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ tile);
val = val & ACP_TILE_ON_MASK;
if (val == ACP_TILE_OFF_MASK)
break;
if (--count == 0) {
pr_err("Timeout reading ACP PGFSM status\n");
return -ETIMEDOUT;
}
udelay(100);
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
val |= ACP_TILE_OFF_RETAIN_REG_MASK;
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
}
return 0;
}
/* power on a tile/block within ACP */
static int acp_resume_tile(void *cgs_dev, int tile)
{
u32 val = 0;
u32 count = 0;
if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
pr_err("Invalid ACP tile to resume\n");
return -1;
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
val = val & ACP_TILE_ON_MASK;
if (val != 0x0) {
cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
0x600 + tile);
count = ACP_TIMEOUT_LOOP;
while (true) {
val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ tile);
val = val & ACP_TILE_ON_MASK;
if (val == 0x0)
break;
if (--count == 0) {
pr_err("Timeout reading ACP PGFSM status\n");
return -ETIMEDOUT;
}
udelay(100);
}
val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
if (tile == ACP_TILE_P1)
val = val & (ACP_TILE_P1_MASK);
else if (tile == ACP_TILE_P2)
val = val & (ACP_TILE_P2_MASK);
cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
}
return 0;
}
struct acp_pm_domain {
void *cgs_dev;
void *adev;
struct generic_pm_domain gpd;
};
static int acp_poweroff(struct generic_pm_domain *genpd)
{
int i, ret;
struct acp_pm_domain *apd;
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) {
/* Donot return abruptly if any of power tile fails to suspend.
* Log it and continue powering off other tile
*/
for (i = 4; i >= 0 ; i--) {
ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
if (ret)
pr_err("ACP tile %d tile suspend failed\n", i);
}
adev = apd->adev;
/* call smu to POWER GATE ACP block
* smu will
* 1. turn off the acp clock
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
}
return 0;
}
static int acp_poweron(struct generic_pm_domain *genpd)
{
int i, ret;
struct acp_pm_domain *apd;
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) {
for (i = 0; i < 2; i++) {
ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i);
if (ret) {
pr_err("ACP tile %d resume failed\n", i);
break;
}
}
/* Disable DSPs which are not going to be used */
for (i = 0; i < 3; i++) {
ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
/* Continue suspending other DSP, even if one fails */
if (ret)
pr_err("ACP DSP %d suspend failed\n", i);
}
adev = apd->adev;
/* call smu to UNGATE ACP block
* smu will
* 1. exit ulv
* 2. turn on acp clock
* 3. power on acp tiles
*/
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
}
return 0;
}
@ -289,30 +200,31 @@ static int acp_hw_init(void *handle)
r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV)
if (r == -ENODEV) {
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0;
else if (r)
} else if (r) {
return r;
}
if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
return -EINVAL;
acp_base = adev->rmmio_base;
if (adev->asic_type != CHIP_STONEY) {
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device;
adev->acp.acp_genpd->adev = adev;
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
}
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL);
@ -429,17 +341,16 @@ static int acp_hw_init(void *handle)
if (r)
return r;
if (adev->asic_type != CHIP_STONEY) {
for (i = 0; i < ACP_DEVS ; i++) {
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
if (r) {
dev_err(dev, "Failed to add dev to genpd\n");
return r;
}
for (i = 0; i < ACP_DEVS ; i++) {
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
if (r) {
dev_err(dev, "Failed to add dev to genpd\n");
return r;
}
}
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@ -497,8 +408,10 @@ static int acp_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* return early if no ACP */
if (!adev->acp.acp_cell)
if (!adev->acp.acp_genpd) {
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0;
}
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@ -536,19 +449,17 @@ static int acp_hw_fini(void *handle)
udelay(100);
}
if (adev->acp.acp_genpd) {
for (i = 0; i < ACP_DEVS ; i++) {
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
ret = pm_genpd_remove_device(dev);
/* If removal fails, dont giveup and try rest */
if (ret)
dev_err(dev, "remove dev from genpd failed\n");
}
kfree(adev->acp.acp_genpd);
for (i = 0; i < ACP_DEVS ; i++) {
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
ret = pm_genpd_remove_device(dev);
/* If removal fails, dont giveup and try rest */
if (ret)
dev_err(dev, "remove dev from genpd failed\n");
}
mfd_remove_devices(adev->acp.parent);
kfree(adev->acp.acp_res);
kfree(adev->acp.acp_genpd);
kfree(adev->acp.acp_cell);
return 0;
@ -556,11 +467,21 @@ static int acp_hw_fini(void *handle)
static int acp_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* power up on suspend */
if (!adev->acp.acp_cell)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0;
}
static int acp_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* power down again on resume */
if (!adev->acp.acp_cell)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0;
}
@ -593,6 +514,12 @@ static int acp_set_clockgating_state(void *handle,
static int acp_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = state == AMD_PG_STATE_GATE ? true : false;
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
return 0;
}

View File

@ -31,6 +31,7 @@
#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_display.h"
#include "amd_acpi.h"
#include "atom.h"
@ -358,7 +359,9 @@ out:
*
* Checks the acpi event and if it matches an atif event,
* handles it.
* Returns NOTIFY code
*
* Returns:
* NOTIFY_BAD or NOTIFY_DONE, depending on the event.
*/
static int amdgpu_atif_handler(struct amdgpu_device *adev,
struct acpi_bus_event *event)
@ -372,11 +375,16 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0)
return NOTIFY_DONE;
/* Is this actually our event? */
if (!atif ||
!atif->notification_cfg.enabled ||
event->type != atif->notification_cfg.command_code)
/* Not our event */
return NOTIFY_DONE;
event->type != atif->notification_cfg.command_code) {
/* These events will generate keypresses otherwise */
if (event->type == ACPI_VIDEO_NOTIFY_PROBE)
return NOTIFY_BAD;
else
return NOTIFY_DONE;
}
if (atif->functions.sbios_requests) {
struct atif_sbios_requests req;
@ -385,7 +393,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
count = amdgpu_atif_get_sbios_requests(atif, &req);
if (count <= 0)
return NOTIFY_DONE;
return NOTIFY_BAD;
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);

View File

@ -28,7 +28,6 @@
#include <linux/module.h>
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
static const unsigned int compute_vmid_bitmap = 0xFF00;
@ -36,45 +35,23 @@ int amdgpu_amdkfd_init(void)
{
int ret;
#if defined(CONFIG_HSA_AMD_MODULE)
int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
if (kgd2kfd_init_p == NULL)
return -ENOENT;
ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret) {
symbol_put(kgd2kfd_init);
kgd2kfd = NULL;
}
#elif defined(CONFIG_HSA_AMD)
#ifdef CONFIG_HSA_AMD
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret)
kgd2kfd = NULL;
amdgpu_amdkfd_gpuvm_init_mem_limits();
#else
kgd2kfd = NULL;
ret = -ENOENT;
#endif
#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
amdgpu_amdkfd_gpuvm_init_mem_limits();
#endif
return ret;
}
void amdgpu_amdkfd_fini(void)
{
if (kgd2kfd) {
if (kgd2kfd)
kgd2kfd->exit();
symbol_put(kgd2kfd_init);
}
}
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
@ -99,6 +76,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
case CHIP_VEGA20:
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
@ -146,7 +124,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
int i, n;
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
@ -155,7 +133,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_VA_HOLE_START),
AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev->ddev->render->index
};
@ -185,7 +163,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
if (adev->asic_type >= CHIP_VEGA10) {
if (adev->asic_type < CHIP_VEGA10) {
kgd2kfd->device_init(adev->kfd, &gpu_resources);
return;
}
n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
for (i = 0; i < n; i += 2) {
/* On SOC15 the BIF is involved in routing
* doorbells using the low 12 bits of the
* address. Communicate the assignments to
@ -193,20 +179,31 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
gpu_resources.sdma_doorbell[0][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE0;
gpu_resources.sdma_doorbell[0][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
gpu_resources.sdma_doorbell[1][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE1;
gpu_resources.sdma_doorbell[1][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
*/
gpu_resources.reserved_doorbell_mask = 0x1f0;
gpu_resources.reserved_doorbell_val = 0x0f0;
if (adev->asic_type == CHIP_VEGA10) {
gpu_resources.sdma_doorbell[0][i] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
} else {
gpu_resources.sdma_doorbell[0][i] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
}
}
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
*/
gpu_resources.reserved_doorbell_mask = 0x1e0;
gpu_resources.reserved_doorbell_val = 0x0e0;
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
@ -267,7 +264,8 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
amdgpu_device_gpu_recover(adev, NULL, false);
if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
}
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
@ -437,6 +435,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->gmc.xgmi.hive_id;
}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len)
@ -510,7 +515,7 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
#ifndef CONFIG_HSA_AMD
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
return false;

View File

@ -145,6 +145,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
#define read_user_wptr(mmptr, wptr, dst) \
({ \
@ -162,17 +163,18 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
})
/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
void **vm, void **process_info,
struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp,
struct file *filp, unsigned int pasid,
void **vm, void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,

View File

@ -142,7 +142,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@ -873,7 +874,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@ -881,7 +882,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
lower_32_bits(page_table_base));
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)

View File

@ -45,8 +45,6 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};
struct vi_sdma_mqd;
/*
* Register access functions
*/
@ -100,7 +98,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
@ -164,6 +162,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@ -281,7 +280,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
lock_srbm(kgd, mec, pipe, 0, 0);
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
unlock_srbm(kgd);
@ -833,7 +833,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@ -841,7 +841,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
lower_32_bits(page_table_base));
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)

View File

@ -138,7 +138,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
uint64_t page_table_base);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
@ -201,6 +201,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@ -214,7 +215,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
.set_compute_idle = amdgpu_amdkfd_set_compute_idle,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
@ -1011,11 +1013,10 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
AMDGPU_PTE_VALID;
uint64_t base = page_table_base | AMDGPU_PTE_VALID;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",

View File

@ -364,7 +364,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
struct amdgpu_vm_parser param;
uint64_t addr, flags = AMDGPU_PTE_VALID;
int ret;
param.domain = AMDGPU_GEM_DOMAIN_VRAM;
@ -383,9 +382,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret;
}
addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
vm->pd_phys_addr = addr;
vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
if (vm->use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL);
@ -678,7 +675,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
if (!ctx->vm_pd)
return -ENOMEM;
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
@ -743,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
return -ENOMEM;
}
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
@ -1003,8 +998,8 @@ create_evict_fence_fail:
return ret;
}
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
void **vm, void **process_info,
struct dma_fence **ef)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@ -1016,7 +1011,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
return -ENOMEM;
/* Initialize AMDGPU part of the VM */
ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
if (ret) {
pr_err("Failed init vm ret %d\n", ret);
goto amdgpu_vm_init_fail;
@ -1039,7 +1034,7 @@ amdgpu_vm_init_fail:
}
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp,
struct file *filp, unsigned int pasid,
void **vm, void **process_info,
struct dma_fence **ef)
{
@ -1054,7 +1049,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
return -EINVAL;
/* Convert VM into a compute VM */
ret = amdgpu_vm_make_compute(adev, avm);
ret = amdgpu_vm_make_compute(adev, avm, pasid);
if (ret)
return ret;
@ -1117,11 +1112,34 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
kfree(vm);
}
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
if (WARN_ON(!kgd || !vm))
return;
pr_debug("Releasing process vm %p\n", vm);
/* The original pasid of amdgpu vm has already been
* released during making a amdgpu vm to a compute vm
* The current pasid is managed by kfd and will be
* released on kfd process destroy. Set amdgpu pasid
* to 0 to avoid duplicate release.
*/
amdgpu_vm_release_compute(adev, avm);
}
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
struct amdgpu_bo *pd = avm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
if (adev->asic_type < CHIP_VEGA10)
return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
return avm->pd_phys_addr;
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

View File

@ -29,6 +29,7 @@
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_i2c.h"
#include "amdgpu_display.h"
#include "atom.h"
#include "atom-bits.h"

View File

@ -117,6 +117,10 @@ union igp_info {
union umc_info {
struct atom_umc_info_v3_1 v31;
};
union vram_info {
struct atom_vram_info_header_v2_3 v23;
};
/*
* Return vram width from integrated system info table, if available,
* or 0 if not.
@ -174,7 +178,7 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_GDDR5:
vram_type = AMDGPU_VRAM_TYPE_GDDR5;
break;
case ATOM_DGPU_VRAM_TYPE_HBM:
case ATOM_DGPU_VRAM_TYPE_HBM2:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
default:
@ -195,7 +199,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
int index;
u16 data_offset, size;
union igp_info *igp_info;
union umc_info *umc_info;
union vram_info *vram_info;
u8 frev, crev;
u8 mem_type;
@ -204,7 +208,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
integratedsysteminfo);
else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info);
vram_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@ -219,11 +223,11 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
return 0;
}
} else {
umc_info = (union umc_info *)
vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 1:
mem_type = umc_info->v31.vram_type;
case 3:
mem_type = vram_info->v23.vram_module[0].memory_type;
return convert_atom_mem_type_to_vram_type(adev, mem_type);
default:
return 0;

View File

@ -49,8 +49,11 @@ static void amdgpu_bo_list_free(struct kref *ref)
refcount);
struct amdgpu_bo_list_entry *e;
amdgpu_bo_list_for_each_entry(e, list)
amdgpu_bo_unref(&e->robj);
amdgpu_bo_list_for_each_entry(e, list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
amdgpu_bo_unref(&bo);
}
call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
}
@ -67,7 +70,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
unsigned i;
int r;
if (num_entries > SIZE_MAX / sizeof(struct amdgpu_bo_list_entry))
if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
/ sizeof(struct amdgpu_bo_list_entry))
return -EINVAL;
size = sizeof(struct amdgpu_bo_list);
@ -111,21 +115,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
entry = &array[last_entry++];
}
entry->robj = bo;
entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY);
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = !entry->robj->prime_shared_count;
entry->tv.bo = &bo->tbo;
entry->tv.shared = !bo->prime_shared_count;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = entry->robj;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
list->gws_obj = entry->robj;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
list->oa_obj = entry->robj;
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = bo;
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
list->gws_obj = bo;
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
list->oa_obj = bo;
total_size += amdgpu_bo_size(entry->robj);
trace_amdgpu_bo_list_set(list, entry->robj);
total_size += amdgpu_bo_size(bo);
trace_amdgpu_bo_list_set(list, bo);
}
list->first_userptr = first_userptr;
@ -137,8 +140,11 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return 0;
error_free:
while (i--)
amdgpu_bo_unref(&array[i].robj);
while (i--) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
amdgpu_bo_unref(&bo);
}
kvfree(list);
return r;
@ -190,9 +196,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
* with the same priority, i.e. it must be stable.
*/
amdgpu_bo_list_for_each_entry(e, list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
unsigned priority = e->priority;
if (!e->robj->parent)
if (!bo->parent)
list_add_tail(&e->tv.head, &bucket[priority]);
e->user_pages = NULL;

View File

@ -32,7 +32,6 @@ struct amdgpu_bo_va;
struct amdgpu_fpriv;
struct amdgpu_bo_list_entry {
struct amdgpu_bo *robj;
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
uint32_t priority;

View File

@ -34,6 +34,7 @@
#include "atombios_dp.h"
#include "amdgpu_connectors.h"
#include "amdgpu_i2c.h"
#include "amdgpu_display.h"
#include <linux/pm_runtime.h>

View File

@ -32,12 +32,14 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
uint32_t *offset)
{
struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
unsigned long size;
int r;
@ -45,21 +47,21 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
if (gobj == NULL)
return -EINVAL;
p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
p->uf_entry.tv.bo = &bo->tbo;
p->uf_entry.tv.shared = true;
p->uf_entry.user_pages = NULL;
drm_gem_object_put_unlocked(gobj);
size = amdgpu_bo_size(p->uf_entry.robj);
size = amdgpu_bo_size(bo);
if (size != PAGE_SIZE || (data->offset + 8) > size) {
r = -EINVAL;
goto error_unref;
}
if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
r = -EINVAL;
goto error_unref;
}
@ -69,7 +71,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
return 0;
error_unref:
amdgpu_bo_unref(&p->uf_entry.robj);
amdgpu_bo_unref(&bo);
return r;
}
@ -228,7 +230,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
goto free_all_kdata;
}
if (p->uf_entry.robj)
if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
@ -457,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
p->evictable = list_prev_entry(p->evictable, tv.head)) {
struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj;
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
bool update_bytes_moved_vis;
uint32_t other;
/* If we reached our current BO we can forget it */
if (candidate->robj == validated)
if (bo == validated)
break;
/* We can't move pinned BOs here */
@ -528,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
int r;
list_for_each_entry(lobj, validated, tv.head) {
struct amdgpu_bo *bo = lobj->robj;
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
bool binding_userptr = false;
struct mm_struct *usermm;
@ -603,7 +605,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
if (p->uf_entry.robj && !p->uf_entry.robj->parent)
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
while (1) {
@ -619,7 +621,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
INIT_LIST_HEAD(&need_pages);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->robj;
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) {
@ -638,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
list_del(&e->tv.head);
list_add(&e->tv.head, &need_pages);
amdgpu_bo_unreserve(e->robj);
amdgpu_bo_unreserve(bo);
}
}
@ -657,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
/* Fill the page arrays for all userptrs. */
list_for_each_entry(e, &need_pages, tv.head) {
struct ttm_tt *ttm = e->robj->tbo.ttm;
struct ttm_tt *ttm = e->tv.bo->ttm;
e->user_pages = kvmalloc_array(ttm->num_pages,
sizeof(struct page*),
@ -716,23 +718,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
oa = p->bo_list->oa_obj;
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds);
p->job->gds_size = amdgpu_bo_size(gds);
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
}
if (gws) {
p->job->gws_base = amdgpu_bo_gpu_offset(gws);
p->job->gws_size = amdgpu_bo_size(gws);
p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
}
if (oa) {
p->job->oa_base = amdgpu_bo_gpu_offset(oa);
p->job->oa_size = amdgpu_bo_size(oa);
p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
}
if (!r && p->uf_entry.robj) {
struct amdgpu_bo *uf = p->uf_entry.robj;
if (!r && p->uf_entry.tv.bo) {
struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
r = amdgpu_ttm_alloc_gart(&uf->tbo);
p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@ -748,8 +750,7 @@ error_free_pages:
if (!e->user_pages)
continue;
release_pages(e->user_pages,
e->robj->tbo.ttm->num_pages);
release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
kvfree(e->user_pages);
}
@ -762,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
int r;
list_for_each_entry(e, &p->validated, tv.head) {
struct reservation_object *resv = e->robj->tbo.resv;
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
struct reservation_object *resv = bo->tbo.resv;
r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
amdgpu_bo_explicit_sync(e->robj));
amdgpu_bo_explicit_sync(bo));
if (r)
return r;
@ -807,11 +810,16 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
kfree(parser->chunks);
if (parser->job)
amdgpu_job_free(parser->job);
amdgpu_bo_unref(&parser->uf_entry.robj);
if (parser->uf_entry.tv.bo) {
struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
amdgpu_bo_unref(&uf);
}
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
@ -820,6 +828,71 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
struct amdgpu_bo *bo;
int r;
/* Only for UVD/VCE VM emulation */
if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_bo_va_mapping *m;
struct amdgpu_bo *aobj = NULL;
struct amdgpu_cs_chunk *chunk;
uint64_t offset, va_start;
struct amdgpu_ib *ib;
uint8_t *kptr;
chunk = &p->chunks[i];
ib = &p->job->ibs[j];
chunk_ib = chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
if (r) {
DRM_ERROR("IB va_start is invalid\n");
return r;
}
if ((va_start + chunk_ib->ib_bytes) >
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("IB va_start+ib_bytes is invalid\n");
return -EINVAL;
}
/* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
if (r) {
return r;
}
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += va_start - offset;
if (ring->funcs->parse_cs) {
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
r = amdgpu_ring_parse_cs(ring, p, j);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, j);
amdgpu_bo_kunmap(aobj);
if (r)
return r;
}
j++;
}
}
if (!p->job->vm)
return amdgpu_cs_sync_rings(p);
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
@ -852,7 +925,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
struct dma_fence *f;
/* ignore duplicates */
bo = e->robj;
bo = ttm_to_amdgpu_bo(e->tv.bo);
if (!bo)
continue;
@ -882,101 +955,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
if (r)
return r;
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
if (r)
return r;
p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
/* ignore duplicates */
if (!e->robj)
if (!bo)
continue;
amdgpu_vm_bo_invalidate(adev, e->robj, false);
amdgpu_vm_bo_invalidate(adev, bo, false);
}
}
return r;
}
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_ring *ring = p->ring;
int r;
/* Only for UVD/VCE VM emulation */
if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_bo_va_mapping *m;
struct amdgpu_bo *aobj = NULL;
struct amdgpu_cs_chunk *chunk;
uint64_t offset, va_start;
struct amdgpu_ib *ib;
uint8_t *kptr;
chunk = &p->chunks[i];
ib = &p->job->ibs[j];
chunk_ib = chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
if (r) {
DRM_ERROR("IB va_start is invalid\n");
return r;
}
if ((va_start + chunk_ib->ib_bytes) >
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("IB va_start+ib_bytes is invalid\n");
return -EINVAL;
}
/* the IB should be reserved at this point */
r = amdgpu_bo_kmap(aobj, (void **)&kptr);
if (r) {
return r;
}
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += va_start - offset;
if (p->ring->funcs->parse_cs) {
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
r = amdgpu_ring_parse_cs(ring, p, j);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, j);
amdgpu_bo_kunmap(aobj);
if (r)
return r;
}
j++;
}
}
if (p->job->vm) {
p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
r = amdgpu_bo_vm_update_pte(p);
if (r)
return r;
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
if (r)
return r;
}
return amdgpu_cs_sync_rings(p);
}
@ -985,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
{
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
int i, j;
int r, ce_preempt = 0, de_preempt = 0;
struct amdgpu_ring *ring;
int i, j;
for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk;
struct amdgpu_ib *ib;
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_ring *ring;
struct drm_sched_entity *entity;
chunk = &parser->chunks[i];
ib = &parser->job->ibs[j];
@ -1014,8 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return -EINVAL;
}
r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring, &ring);
r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring,
&entity);
if (r)
return r;
@ -1023,14 +1022,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
parser->job->preamble_status |=
AMDGPU_PREAMBLE_IB_PRESENT;
if (parser->ring && parser->ring != ring)
if (parser->entity && parser->entity != entity)
return -EINVAL;
parser->ring = ring;
parser->entity = entity;
r = amdgpu_ib_get(adev, vm,
ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
ib);
ring = to_amdgpu_ring(entity->rq->sched);
r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
chunk_ib->ib_bytes : 0, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@ -1044,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
}
/* UVD & VCE fw doesn't support user fences */
ring = to_amdgpu_ring(parser->entity->rq->sched);
if (parser->job->uf_addr && (
parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@ -1065,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (i = 0; i < num_deps; ++i) {
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
struct drm_sched_entity *entity;
struct dma_fence *fence;
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
if (ctx == NULL)
return -EINVAL;
r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
deps[i].ip_type,
deps[i].ip_instance,
deps[i].ring, &ring);
r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
deps[i].ip_instance,
deps[i].ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
fence = amdgpu_ctx_get_fence(ctx, ring,
fence = amdgpu_ctx_get_fence(ctx, entity,
deps[i].handle);
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
@ -1105,7 +1104,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
{
int r;
struct dma_fence *fence;
r = drm_syncobj_find_fence(p->filp, handle, &fence);
r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
if (r)
return r;
@ -1194,16 +1193,16 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
int i;
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_ring *ring = p->ring;
struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
struct drm_sched_entity *entity = p->entity;
enum drm_sched_priority priority;
struct amdgpu_ring *ring;
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
@ -1220,7 +1219,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* No memory allocation is allowed while holding the mn lock */
amdgpu_mn_lock(p->mn);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->robj;
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
r = -ERESTARTSYS;
@ -1231,15 +1230,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished);
r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
if (r) {
dma_fence_put(p->fence);
dma_fence_put(&job->base.s_fence->finished);
amdgpu_job_free(job);
amdgpu_mn_unlock(p->mn);
return r;
}
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
amdgpu_cs_post_dependencies(p);
if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@ -1261,6 +1252,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
ring = to_amdgpu_ring(entity->rq->sched);
amdgpu_ring_priority_get(ring, priority);
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);
@ -1300,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r)
goto out;
r = amdgpu_cs_dependencies(adev, &parser);
if (r) {
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
goto out;
}
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
@ -1311,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
reserved_buffers = true;
r = amdgpu_cs_dependencies(adev, &parser);
if (r) {
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
goto out;
}
for (i = 0; i < parser.job->num_ibs; i++)
trace_amdgpu_cs(&parser, i);
r = amdgpu_cs_ib_vm_chunk(adev, &parser);
r = amdgpu_cs_vm_handling(&parser);
if (r)
goto out;
@ -1344,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_ring *ring = NULL;
struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
long r;
@ -1355,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
if (ctx == NULL)
return -EINVAL;
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring);
r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
if (IS_ERR(fence))
r = PTR_ERR(fence);
else if (fence) {
@ -1395,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_fence *user)
{
struct amdgpu_ring *ring;
struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
int r;
@ -1404,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
if (ctx == NULL)
return ERR_PTR(-EINVAL);
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
user->ip_instance, user->ring, &ring);
r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
user->ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return ERR_PTR(r);
}
fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
amdgpu_ctx_put(ctx);
return fence;

View File

@ -27,6 +27,30 @@
#include "amdgpu.h"
#include "amdgpu_sched.h"
#define to_amdgpu_ctx_entity(e) \
container_of((e), struct amdgpu_ctx_entity, entity)
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_GFX] = 1,
[AMDGPU_HW_IP_COMPUTE] = 4,
[AMDGPU_HW_IP_DMA] = 2,
[AMDGPU_HW_IP_UVD] = 1,
[AMDGPU_HW_IP_VCE] = 1,
[AMDGPU_HW_IP_UVD_ENC] = 1,
[AMDGPU_HW_IP_VCN_DEC] = 1,
[AMDGPU_HW_IP_VCN_ENC] = 1,
};
static int amdgput_ctx_total_num_entities(void)
{
unsigned i, num_entities = 0;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
num_entities += amdgpu_ctx_num_entities[i];
return num_entities;
}
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
@ -48,6 +72,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
{
unsigned num_entities = amdgput_ctx_total_num_entities();
unsigned i, j;
int r;
@ -60,51 +85,104 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
sizeof(struct dma_fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
mutex_init(&ctx->lock);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
ctx->entities[0] = kcalloc(num_entities,
sizeof(struct amdgpu_ctx_entity),
GFP_KERNEL);
if (!ctx->entities[0]) {
r = -ENOMEM;
goto error_free_fences;
}
for (i = 0; i < num_entities; ++i) {
struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
entity->sequence = 1;
entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
}
for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
ctx->entities[i] = ctx->entities[i - 1] +
amdgpu_ctx_num_entities[i - 1];
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
/* create context entity for each ring */
for (i = 0; i < adev->num_rings; i++) {
struct amdgpu_ring *ring = adev->rings[i];
struct drm_sched_rq *rq;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
unsigned num_rings;
rq = &ring->sched.sched_rq[priority];
switch (i) {
case AMDGPU_HW_IP_GFX:
rings[0] = &adev->gfx.gfx_ring[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_COMPUTE:
for (j = 0; j < adev->gfx.num_compute_rings; ++j)
rings[j] = &adev->gfx.compute_ring[j];
num_rings = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
for (j = 0; j < adev->sdma.num_instances; ++j)
rings[j] = &adev->sdma.instance[j].ring;
num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
rings[0] = &adev->uvd.inst[0].ring;
num_rings = 1;
break;
case AMDGPU_HW_IP_VCE:
rings[0] = &adev->vce.ring[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
rings[0] = &adev->uvd.inst[0].ring_enc[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_DEC:
rings[0] = &adev->vcn.ring_dec;
num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_ENC:
rings[0] = &adev->vcn.ring_enc[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
rings[0] = &adev->vcn.ring_jpeg;
num_rings = 1;
break;
}
if (ring == &adev->gfx.kiq.ring)
continue;
for (j = 0; j < num_rings; ++j)
rqs[j] = &rings[j]->sched.sched_rq[priority];
r = drm_sched_entity_init(&ctx->rings[i].entity,
&rq, 1, &ctx->guilty);
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
r = drm_sched_entity_init(&ctx->entities[i][j].entity,
rqs, num_rings, &ctx->guilty);
if (r)
goto failed;
goto error_cleanup_entities;
}
r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
if (r)
goto failed;
return 0;
failed:
for (j = 0; j < i; j++)
drm_sched_entity_destroy(&ctx->rings[j].entity);
error_cleanup_entities:
for (i = 0; i < num_entities; ++i)
drm_sched_entity_destroy(&ctx->entities[0][i].entity);
kfree(ctx->entities[0]);
error_free_fences:
kfree(ctx->fences);
ctx->fences = NULL;
return r;
@ -113,25 +191,47 @@ failed:
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
if (!adev)
return;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
for (i = 0; i < num_entities; ++i)
for (j = 0; j < amdgpu_sched_jobs; ++j)
dma_fence_put(ctx->rings[i].fences[j]);
dma_fence_put(ctx->entities[0][i].fences[j]);
kfree(ctx->fences);
ctx->fences = NULL;
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
}
/* Right now all IPs have only one instance - multiple rings. */
if (instance != 0) {
DRM_DEBUG("invalid ip instance: %d\n", instance);
return -EINVAL;
}
if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
return -EINVAL;
}
*entity = &ctx->entities[hw_ip][ring].entity;
return 0;
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct drm_file *filp,
@ -168,17 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
unsigned num_entities;
u32 i;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
for (i = 0; i < ctx->adev->num_rings; i++) {
num_entities = 0;
for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
num_entities += amdgpu_ctx_num_entities[i];
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
drm_sched_entity_destroy(&ctx->rings[i].entity);
}
for (i = 0; i < num_entities; i++)
drm_sched_entity_destroy(&ctx->entities[0][i].entity);
amdgpu_ctx_fini(ref);
}
@ -334,56 +434,56 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
return 0;
}
int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct dma_fence *fence, uint64_t* handler)
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence, uint64_t* handle)
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
uint64_t seq = cring->sequence;
unsigned idx = 0;
struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
uint64_t seq = centity->sequence;
struct dma_fence *other = NULL;
unsigned idx = 0;
idx = seq & (amdgpu_sched_jobs - 1);
other = cring->fences[idx];
other = centity->fences[idx];
if (other)
BUG_ON(!dma_fence_is_signaled(other));
dma_fence_get(fence);
spin_lock(&ctx->ring_lock);
cring->fences[idx] = fence;
cring->sequence++;
centity->fences[idx] = fence;
centity->sequence++;
spin_unlock(&ctx->ring_lock);
dma_fence_put(other);
if (handler)
*handler = seq;
return 0;
if (handle)
*handle = seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq)
struct drm_sched_entity *entity,
uint64_t seq)
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
struct dma_fence *fence;
spin_lock(&ctx->ring_lock);
if (seq == ~0ull)
seq = ctx->rings[ring->idx].sequence - 1;
seq = centity->sequence - 1;
if (seq >= cring->sequence) {
if (seq >= centity->sequence) {
spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL);
}
if (seq + amdgpu_sched_jobs < cring->sequence) {
if (seq + amdgpu_sched_jobs < centity->sequence) {
spin_unlock(&ctx->ring_lock);
return NULL;
}
fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
spin_unlock(&ctx->ring_lock);
return fence;
@ -392,35 +492,28 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
int i;
struct amdgpu_device *adev = ctx->adev;
struct drm_sched_rq *rq;
struct drm_sched_entity *entity;
struct amdgpu_ring *ring;
unsigned num_entities = amdgput_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
unsigned i;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
for (i = 0; i < adev->num_rings; i++) {
ring = adev->rings[i];
entity = &ctx->rings[i].entity;
rq = &ring->sched.sched_rq[ctx_prio];
for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
continue;
drm_sched_entity_set_rq(entity, rq);
drm_sched_entity_set_priority(entity, ctx_prio);
}
}
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity)
{
struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
struct dma_fence *other = cring->fences[idx];
struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
struct dma_fence *other = centity->fences[idx];
if (other) {
signed long r;
@ -444,6 +537,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
@ -459,13 +553,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
return;
}
for (i = 0; i < ctx->adev->num_rings; i++) {
for (i = 0; i < num_entities; i++) {
struct drm_sched_entity *entity;
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
max_wait);
entity = &ctx->entities[0][i].entity;
max_wait = drm_sched_entity_flush(entity, max_wait);
}
}
mutex_unlock(&mgr->lock);
@ -473,6 +565,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
@ -484,16 +577,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
if (!ctx->adev)
return;
for (i = 0; i < ctx->adev->num_rings; i++) {
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
if (kref_read(&ctx->refcount) == 1)
drm_sched_entity_fini(&ctx->rings[i].entity);
else
DRM_ERROR("ctx %p is still alive\n", ctx);
if (kref_read(&ctx->refcount) != 1) {
DRM_ERROR("ctx %p is still alive\n", ctx);
continue;
}
for (i = 0; i < num_entities; i++)
drm_sched_entity_fini(&ctx->entities[0][i].entity);
}
}

View File

@ -0,0 +1,88 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_CTX_H__
#define __AMDGPU_CTX_H__
#include "amdgpu_ring.h"
struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
struct amdgpu_ctx_entity {
uint64_t sequence;
struct dma_fence **fences;
struct drm_sched_entity entity;
};
struct amdgpu_ctx {
struct kref refcount;
struct amdgpu_device *adev;
unsigned reset_counter;
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
struct dma_fence **fences;
struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
struct mutex lock;
atomic_t guilty;
};
struct amdgpu_ctx_mgr {
struct amdgpu_device *adev;
struct mutex lock;
/* protected by lock */
struct idr ctx_handles;
};
extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity);
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
struct dma_fence *fence, uint64_t *seq);
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity,
uint64_t seq);
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
#endif

View File

@ -826,21 +826,13 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
struct drm_minor *minor = adev->ddev->primary;
struct dentry *ent, *root = minor->debugfs_root;
unsigned i, j;
unsigned int i;
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i],
S_IFREG | S_IRUGO, root,
adev, debugfs_regs[i]);
if (IS_ERR(ent)) {
for (j = 0; j < i; j++) {
debugfs_remove(adev->debugfs_regs[i]);
adev->debugfs_regs[i] = NULL;
}
return PTR_ERR(ent);
}
if (!i)
if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size);
adev->debugfs_regs[i] = ent;
}

View File

@ -62,6 +62,8 @@
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@ -651,71 +653,6 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
__clear_bit(wb, adev->wb.used);
}
/**
* amdgpu_device_vram_location - try to find VRAM location
*
* @adev: amdgpu device structure holding all necessary informations
* @mc: memory controller structure holding memory informations
* @base: base address at which to put VRAM
*
* Function will try to place VRAM at base address provided
* as parameter.
*/
void amdgpu_device_vram_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc, u64 base)
{
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
mc->vram_start = base;
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
if (limit && limit < mc->real_vram_size)
mc->real_vram_size = limit;
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
}
/**
* amdgpu_device_gart_location - try to find GART location
*
* @adev: amdgpu device structure holding all necessary informations
* @mc: memory controller structure holding memory informations
*
* Function will place try to place GART before or after VRAM.
*
* If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
*/
void amdgpu_device_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
{
u64 size_af, size_bf;
mc->gart_size += adev->pm.smu_prv_buffer_size;
size_af = adev->gmc.mc_mask - mc->vram_end;
size_bf = mc->vram_start;
if (size_bf > size_af) {
if (mc->gart_size > size_bf) {
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_bf;
}
mc->gart_start = 0;
} else {
if (mc->gart_size > size_af) {
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_af;
}
/* VCE doesn't like it when BOs cross a 4GB segment, so align
* the GART base on a 4GB boundary as well.
*/
mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);
}
mc->gart_end = mc->gart_start + mc->gart_size - 1;
dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
/**
* amdgpu_device_resize_fb_bar - try to resize FB BAR
*
@ -1397,7 +1334,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "vega12";
break;
case CHIP_RAVEN:
chip_name = "raven";
if (adev->rev_id >= 8)
chip_name = "raven2";
else if (adev->pdev->device == 0x15d8)
chip_name = "picasso";
else
chip_name = "raven";
break;
}
@ -1551,6 +1493,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
}
adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev))
adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@ -1581,6 +1525,92 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return 0;
}
static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
int i, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.sw)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = true;
}
}
return 0;
}
static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
int i, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.sw)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = true;
}
return 0;
}
static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
int r = 0;
int i;
if (adev->asic_type >= CHIP_VEGA10) {
for (i = 0; i < adev->num_ip_blocks; i++) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
if (adev->in_gpu_reset || adev->in_suspend) {
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
} else {
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
adev->ip_blocks[i].status.hw = true;
}
}
}
if (adev->powerplay.pp_funcs->load_firmware) {
r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
if (r) {
pr_err("firmware loading failed\n");
return r;
}
}
return 0;
}
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@ -1637,20 +1667,23 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
}
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.sw)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
if (r) {
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = true;
}
r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
if (r)
return r;
r = amdgpu_device_ip_hw_init_phase1(adev);
if (r)
return r;
r = amdgpu_device_fw_loading(adev);
if (r)
return r;
r = amdgpu_device_ip_hw_init_phase2(adev);
if (r)
return r;
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
if (amdgpu_sriov_vf(adev))
@ -1690,25 +1723,28 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
}
/**
* amdgpu_device_ip_late_set_cg_state - late init for clockgating
* amdgpu_device_set_cg_state - set clockgating for amdgpu device
*
* @adev: amdgpu_device pointer
*
* Late initialization pass enabling clockgating for hardware IPs.
* The list of all the hardware IPs that make up the asic is walked and the
* set_clockgating_state callbacks are run. This stage is run late
* in the init process.
* set_clockgating_state callbacks are run.
* Late initialization pass enabling clockgating for hardware IPs.
* Fini or suspend, pass disabling clockgating for hardware IPs.
* Returns 0 on success, negative error code on failure.
*/
static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
int i = 0, r;
int i, j, r;
if (amdgpu_emu_mode == 1)
return 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
for (j = 0; j < adev->num_ip_blocks; j++) {
i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@ -1717,7 +1753,7 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_GATE);
state);
if (r) {
DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@ -1729,15 +1765,16 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
return 0;
}
static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev)
static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
int i = 0, r;
int i, j, r;
if (amdgpu_emu_mode == 1)
return 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
for (j = 0; j < adev->num_ip_blocks; j++) {
i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@ -1746,7 +1783,7 @@ static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
AMD_PG_STATE_GATE);
state);
if (r) {
DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@ -1774,7 +1811,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
@ -1783,12 +1820,12 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.late_initialized = true;
}
adev->ip_blocks[i].status.late_initialized = true;
}
amdgpu_device_ip_late_set_cg_state(adev);
amdgpu_device_ip_late_set_pg_state(adev);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
queue_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
@ -1814,22 +1851,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
int i, r;
amdgpu_amdkfd_device_fini(adev);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
/* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@ -1845,20 +1875,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@ -1875,6 +1891,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
amdgpu_ucode_free_bo(adev);
amdgpu_free_static_csa(adev);
amdgpu_device_wb_fini(adev);
amdgpu_device_vram_scratch_fini(adev);
@ -1905,14 +1922,47 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
return 0;
}
static int amdgpu_device_enable_mgpu_fan_boost(void)
{
struct amdgpu_gpu_instance *gpu_ins;
struct amdgpu_device *adev;
int i, ret = 0;
mutex_lock(&mgpu_info.mutex);
/*
* MGPU fan boost feature should be enabled
* only when there are two or more dGPUs in
* the system
*/
if (mgpu_info.num_dgpu < 2)
goto out;
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
if (!(adev->flags & AMD_IS_APU) &&
!gpu_ins->mgpu_fan_enabled &&
adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
break;
gpu_ins->mgpu_fan_enabled = 1;
}
}
out:
mutex_unlock(&mgpu_info.mutex);
return ret;
}
/**
* amdgpu_device_ip_late_init_func_handler - work handler for clockgating
* amdgpu_device_ip_late_init_func_handler - work handler for ib test
*
* @work: work_struct
*
* Work handler for amdgpu_device_ip_late_set_cg_state. We put the
* clockgating setup into a worker thread to speed up driver init and
* resume from suspend.
* @work: work_struct.
*/
static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
{
@ -1923,6 +1973,23 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
mutex_lock(&adev->gfx.gfx_off_mutex);
if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
adev->gfx.gfx_off_state = true;
}
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
/**
@ -1940,23 +2007,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
int i, r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* displays are handled separately */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
/* ungate blocks so that suspend can properly shut them down */
if (adev->ip_blocks[i].version->funcs->set_clockgating_state) {
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
}
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
@ -1967,9 +2025,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
}
}
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return 0;
}
@ -1988,36 +2043,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
int i, r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
/* ungate SMC block first */
r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
}
/* call smu to disable gfx off feature first when suspend */
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue;
/* ungate blocks so that suspend can properly shut them down */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
}
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
@ -2027,9 +2058,6 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
}
}
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return 0;
}
@ -2048,11 +2076,17 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
int r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
r = amdgpu_device_ip_suspend_phase1(adev);
if (r)
return r;
r = amdgpu_device_ip_suspend_phase2(adev);
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return r;
}
@ -2178,7 +2212,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
@ -2210,6 +2245,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
r = amdgpu_device_ip_resume_phase1(adev);
if (r)
return r;
r = amdgpu_device_fw_loading(adev);
if (r)
return r;
r = amdgpu_device_ip_resume_phase2(adev);
return r;
@ -2335,7 +2375,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
adev->vm_manager.vm_pte_num_rings = 0;
adev->vm_manager.vm_pte_num_rqs = 0;
adev->gmc.gmc_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@ -2367,6 +2407,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->gfx.gfx_off_mutex);
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
@ -2393,7 +2434,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
/* Registers mapping */
@ -2700,11 +2744,14 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
adev->in_suspend = true;
drm_kms_helper_poll_disable(dev);
if (fbcon)
amdgpu_fbdev_set_suspend(adev, 1);
cancel_delayed_work_sync(&adev->late_init_work);
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
drm_modeset_lock_all(dev);
@ -2883,6 +2930,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
#ifdef CONFIG_PM
dev->dev->power.disable_depth--;
#endif
adev->in_suspend = false;
return 0;
}
@ -3041,71 +3090,22 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
}
/**
* amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
*
* @adev: amdgpu_device pointer
* @ring: amdgpu_ring for the engine handling the buffer operations
* @bo: amdgpu_bo buffer whose shadow is being restored
* @fence: dma_fence associated with the operation
*
* Restores the VRAM buffer contents from the shadow in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost.
* Returns 0 on success, negative error code on failure.
*/
static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct dma_fence **fence)
{
uint32_t domain;
int r;
if (!bo->shadow)
return 0;
r = amdgpu_bo_reserve(bo, true);
if (r)
return r;
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
/* if bo has been evicted, then no need to recover */
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
r = amdgpu_bo_validate(bo->shadow);
if (r) {
DRM_ERROR("bo validate failed!\n");
goto err;
}
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
NULL, fence, true);
if (r) {
DRM_ERROR("recover page table failed!\n");
goto err;
}
}
err:
amdgpu_bo_unreserve(bo);
return r;
}
/**
* amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
* amdgpu_device_recover_vram - Recover some VRAM contents
*
* @adev: amdgpu_device pointer
*
* Restores the contents of VRAM buffers from the shadows in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost.
* Returns 0 on success, 1 on failure.
*
* Returns:
* 0 on success, negative error code on failure.
*/
static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_bo *bo, *tmp;
struct dma_fence *fence = NULL, *next = NULL;
long r = 1;
int i = 0;
long tmo;
struct amdgpu_bo *shadow;
long r = 1, tmo;
if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(8000);
@ -3114,44 +3114,40 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
DRM_INFO("recover vram bo from shadow start\n");
mutex_lock(&adev->shadow_list_lock);
list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
next = NULL;
amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
/* No need to recover an evicted BO */
if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
continue;
r = amdgpu_bo_restore_shadow(shadow, &next);
if (r)
break;
if (fence) {
r = dma_fence_wait_timeout(fence, false, tmo);
if (r == 0)
pr_err("wait fence %p[%d] timeout\n", fence, i);
else if (r < 0)
pr_err("wait fence %p[%d] interrupted\n", fence, i);
if (r < 1) {
dma_fence_put(fence);
fence = next;
dma_fence_put(fence);
fence = next;
if (r <= 0)
break;
}
i++;
} else {
fence = next;
}
dma_fence_put(fence);
fence = next;
}
mutex_unlock(&adev->shadow_list_lock);
if (fence) {
r = dma_fence_wait_timeout(fence, false, tmo);
if (r == 0)
pr_err("wait fence %p[%d] timeout\n", fence, i);
else if (r < 0)
pr_err("wait fence %p[%d] interrupted\n", fence, i);
}
if (fence)
tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
if (r > 0)
DRM_INFO("recover vram bo from shadow done\n");
else
if (r <= 0 || tmo <= 0) {
DRM_ERROR("recover vram bo from shadow failed\n");
return -EIO;
}
return (r > 0) ? 0 : 1;
DRM_INFO("recover vram bo from shadow done\n");
return 0;
}
/**
@ -3204,6 +3200,10 @@ retry:
if (r)
goto out;
r = amdgpu_device_fw_loading(adev);
if (r)
return r;
r = amdgpu_device_ip_resume_phase2(adev);
if (r)
goto out;
@ -3225,8 +3225,8 @@ out:
}
}
if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
r = amdgpu_device_handle_vram_lost(adev);
if (!r)
r = amdgpu_device_recover_vram(adev);
return r;
}
@ -3260,6 +3260,10 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* we need recover gart prior to run SMC/CP/SDMA resume */
amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
r = amdgpu_device_fw_loading(adev);
if (r)
return r;
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
if (r)
@ -3272,38 +3276,50 @@ error:
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
atomic_inc(&adev->vram_lost_counter);
r = amdgpu_device_handle_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
}
return r;
}
/**
* amdgpu_device_should_recover_gpu - check if we should try GPU recovery
*
* @adev: amdgpu device pointer
*
* Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
* a hung GPU.
*/
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
if (!amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("Timeout, but no hardware hang detected.\n");
return false;
}
if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
!amdgpu_sriov_vf(adev))) {
DRM_INFO("GPU recovery disabled.\n");
return false;
}
return true;
}
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
* @force: forces reset regardless of amdgpu_gpu_recovery
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job, bool force)
struct amdgpu_job *job)
{
int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
return 0;
}
if (!force && (amdgpu_gpu_recovery == 0 ||
(amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
DRM_INFO("GPU recovery disabled.\n");
return 0;
}
dev_info(adev->dev, "GPU reset begin!\n");
mutex_lock(&adev->lock_reset);

View File

@ -23,6 +23,21 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,

View File

@ -278,6 +278,9 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_get_fan_speed_rpm(adev, s) \
((adev)->powerplay.pp_funcs->get_fan_speed_rpm)((adev)->powerplay.pp_handle, (s))
#define amdgpu_dpm_set_fan_speed_rpm(adev, s) \
((adev)->powerplay.pp_funcs->set_fan_speed_rpm)((adev)->powerplay.pp_handle, (s))
#define amdgpu_dpm_get_sclk(adev, l) \
((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)))
@ -357,6 +360,10 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
(adev)->powerplay.pp_handle, type, parameter, size))
#define amdgpu_dpm_enable_mgpu_fan_boost(adev) \
((adev)->powerplay.pp_funcs->enable_mgpu_fan_boost(\
(adev)->powerplay.pp_handle))
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */

View File

@ -36,6 +36,7 @@
#include "amdgpu.h"
#include "amdgpu_irq.h"
#include "amdgpu_gem.h"
#include "amdgpu_amdkfd.h"
@ -113,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/
uint amdgpu_pp_feature_mask = 0xfffd3fff;
/* OverDrive(bit 14) disabled by default*/
uint amdgpu_pp_feature_mask = 0xffffbfff;
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
@ -126,6 +127,9 @@ int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode = 0;
uint amdgpu_smu_memory_pool_size = 0;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
};
/**
* DOC: vramlimit (int)
@ -531,6 +535,102 @@ MODULE_PARM_DESC(smu_memory_pool_size,
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
#ifdef CONFIG_HSA_AMD
/**
* DOC: sched_policy (int)
* Set scheduling policy. Default is HWS(hardware scheduling) with over-subscription.
* Setting 1 disables over-subscription. Setting 2 disables HWS and statically
* assigns queues to HQDs.
*/
int sched_policy = KFD_SCHED_POLICY_HWS;
module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
/**
* DOC: hws_max_conc_proc (int)
* Maximum number of processes that HWS can schedule concurrently. The maximum is the
* number of VMIDs assigned to the HWS, which is also the default.
*/
int hws_max_conc_proc = 8;
module_param(hws_max_conc_proc, int, 0444);
MODULE_PARM_DESC(hws_max_conc_proc,
"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
/**
* DOC: cwsr_enable (int)
* CWSR(compute wave store and resume) allows the GPU to preempt shader execution in
* the middle of a compute wave. Default is 1 to enable this feature. Setting 0
* disables it.
*/
int cwsr_enable = 1;
module_param(cwsr_enable, int, 0444);
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
/**
* DOC: max_num_of_queues_per_device (int)
* Maximum number of queues per device. Valid setting is between 1 and 4096. Default
* is 4096.
*/
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
/**
* DOC: send_sigterm (int)
* Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
* but just print errors on dmesg. Setting 1 enables sending sigterm.
*/
int send_sigterm;
module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
/**
* DOC: debug_largebar (int)
* Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
* system. This limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
* Default value is 0, diabled.
*/
int debug_largebar;
module_param(debug_largebar, int, 0444);
MODULE_PARM_DESC(debug_largebar,
"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
/**
* DOC: ignore_crat (int)
* Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
* table to get information about AMD APUs. This option can serve as a workaround on
* systems with a broken CRAT table.
*/
int ignore_crat;
module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
/**
* DOC: noretry (int)
* This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry.
* Setting 1 disables retry.
* Retry is needed for recoverable page faults.
*/
int noretry;
module_param(noretry, int, 0644);
MODULE_PARM_DESC(noretry,
"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
/**
* DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
* Setting 1 enables halt on hang.
*/
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
#endif
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@ -770,14 +870,15 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
/* Vega 20 */
{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0, 0, 0}
};
@ -786,28 +887,6 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
static struct drm_driver kms_driver;
static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
{
struct apertures_struct *ap;
bool primary = false;
ap = alloc_apertures(1);
if (!ap)
return -ENOMEM;
ap->ranges[0].base = pci_resource_start(pdev, 0);
ap->ranges[0].size = pci_resource_len(pdev, 0);
#ifdef CONFIG_X86
primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
#endif
drm_fb_helper_remove_conflicting_framebuffers(ap, "amdgpudrmfb", primary);
kfree(ap);
return 0;
}
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@ -826,30 +905,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
/*
* Initialize amdkfd before starting radeon. If it was not loaded yet,
* defer radeon probing
*/
ret = amdgpu_amdkfd_init();
if (ret == -EPROBE_DEFER)
return ret;
/* Get rid of things like offb */
ret = amdgpu_kick_out_firmware_fb(pdev);
ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
if (ret)
return ret;
/* warn the user if they mix atomic and non-atomic capable GPUs */
if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
/* support atomic early so the atomic debugfs stuff gets created */
if (supports_atomic)
kms_driver.driver_features |= DRIVER_ATOMIC;
dev = drm_dev_alloc(&kms_driver, &pdev->dev);
if (IS_ERR(dev))
return PTR_ERR(dev);
if (!supports_atomic)
dev->driver_features &= ~DRIVER_ATOMIC;
ret = pci_enable_device(pdev);
if (ret)
goto err_free;
@ -882,8 +949,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
drm_dev_unregister(dev);
drm_dev_put(dev);
DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
drm_dev_unplug(dev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
@ -1101,7 +1168,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP |
DRIVER_USE_AGP | DRIVER_ATOMIC |
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
.load = amdgpu_driver_load_kms,
@ -1178,6 +1245,10 @@ static int __init amdgpu_init(void)
pdriver = &amdgpu_kms_pci_driver;
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
/* let modprobe override vga console setting */
return pci_register_driver(pdriver);

View File

@ -28,6 +28,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "atom.h"
#include "atombios_encoders.h"

View File

@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "cikd.h"
#include "amdgpu_gem.h"
#include <drm/drm_fb_helper.h>

View File

@ -216,8 +216,10 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
* Checks the current fence value and calculates the last
* signalled fence value. Wakes the fence queue if the
* sequence number has increased.
*
* Returns true if fence was processed
*/
void amdgpu_fence_process(struct amdgpu_ring *ring)
bool amdgpu_fence_process(struct amdgpu_ring *ring)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
uint32_t seq, last_seq;
@ -229,11 +231,12 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
if (seq != ring->fence_drv.sync_seq)
if (del_timer(&ring->fence_drv.fallback_timer) &&
seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
if (unlikely(seq == last_seq))
return;
return false;
last_seq &= drv->num_fences_mask;
seq &= drv->num_fences_mask;
@ -260,6 +263,8 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
dma_fence_put(fence);
} while (last_seq != seq);
return true;
}
/**
@ -274,7 +279,8 @@ static void amdgpu_fence_fallback(struct timer_list *t)
struct amdgpu_ring *ring = from_timer(ring, t,
fence_drv.fallback_timer);
amdgpu_fence_process(ring);
if (amdgpu_fence_process(ring))
DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
}
/**
@ -701,7 +707,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL, true);
amdgpu_device_gpu_recover(adev, NULL);
return 0;
}

View File

@ -112,7 +112,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
int r;
if (adev->gart.robj == NULL) {
if (adev->gart.bo == NULL) {
struct amdgpu_bo_param bp;
memset(&bp, 0, sizeof(bp));
@ -123,7 +123,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
if (r) {
return r;
}
@ -145,19 +145,18 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{
int r;
r = amdgpu_bo_reserve(adev->gart.robj, false);
r = amdgpu_bo_reserve(adev->gart.bo, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM);
r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
amdgpu_bo_unreserve(adev->gart.robj);
amdgpu_bo_unreserve(adev->gart.bo);
return r;
}
r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr);
r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
if (r)
amdgpu_bo_unpin(adev->gart.robj);
amdgpu_bo_unreserve(adev->gart.robj);
adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);
amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.bo);
return r;
}
@ -173,14 +172,14 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
{
int r;
if (adev->gart.robj == NULL) {
if (adev->gart.bo == NULL) {
return;
}
r = amdgpu_bo_reserve(adev->gart.robj, true);
r = amdgpu_bo_reserve(adev->gart.bo, true);
if (likely(r == 0)) {
amdgpu_bo_kunmap(adev->gart.robj);
amdgpu_bo_unpin(adev->gart.robj);
amdgpu_bo_unreserve(adev->gart.robj);
amdgpu_bo_kunmap(adev->gart.bo);
amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.bo);
adev->gart.ptr = NULL;
}
}
@ -196,10 +195,10 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
if (adev->gart.robj == NULL) {
if (adev->gart.bo == NULL) {
return;
}
amdgpu_bo_unref(&adev->gart.robj);
amdgpu_bo_unref(&adev->gart.bo);
}
/*

View File

@ -40,8 +40,7 @@ struct amdgpu_bo;
#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
struct amdgpu_gart {
u64 table_addr;
struct amdgpu_bo *robj;
struct amdgpu_bo *bo;
void *ptr;
unsigned num_gpu_pages;
unsigned num_cpu_pages;

View File

@ -24,13 +24,6 @@
#ifndef __AMDGPU_GDS_H__
#define __AMDGPU_GDS_H__
/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned,
* we should report GDS/GWS/OA size as PAGE_SIZE aligned
* */
#define AMDGPU_GDS_SHIFT 2
#define AMDGPU_GWS_SHIFT PAGE_SHIFT
#define AMDGPU_OA_SHIFT PAGE_SHIFT
struct amdgpu_ring;
struct amdgpu_bo;

View File

@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
size = size << AMDGPU_GDS_SHIFT;
else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
size = size << AMDGPU_GWS_SHIFT;
else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
size = size << AMDGPU_OA_SHIFT;
else
return -EINVAL;
/* GDS allocations must be DW aligned */
if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
size = ALIGN(size, 4);
}
size = roundup(size, PAGE_SIZE);
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
r = amdgpu_bo_reserve(vm->root.base.bo, false);
@ -572,16 +566,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
if (args->va_address >= AMDGPU_VA_HOLE_START &&
args->va_address < AMDGPU_VA_HOLE_END) {
if (args->va_address >= AMDGPU_GMC_HOLE_START &&
args->va_address < AMDGPU_GMC_HOLE_END) {
dev_dbg(&dev->pdev->dev,
"va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
args->va_address, AMDGPU_VA_HOLE_START,
AMDGPU_VA_HOLE_END);
args->va_address, AMDGPU_GMC_HOLE_START,
AMDGPU_GMC_HOLE_END);
return -EINVAL;
}
args->va_address &= AMDGPU_VA_HOLE_MASK;
args->va_address &= AMDGPU_GMC_HOLE_MASK;
if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n",

View File

@ -0,0 +1,92 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_GEM_H__
#define __AMDGPU_GEM_H__
#include <drm/amdgpu_drm.h>
#include <drm/drm_gem.h>
/*
* GEM.
*/
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
void amdgpu_gem_object_free(struct drm_gem_object *obj);
int amdgpu_gem_object_open(struct drm_gem_object *obj,
struct drm_file *file_priv);
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
/*
* GEM objects.
*/
void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
struct drm_gem_object **obj);
int amdgpu_mode_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct drm_device *dev,
uint32_t handle, uint64_t *offset_p);
int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
#endif

View File

@ -26,9 +26,44 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
/*
* GPU scratch registers helpers function.
* GPU GFX IP block helpers function.
*/
int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue)
{
int bit = 0;
bit += mec * adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
bit += pipe * adev->gfx.mec.num_queue_per_pipe;
bit += queue;
return bit;
}
void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
}
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
/**
* amdgpu_gfx_scratch_get - Allocate a scratch register
*
@ -340,3 +375,40 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
*
* @adev: amdgpu_device pointer
* @bool enable true: enable gfx off feature, false: disable gfx off feature
*
* 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
* 2. other client can send request to disable gfx off feature, the request should be honored.
* 3. other client can cancel their request of disable gfx off feature
* 4. other client should not send request to enable gfx off feature before disable gfx off feature.
*/
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
return;
if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
return;
mutex_lock(&adev->gfx.gfx_off_mutex);
if (!enable)
adev->gfx.gfx_off_req_count++;
else if (adev->gfx.gfx_off_req_count > 0)
adev->gfx.gfx_off_req_count--;
if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
} else if (!enable && adev->gfx.gfx_off_state) {
if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
adev->gfx.gfx_off_state = false;
}
mutex_unlock(&adev->gfx.gfx_off_mutex);
}

View File

@ -24,13 +24,316 @@
#ifndef __AMDGPU_GFX_H__
#define __AMDGPU_GFX_H__
/*
* GFX stuff
*/
#include "clearstate_defs.h"
#include "amdgpu_ring.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
#define AMDGPU_GFX_SAFE_MODE 0x00000001L
#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
struct amdgpu_rlc_funcs {
void (*enter_safe_mode)(struct amdgpu_device *adev);
void (*exit_safe_mode)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
struct amdgpu_bo *mec_fw_obj;
u64 mec_fw_gpu_addr;
u32 num_mec;
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
spinlock_t ring_lock;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
};
/*
* GPU scratch registers structures, functions & helpers
*/
struct amdgpu_scratch {
unsigned num_reg;
uint32_t reg_base;
uint32_t free_mask;
};
/*
* GFX configurations
*/
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
struct amdgpu_rb_config {
uint32_t rb_backend_disable;
uint32_t user_rb_backend_disable;
uint32_t raster_config;
uint32_t raster_config_1;
};
struct gb_addr_config {
uint16_t pipe_interleave_size;
uint8_t num_pipes;
uint8_t max_compress_frags;
uint8_t num_banks;
uint8_t num_se;
uint8_t num_rb_per_se;
};
struct amdgpu_gfx_config {
unsigned max_shader_engines;
unsigned max_tile_pipes;
unsigned max_cu_per_sh;
unsigned max_sh_per_se;
unsigned max_backends_per_se;
unsigned max_texture_channel_caches;
unsigned max_gprs;
unsigned max_gs_threads;
unsigned max_hw_contexts;
unsigned sc_prim_fifo_size_frontend;
unsigned sc_prim_fifo_size_backend;
unsigned sc_hiz_tile_fifo_size;
unsigned sc_earlyz_tile_fifo_size;
unsigned num_tile_pipes;
unsigned backend_enable_mask;
unsigned mem_max_burst_length_bytes;
unsigned mem_row_size_in_kb;
unsigned shader_engine_tile_size;
unsigned num_gpus;
unsigned multi_gpu_tile_size;
unsigned mc_arb_ramcfg;
unsigned gb_addr_config;
unsigned num_rbs;
unsigned gs_vgt_table_depth;
unsigned gs_prim_buffer_depth;
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
/* cached value of DB_DEBUG2 */
uint32_t db_debug2;
};
struct amdgpu_cu_info {
uint32_t simd_per_cu;
uint32_t max_waves_per_simd;
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
/* total active CU number */
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
uint32_t bitmap[4][4];
};
struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 instance);
void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
uint32_t wave, uint32_t *dst, int *no_fields);
void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
uint32_t wave, uint32_t thread, uint32_t start,
uint32_t size, uint32_t *dst);
void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
uint32_t wave, uint32_t start, uint32_t size,
uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
u32 queue);
};
struct amdgpu_ngg_buf {
struct amdgpu_bo *bo;
uint64_t gpu_addr;
uint32_t size;
uint32_t bo_size;
};
enum {
NGG_PRIM = 0,
NGG_POS,
NGG_CNTL,
NGG_PARAM,
NGG_BUF_MAX
};
struct amdgpu_ngg {
struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
uint32_t gds_reserve_addr;
uint32_t gds_reserve_size;
bool init;
};
struct sq_work {
struct work_struct work;
unsigned ih_data;
};
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
struct amdgpu_scratch scratch;
const struct firmware *me_fw; /* ME firmware */
uint32_t me_fw_version;
const struct firmware *pfp_fw; /* PFP firmware */
uint32_t pfp_fw_version;
const struct firmware *ce_fw; /* CE firmware */
uint32_t ce_fw_version;
const struct firmware *rlc_fw; /* RLC firmware */
uint32_t rlc_fw_version;
const struct firmware *mec_fw; /* MEC firmware */
uint32_t mec_fw_version;
const struct firmware *mec2_fw; /* MEC2 firmware */
uint32_t mec2_fw_version;
uint32_t me_feature_version;
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
uint32_t rlc_srlc_fw_version;
uint32_t rlc_srlc_feature_version;
uint32_t rlc_srlg_fw_version;
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
bool me_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
struct sq_work sq_work;
/* gfx status */
uint32_t gfx_current_status;
/* ce ram size*/
unsigned ce_ram_size;
struct amdgpu_cu_info cu_info;
const struct amdgpu_gfx_funcs *funcs;
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
/* NGG */
struct amdgpu_ngg ngg;
/* gfx off */
bool gfx_off_state; /* true: enabled, false: disabled */
struct mutex gfx_off_mutex;
uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
struct delayed_work gfx_off_delay_work;
/* pipe reservation */
struct mutex pipe_reserve_mutex;
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
*
* @bit_width: length of the mask
*
* create a variable length bit mask.
* Returns the bitmask.
*/
static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
{
return (u32)((1ULL << bit_width) - 1);
}
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
unsigned max_sh);
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
@ -47,47 +350,13 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size);
void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
/**
* amdgpu_gfx_create_bitmask - create a bitmask
*
* @bit_width: length of the mask
*
* create a variable length bit mask.
* Returns the bitmask.
*/
static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
{
return (u32)((1ULL << bit_width) - 1);
}
static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
int bit = 0;
bit += mec * adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
bit += pipe * adev->gfx.mec.num_queue_per_pipe;
bit += queue;
return bit;
}
static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
% adev->gfx.mec.num_pipe_per_mec;
*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
/ adev->gfx.mec.num_pipe_per_mec;
}
static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue);
void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue);
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
#endif

View File

@ -0,0 +1,215 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*/
#include "amdgpu.h"
/**
* amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
*
* @bo: the BO to get the PDE for
* @level: the level in the PD hirarchy
* @addr: resulting addr
* @flags: resulting flags
*
* Get the address and flags to be used for a PDE (Page Directory Entry).
*/
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_dma_tt *ttm;
switch (bo->tbo.mem.mem_type) {
case TTM_PL_TT:
ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
*addr = ttm->dma_address[0];
break;
case TTM_PL_VRAM:
*addr = amdgpu_bo_gpu_offset(bo);
break;
default:
*addr = 0;
break;
}
*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}
/**
* amdgpu_gmc_pd_addr - return the address of the root directory
*
*/
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
uint64_t pd_addr;
/* TODO: move that into ASIC specific code */
if (adev->asic_type >= CHIP_VEGA10) {
uint64_t flags = AMDGPU_PTE_VALID;
amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
pd_addr |= flags;
} else {
pd_addr = amdgpu_bo_gpu_offset(bo);
}
return pd_addr;
}
/**
* amdgpu_gmc_agp_addr - return the address in the AGP address space
*
* @tbo: TTM BO which needs the address, must be in GTT domain
*
* Tries to figure out how to access the BO through the AGP aperture. Returns
* AMDGPU_BO_INVALID_OFFSET if that is not possible.
*/
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_dma_tt *ttm;
if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
return AMDGPU_BO_INVALID_OFFSET;
ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
return AMDGPU_BO_INVALID_OFFSET;
return adev->gmc.agp_start + ttm->dma_address[0];
}
/**
* amdgpu_gmc_vram_location - try to find VRAM location
*
* @adev: amdgpu device structure holding all necessary informations
* @mc: memory controller structure holding memory informations
* @base: base address at which to put VRAM
*
* Function will try to place VRAM at base address provided
* as parameter.
*/
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base)
{
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
mc->vram_start = base;
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
if (limit && limit < mc->real_vram_size)
mc->real_vram_size = limit;
if (mc->xgmi.num_physical_nodes == 0) {
mc->fb_start = mc->vram_start;
mc->fb_end = mc->vram_end;
}
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
}
/**
* amdgpu_gmc_gart_location - try to find GART location
*
* @adev: amdgpu device structure holding all necessary informations
* @mc: memory controller structure holding memory informations
*
* Function will place try to place GART before or after VRAM.
*
* If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
*/
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
const uint64_t four_gb = 0x100000000ULL;
u64 size_af, size_bf;
/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
mc->gart_size += adev->pm.smu_prv_buffer_size;
/* VCE doesn't like it when BOs cross a 4GB segment, so align
* the GART base on a 4GB boundary as well.
*/
size_bf = mc->fb_start;
size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
if (mc->gart_size > max(size_bf, size_af)) {
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = max(size_bf, size_af);
}
if ((size_bf >= mc->gart_size && size_bf < size_af) ||
(size_af < mc->gart_size))
mc->gart_start = 0;
else
mc->gart_start = max_mc_address - mc->gart_size + 1;
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
/**
* amdgpu_gmc_agp_location - try to find AGP location
* @adev: amdgpu device structure holding all necessary informations
* @mc: memory controller structure holding memory informations
*
* Function will place try to find a place for the AGP BAR in the MC address
* space.
*
* AGP BAR will be assigned the largest available hole in the address space.
* Should be called after VRAM and GART locations are setup.
*/
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
const uint64_t sixteen_gb = 1ULL << 34;
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
if (mc->fb_start > mc->gart_start) {
size_bf = (mc->fb_start & sixteen_gb_mask) -
ALIGN(mc->gart_end + 1, sixteen_gb);
size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
} else {
size_bf = mc->fb_start & sixteen_gb_mask;
size_af = (mc->gart_start & sixteen_gb_mask) -
ALIGN(mc->fb_end + 1, sixteen_gb);
}
if (size_bf > size_af) {
mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
mc->agp_size = size_bf;
} else {
mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
mc->agp_size = size_af;
}
mc->agp_end = mc->agp_start + mc->agp_size - 1;
dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}

View File

@ -30,6 +30,19 @@
#include "amdgpu_irq.h"
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL
/*
* Hardware is programmed as if the hole doesn't exists with start and end
* address values.
*
* This mask is used to remove the upper 16bits of the VA and so come up with
* the linear addr value.
*/
#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
struct firmware;
/*
@ -74,6 +87,20 @@ struct amdgpu_gmc_funcs {
u64 *dst, u64 *flags);
};
struct amdgpu_xgmi {
/* from psp */
u64 device_id;
u64 hive_id;
/* fixed per family */
u64 node_segment_size;
/* physical node (0-3) */
unsigned physical_node_id;
/* number of nodes (0-4) */
unsigned num_physical_nodes;
/* gpu list in the same hive */
struct list_head head;
};
struct amdgpu_gmc {
resource_size_t aper_size;
resource_size_t aper_base;
@ -81,11 +108,22 @@ struct amdgpu_gmc {
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
u64 agp_size;
u64 agp_start;
u64 agp_end;
u64 gart_size;
u64 gart_start;
u64 gart_end;
u64 vram_start;
u64 vram_end;
/* FB region , it's same as local vram region in single GPU, in XGMI
* configuration, this region covers all GPUs in the same hive ,
* each GPU in the hive has the same view of this FB region .
* GPU0's vram starts at offset (0 * segment size) ,
* GPU1 starts at offset (1 * segment size), etc.
*/
u64 fb_start;
u64 fb_end;
unsigned vram_width;
u64 real_vram_size;
int vram_mtrr;
@ -109,8 +147,17 @@ struct amdgpu_gmc {
atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs;
struct amdgpu_xgmi xgmi;
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
*
@ -126,4 +173,28 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
return (gmc->real_vram_size == gmc->visible_vram_size);
}
/**
* amdgpu_gmc_sign_extend - sign extend the given gmc address
*
* @addr: address to extend
*/
static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
{
if (addr >= AMDGPU_GMC_HOLE_START)
addr |= AMDGPU_GMC_HOLE_END;
return addr;
}
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags);
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base);
void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
#endif

View File

@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"
#include "amdgpu_trace.h"
#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
@ -170,6 +171,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
(amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true;
if (tmp)
trace_amdgpu_ib_pipe_sync(job, tmp);
dma_fence_put(tmp);
}
@ -349,6 +354,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
if (!ring || !ring->ready)
continue;
/* skip IB tests for KIQ in general for the below reasons:
* 1. We never submit IBs to the KIQ
* 2. KIQ doesn't use the EOP interrupts,
* we use some other CP interrupt.
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
continue;
/* MM engine need more time */
if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE ||

View File

@ -574,7 +574,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
amdgpu_vmid_reset(adev, i, j);
amdgpu_sync_create(&id_mgr->ids[i].active);
amdgpu_sync_create(&id_mgr->ids[j].active);
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}

View File

@ -24,46 +24,21 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "amdgpu_amdkfd.h"
/**
* amdgpu_ih_ring_alloc - allocate memory for the IH ring
*
* @adev: amdgpu_device pointer
*
* Allocate a ring buffer for the interrupt controller.
* Returns 0 for success, errors for failure.
*/
static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev)
{
int r;
/* Allocate ring buffer */
if (adev->irq.ih.ring_obj == NULL) {
r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
&adev->irq.ih.ring_obj,
&adev->irq.ih.gpu_addr,
(void **)&adev->irq.ih.ring);
if (r) {
DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r);
return r;
}
}
return 0;
}
/**
* amdgpu_ih_ring_init - initialize the IH state
*
* @adev: amdgpu_device pointer
* @ih: ih ring to initialize
* @ring_size: ring size to allocate
* @use_bus_addr: true when we can use dma_alloc_coherent
*
* Initializes the IH state and allocates a buffer
* for the IH ring buffer.
* Returns 0 for success, errors for failure.
*/
int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
bool use_bus_addr)
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr)
{
u32 rb_bufsz;
int r;
@ -71,70 +46,76 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
/* Align ring size */
rb_bufsz = order_base_2(ring_size / 4);
ring_size = (1 << rb_bufsz) * 4;
adev->irq.ih.ring_size = ring_size;
adev->irq.ih.ptr_mask = adev->irq.ih.ring_size - 1;
adev->irq.ih.rptr = 0;
adev->irq.ih.use_bus_addr = use_bus_addr;
ih->ring_size = ring_size;
ih->ptr_mask = ih->ring_size - 1;
ih->rptr = 0;
ih->use_bus_addr = use_bus_addr;
if (adev->irq.ih.use_bus_addr) {
if (!adev->irq.ih.ring) {
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
adev->irq.ih.ring_size + 8,
&adev->irq.ih.rb_dma_addr);
if (adev->irq.ih.ring == NULL)
return -ENOMEM;
memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
}
return 0;
if (use_bus_addr) {
if (ih->ring)
return 0;
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
&ih->rb_dma_addr, GFP_KERNEL);
if (ih->ring == NULL)
return -ENOMEM;
memset((void *)ih->ring, 0, ih->ring_size + 8);
ih->wptr_offs = (ih->ring_size / 4) + 0;
ih->rptr_offs = (ih->ring_size / 4) + 1;
} else {
r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs);
r = amdgpu_device_wb_get(adev, &ih->wptr_offs);
if (r)
return r;
r = amdgpu_device_wb_get(adev, &ih->rptr_offs);
if (r) {
dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r);
amdgpu_device_wb_free(adev, ih->wptr_offs);
return r;
}
r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs);
r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ih->ring_obj, &ih->gpu_addr,
(void **)&ih->ring);
if (r) {
amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r);
amdgpu_device_wb_free(adev, ih->rptr_offs);
amdgpu_device_wb_free(adev, ih->wptr_offs);
return r;
}
return amdgpu_ih_ring_alloc(adev);
}
return 0;
}
/**
* amdgpu_ih_ring_fini - tear down the IH state
*
* @adev: amdgpu_device pointer
* @ih: ih ring to tear down
*
* Tears down the IH state and frees buffer
* used for the IH ring buffer.
*/
void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
if (adev->irq.ih.use_bus_addr) {
if (adev->irq.ih.ring) {
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
(void *)adev->irq.ih.ring,
adev->irq.ih.rb_dma_addr);
adev->irq.ih.ring = NULL;
}
if (ih->use_bus_addr) {
if (!ih->ring)
return;
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
dma_free_coherent(adev->dev, ih->ring_size + 8,
(void *)ih->ring, ih->rb_dma_addr);
ih->ring = NULL;
} else {
amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj,
&adev->irq.ih.gpu_addr,
(void **)&adev->irq.ih.ring);
amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs);
amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
(void **)&ih->ring);
amdgpu_device_wb_free(adev, ih->wptr_offs);
amdgpu_device_wb_free(adev, ih->rptr_offs);
}
}
@ -142,133 +123,45 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
* amdgpu_ih_process - interrupt handler
*
* @adev: amdgpu_device pointer
* @ih: ih ring to process
*
* Interrupt hander (VI), walk the IH ring.
* Returns irq process return code.
*/
int amdgpu_ih_process(struct amdgpu_device *adev)
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
void (*callback)(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih))
{
struct amdgpu_iv_entry entry;
u32 wptr;
if (!adev->irq.ih.enabled || adev->shutdown)
if (!ih->enabled || adev->shutdown)
return IRQ_NONE;
wptr = amdgpu_ih_get_wptr(adev);
restart_ih:
/* is somebody else already processing irqs? */
if (atomic_xchg(&adev->irq.ih.lock, 1))
if (atomic_xchg(&ih->lock, 1))
return IRQ_NONE;
DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, adev->irq.ih.rptr, wptr);
DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
/* Order reading of wptr vs. reading of IH ring data */
rmb();
while (adev->irq.ih.rptr != wptr) {
u32 ring_index = adev->irq.ih.rptr >> 2;
/* Prescreening of high-frequency interrupts */
if (!amdgpu_ih_prescreen_iv(adev)) {
adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
continue;
}
/* Before dispatching irq to IP blocks, send it to amdkfd */
amdgpu_amdkfd_interrupt(adev,
(const void *) &adev->irq.ih.ring[ring_index]);
entry.iv_entry = (const uint32_t *)
&adev->irq.ih.ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
amdgpu_irq_dispatch(adev, &entry);
while (ih->rptr != wptr) {
callback(adev, ih);
ih->rptr &= ih->ptr_mask;
}
amdgpu_ih_set_rptr(adev);
atomic_set(&adev->irq.ih.lock, 0);
atomic_set(&ih->lock, 0);
/* make sure wptr hasn't changed while processing */
wptr = amdgpu_ih_get_wptr(adev);
if (wptr != adev->irq.ih.rptr)
if (wptr != ih->rptr)
goto restart_ih;
return IRQ_HANDLED;
}
/**
* amdgpu_ih_add_fault - Add a page fault record
*
* @adev: amdgpu device pointer
* @key: 64-bit encoding of PASID and address
*
* This should be called when a retry page fault interrupt is
* received. If this is a new page fault, it will be added to a hash
* table. The return value indicates whether this is a new fault, or
* a fault that was already known and is already being handled.
*
* If there are too many pending page faults, this will fail. Retry
* interrupts should be ignored in this case until there is enough
* free space.
*
* Returns 0 if the fault was added, 1 if the fault was already known,
* -ENOSPC if there are too many pending faults.
*/
int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
{
unsigned long flags;
int r = -ENOSPC;
if (WARN_ON_ONCE(!adev->irq.ih.faults))
/* Should be allocated in <IP>_ih_sw_init on GPUs that
* support retry faults and require retry filtering.
*/
return r;
spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
/* Only let the hash table fill up to 50% for best performance */
if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
goto unlock_out;
r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
if (!r)
adev->irq.ih.faults->count++;
/* chash_table_copy_in should never fail unless we're losing count */
WARN_ON_ONCE(r < 0);
unlock_out:
spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
return r;
}
/**
* amdgpu_ih_clear_fault - Remove a page fault record
*
* @adev: amdgpu device pointer
* @key: 64-bit encoding of PASID and address
*
* This should be called when a page fault has been handled. Any
* future interrupt with this key will be processed as a new
* page fault.
*/
void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
{
unsigned long flags;
int r;
if (!adev->irq.ih.faults)
return;
spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
if (!WARN_ON_ONCE(r < 0)) {
adev->irq.ih.faults->count--;
WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
}
spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
}

View File

@ -24,20 +24,8 @@
#ifndef __AMDGPU_IH_H__
#define __AMDGPU_IH_H__
#include <linux/chash.h>
#include "soc15_ih_clientid.h"
struct amdgpu_device;
#define AMDGPU_IH_CLIENTID_LEGACY 0
#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
#define AMDGPU_PAGEFAULT_HASH_BITS 8
struct amdgpu_retryfault_hashtable {
DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
spinlock_t lock;
int count;
};
struct amdgpu_iv_entry;
/*
* R6xx+ IH ring
@ -57,30 +45,28 @@ struct amdgpu_ih_ring {
bool use_doorbell;
bool use_bus_addr;
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
struct amdgpu_retryfault_hashtable *faults;
};
#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
struct amdgpu_iv_entry {
unsigned client_id;
unsigned src_id;
unsigned ring_id;
unsigned vmid;
unsigned vmid_src;
uint64_t timestamp;
unsigned timestamp_src;
unsigned pasid;
unsigned pasid_src;
unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev);
bool (*prescreen_iv)(struct amdgpu_device *adev);
void (*decode_iv)(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
void (*set_rptr)(struct amdgpu_device *adev);
};
int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
int amdgpu_ih_process(struct amdgpu_device *adev);
int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
void (*callback)(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih));
#endif

View File

@ -51,6 +51,7 @@
#include "atom.h"
#include "amdgpu_connectors.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include <linux/pm_runtime.h>
@ -105,8 +106,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
if (!amdgpu_sriov_vf(adev))
amdgpu_device_gpu_recover(adev, NULL, false);
if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
}
/**
@ -123,7 +124,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
int r;
spin_lock_irqsave(&adev->irq.lock, irqflags);
for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
@ -146,6 +147,34 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
spin_unlock_irqrestore(&adev->irq.lock, irqflags);
}
/**
* amdgpu_irq_callback - callback from the IH ring
*
* @adev: amdgpu device pointer
* @ih: amdgpu ih ring
*
* Callback from IH ring processing to handle the entry at the current position
* and advance the read pointer.
*/
static void amdgpu_irq_callback(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
/* Prescreening of high-frequency interrupts */
if (!amdgpu_ih_prescreen_iv(adev))
return;
/* Before dispatching irq to IP blocks, send it to amdkfd */
amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]);
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
amdgpu_irq_dispatch(adev, &entry);
}
/**
* amdgpu_irq_handler - IRQ handler
*
@ -163,7 +192,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
struct amdgpu_device *adev = dev->dev_private;
irqreturn_t ret;
ret = amdgpu_ih_process(adev);
ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback);
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
return ret;
@ -273,7 +302,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
cancel_work_sync(&adev->reset_work);
}
for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
@ -313,7 +342,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source)
{
if (client_id >= AMDGPU_IH_CLIENTID_MAX)
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
return -EINVAL;
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
@ -367,7 +396,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
trace_amdgpu_iv(entry);
if (client_id >= AMDGPU_IH_CLIENTID_MAX) {
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
return;
}
@ -440,7 +469,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
int i, j, k;
for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;

View File

@ -25,19 +25,38 @@
#define __AMDGPU_IRQ_H__
#include <linux/irqdomain.h>
#include "soc15_ih_clientid.h"
#include "amdgpu_ih.h"
#define AMDGPU_MAX_IRQ_SRC_ID 0x100
#define AMDGPU_MAX_IRQ_SRC_ID 0x100
#define AMDGPU_MAX_IRQ_CLIENT_ID 0x100
#define AMDGPU_IRQ_CLIENTID_LEGACY 0
#define AMDGPU_IRQ_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
#define AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW 4
struct amdgpu_device;
struct amdgpu_iv_entry;
enum amdgpu_interrupt_state {
AMDGPU_IRQ_STATE_DISABLE,
AMDGPU_IRQ_STATE_ENABLE,
};
struct amdgpu_iv_entry {
unsigned client_id;
unsigned src_id;
unsigned ring_id;
unsigned vmid;
unsigned vmid_src;
uint64_t timestamp;
unsigned timestamp_src;
unsigned pasid;
unsigned pasid_src;
unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
};
struct amdgpu_irq_src {
unsigned num_types;
atomic_t *enabled_types;
@ -63,7 +82,7 @@ struct amdgpu_irq {
bool installed;
spinlock_t lock;
/* interrupt sources */
struct amdgpu_irq_client client[AMDGPU_IH_CLIENTID_MAX];
struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX];
/* status, etc. */
bool msi_enabled; /* msi enabled */

View File

@ -33,11 +33,18 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
DRM_ERROR("ring %s timeout, but soft recovered\n",
s_job->sched->name);
return;
}
DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq);
amdgpu_device_gpu_recover(ring->adev, job, false);
if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@ -66,6 +73,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->sched_sync);
(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
return 0;
}
@ -82,8 +90,6 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
if (r)
kfree(*job);
else
(*job)->vm_pd_addr = adev->gart.table_addr;
return r;
}

View File

@ -37,6 +37,32 @@
#include <linux/slab.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
int i;
mutex_lock(&mgpu_info.mutex);
for (i = 0; i < mgpu_info.num_gpu; i++) {
gpu_instance = &(mgpu_info.gpu_ins[i]);
if (gpu_instance->adev == adev) {
mgpu_info.gpu_ins[i] =
mgpu_info.gpu_ins[mgpu_info.num_gpu - 1];
mgpu_info.num_gpu--;
if (adev->flags & AMD_IS_APU)
mgpu_info.num_apu--;
else
mgpu_info.num_dgpu--;
break;
}
}
mutex_unlock(&mgpu_info.mutex);
}
/**
* amdgpu_driver_unload_kms - Main unload function for KMS.
@ -53,6 +79,8 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (adev == NULL)
return;
amdgpu_unregister_gpu_instance(adev);
if (adev->rmmio == NULL)
goto done_free;
@ -73,6 +101,31 @@ done_free:
dev->dev_private = NULL;
}
static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
mutex_lock(&mgpu_info.mutex);
if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) {
DRM_ERROR("Cannot register more gpu instance\n");
mutex_unlock(&mgpu_info.mutex);
return;
}
gpu_instance = &(mgpu_info.gpu_ins[mgpu_info.num_gpu]);
gpu_instance->adev = adev;
gpu_instance->mgpu_fan_enabled = 0;
mgpu_info.num_gpu++;
if (adev->flags & AMD_IS_APU)
mgpu_info.num_apu++;
else
mgpu_info.num_dgpu++;
mutex_unlock(&mgpu_info.mutex);
}
/**
* amdgpu_driver_load_kms - Main load function for KMS.
*
@ -167,6 +220,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
pm_runtime_put_autosuspend(dev->dev);
}
amdgpu_register_gpu_instance(adev);
out:
if (r) {
/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
@ -255,12 +309,133 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->psp.asd_fw_version;
fw_info->feature = adev->psp.asd_feature_version;
break;
case AMDGPU_INFO_FW_DMCU:
fw_info->ver = adev->dm.dmcu_fw_version;
fw_info->feature = 0;
break;
default:
return -EINVAL;
}
return 0;
}
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
{
uint32_t ib_start_alignment = 0;
uint32_t ib_size_alignment = 0;
enum amd_ip_block_type type;
unsigned int num_rings = 0;
unsigned int i, j;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
return -EINVAL;
switch (info->query_hw_ip.type) {
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
if (adev->gfx.gfx_ring[i].ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
if (adev->gfx.compute_ring[i].ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
if (adev->sdma.instance[i].ring.ready)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
if (adev->uvd.inst[i].ring.ready)
++num_rings;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
if (adev->vce.ring[i].ready)
++num_rings;
ib_start_alignment = 4;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
if (adev->uvd.inst[i].ring_enc[j].ready)
++num_rings;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_dec.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
if (adev->vcn.ring_enc[i].ready)
++num_rings;
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_jpeg.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
default:
return -EINVAL;
}
for (i = 0; i < adev->num_ip_blocks; i++)
if (adev->ip_blocks[i].version->type == type &&
adev->ip_blocks[i].status.valid)
break;
if (i == adev->num_ip_blocks)
return 0;
num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
num_rings);
result->hw_ip_version_major = adev->ip_blocks[i].version->major;
result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
result->ib_start_alignment = ib_start_alignment;
result->ib_size_alignment = ib_size_alignment;
return 0;
}
/*
* Userspace get information ioctl
*/
@ -286,7 +461,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
int i, j, found;
int i, found;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
@ -316,101 +491,14 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
case AMDGPU_INFO_HW_IP_INFO: {
struct drm_amdgpu_info_hw_ip ip = {};
enum amd_ip_block_type type;
uint32_t ring_mask = 0;
uint32_t ib_start_alignment = 0;
uint32_t ib_size_alignment = 0;
int ret;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
return -EINVAL;
ret = amdgpu_hw_ip_info(adev, info, &ip);
if (ret)
return ret;
switch (info->query_hw_ip.type) {
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
ring_mask |= adev->gfx.gfx_ring[i].ready << i;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
ring_mask |= adev->gfx.compute_ring[i].ready << i;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
ring_mask |= adev->sdma.instance[i].ring.ready << i;
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
ring_mask |= adev->uvd.inst[i].ring.ready;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
ring_mask |= adev->vce.ring[i].ready << i;
ib_start_alignment = 4;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_dec.ready;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
ring_mask |= adev->vcn.ring_enc[i].ready << i;
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_jpeg.ready;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
default:
return -EINVAL;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
if (adev->ip_blocks[i].version->type == type &&
adev->ip_blocks[i].status.valid) {
ip.hw_ip_version_major = adev->ip_blocks[i].version->major;
ip.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
ip.capabilities_flags = 0;
ip.available_rings = ring_mask;
ip.ib_start_alignment = ib_start_alignment;
ip.ib_size_alignment = ib_size_alignment;
break;
}
}
return copy_to_user(out, &ip,
min((size_t)size, sizeof(ip))) ? -EFAULT : 0;
ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip)));
return ret ? -EFAULT : 0;
}
case AMDGPU_INFO_HW_IP_COUNT: {
enum amd_ip_block_type type;
@ -492,13 +580,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_gds gds_info;
memset(&gds_info, 0, sizeof(gds_info));
gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT;
gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
gds_info.gds_total_size = adev->gds.mem.total_size;
gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
return copy_to_user(out, &gds_info,
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
}
@ -617,16 +705,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
vm_size -= AMDGPU_VA_RESERVED_SIZE;
/* Older VCE FW versions are buggy and can handle only 40bits */
if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
if (adev->vce.fw_version &&
adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
vm_size = min(vm_size, 1ULL << 40);
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max =
min(vm_size, AMDGPU_VA_HOLE_START);
min(vm_size, AMDGPU_GMC_HOLE_START);
if (vm_size > AMDGPU_VA_HOLE_START) {
dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
if (vm_size > AMDGPU_GMC_HOLE_START) {
dev_info.high_va_offset = AMDGPU_GMC_HOLE_END;
dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
}
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
@ -941,10 +1030,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pm_runtime_get_sync(dev->dev);
if (adev->asic_type != CHIP_RAVEN) {
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
amdgpu_uvd_free_handles(adev, file_priv);
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
}
amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
@ -1262,6 +1351,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* DMCU */
query_fw.fw_type = AMDGPU_INFO_FW_DMCU;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);

View File

@ -51,18 +51,6 @@
*
*/
static bool amdgpu_bo_need_backup(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return false;
if (amdgpu_gpu_recovery == 0 ||
(amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))
return false;
return true;
}
/**
* amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
*
@ -163,10 +151,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
if (flags & AMDGPU_GEM_CREATE_SHADOW)
places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
else
places[c].lpfn = 0;
places[c].lpfn = 0;
places[c].flags = TTM_PL_FLAG_TT;
if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
places[c].flags |= TTM_PL_FLAG_WC |
@ -253,6 +238,11 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
bool free = false;
int r;
if (!size) {
amdgpu_bo_unref(bo_ptr);
return 0;
}
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = align;
@ -346,7 +336,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
if (r)
return r;
amdgpu_bo_unreserve(*bo_ptr);
if (*bo_ptr)
amdgpu_bo_unreserve(*bo_ptr);
return 0;
}
@ -436,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
int r;
page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA))
size <<= PAGE_SHIFT;
else
size = ALIGN(size, PAGE_SIZE);
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
@ -451,7 +446,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
return -ENOMEM;
drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va);
bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
bo->allowed_domains = bo->preferred_domains;
@ -541,7 +536,7 @@ fail_unreserve:
}
static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size, int byte_align,
unsigned long size,
struct amdgpu_bo *bo)
{
struct amdgpu_bo_param bp;
@ -552,7 +547,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = byte_align;
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW;
@ -563,7 +557,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
list_add_tail(&bo->shadow_list, &adev->shadow_list);
list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
mutex_unlock(&adev->shadow_list_lock);
}
@ -596,12 +590,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (r)
return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) {
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL));
r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
if (!bp->resv)
reservation_object_unlock((*bo_ptr)->tbo.resv);
@ -695,13 +689,10 @@ retry:
}
/**
* amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
* @adev: amdgpu device object
* @ring: amdgpu_ring for the engine handling the buffer operations
* @bo: &amdgpu_bo buffer to be restored
* @resv: reservation object with embedded fence
* amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
*
* @shadow: &amdgpu_bo shadow to be restored
* @fence: dma_fence associated with the operation
* @direct: whether to submit the job directly
*
* Copies a buffer object's shadow content back to the object.
* This is used for recovering a buffer from its shadow in case of a gpu
@ -710,36 +701,19 @@ retry:
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct)
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
{
struct amdgpu_bo *shadow = bo->shadow;
uint64_t bo_addr, shadow_addr;
int r;
struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
uint64_t shadow_addr, parent_addr;
if (!shadow)
return -EINVAL;
shadow_addr = amdgpu_bo_gpu_offset(shadow);
parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
bo_addr = amdgpu_bo_gpu_offset(bo);
shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
goto err;
r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
amdgpu_bo_size(bo), resv, fence,
direct, false);
if (!r)
amdgpu_bo_fence(bo, *fence, true);
err:
return r;
return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
amdgpu_bo_size(shadow), NULL, fence,
true, false);
}
/**
@ -1019,10 +993,12 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
if (0 && (adev->flags & AMD_IS_APU)) {
#ifndef CONFIG_HIBERNATION
if (adev->flags & AMD_IS_APU) {
/* Useless to evict on IGP chips */
return 0;
}
#endif
return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}
@ -1360,15 +1336,13 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem));
WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
!bo->pin_count);
!bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
!(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
return bo->tbo.offset;
return amdgpu_gmc_sign_extend(bo->tbo.offset);
}
/**

View File

@ -89,8 +89,8 @@ struct amdgpu_bo {
void *metadata;
u32 metadata_size;
unsigned prime_shared_count;
/* list of all virtual address to which this bo is associated to */
struct list_head va;
/* per VM structure for page tables and with virtual addresses */
struct amdgpu_vm_bo_base *vm_bo;
/* Constant after initialization */
struct drm_gem_object gem_base;
struct amdgpu_bo *parent;
@ -193,19 +193,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
return drm_vma_node_offset_addr(&bo->tbo.vma_node);
}
/**
* amdgpu_bo_gpu_accessible - return whether the bo is currently in memory that
* is accessible to the GPU.
*/
static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
{
switch (bo->tbo.mem.mem_type) {
case TTM_PL_TT: return amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem);
case TTM_PL_VRAM: return true;
default: return false;
}
}
/**
* amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
*/
@ -286,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct reservation_object *resv,
struct dma_fence **fence, bool direct);
int amdgpu_bo_validate(struct amdgpu_bo *bo);
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct);
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
struct dma_fence **fence);
uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
uint32_t domain);

View File

@ -27,6 +27,7 @@
#include "amdgpu_drv.h"
#include "amdgpu_pm.h"
#include "amdgpu_dpm.h"
#include "amdgpu_display.h"
#include "atom.h"
#include <linux/power_supply.h>
#include <linux/hwmon.h>
@ -473,6 +474,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* in each power level within a power state. The pp_od_clk_voltage is used for
* this.
*
* < For Vega10 and previous ASICs >
*
* Reading the file will display:
*
* - a list of engine clock levels and voltages labeled OD_SCLK
@ -490,6 +493,44 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* "c" (commit) to the file to commit your changes. If you want to reset to the
* default power levels, write "r" (reset) to the file to reset them.
*
*
* < For Vega20 >
*
* Reading the file will display:
*
* - minimum and maximum engine clock labeled OD_SCLK
*
* - maximum memory clock labeled OD_MCLK
*
* - three <frequency, voltage> points labeled OD_VDDC_CURVE.
* They can be used to calibrate the sclk voltage curve.
*
* - a list of valid ranges for sclk, mclk, and voltage curve points
* labeled OD_RANGE
*
* To manually adjust these settings:
*
* - First select manual using power_dpm_force_performance_level
*
* - For clock frequency setting, enter a new value by writing a
* string that contains "s/m index clock" to the file. The index
* should be 0 if to set minimum clock. And 1 if to set maximum
* clock. E.g., "s 0 500" will update minimum sclk to be 500 MHz.
* "m 1 800" will update maximum mclk to be 800Mhz.
*
* For sclk voltage curve, enter the new values by writing a
* string that contains "vc point clock voltage" to the file. The
* points are indexed by 0, 1 and 2. E.g., "vc 0 300 600" will
* update point1 with clock set as 300Mhz and voltage as
* 600mV. "vc 2 1000 1000" will update point3 with clock set
* as 1000Mhz and voltage 1000mV.
*
* - When you have edited all of the states as needed, write "c" (commit)
* to the file to commit your changes
*
* - If you want to reset to the default power levels, write "r" (reset)
* to the file to reset them
*
*/
static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
@ -519,6 +560,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
type = PP_OD_RESTORE_DEFAULT_TABLE;
else if (*buf == 'c')
type = PP_OD_COMMIT_DPM_TABLE;
else if (!strncmp(buf, "vc", 2))
type = PP_OD_EDIT_VDDC_CURVE;
else
return -EINVAL;
@ -526,6 +569,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
tmp_str = buf_cpy;
if (type == PP_OD_EDIT_VDDC_CURVE)
tmp_str++;
while (isspace(*++tmp_str));
while (tmp_str[0]) {
@ -569,6 +614,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
return size;
} else {
@ -1074,12 +1120,19 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
struct amdgpu_device *adev = dev_get_drvdata(dev);
int err;
u32 value;
u32 pwm_mode;
/* Can't adjust fan when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
pr_info("manual fan speed control should be enabled first\n");
return -EINVAL;
}
err = kstrtou32(buf, 10, &value);
if (err)
return err;
@ -1141,6 +1194,148 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
return sprintf(buf, "%i\n", speed);
}
static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
u32 min_rpm = 0;
u32 size = sizeof(min_rpm);
int r;
if (!adev->powerplay.pp_funcs->read_sensor)
return -EINVAL;
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
(void *)&min_rpm, &size);
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", min_rpm);
}
static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
u32 max_rpm = 0;
u32 size = sizeof(max_rpm);
int r;
if (!adev->powerplay.pp_funcs->read_sensor)
return -EINVAL;
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
(void *)&max_rpm, &size);
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", max_rpm);
}
static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
int err;
u32 rpm = 0;
/* Can't adjust fan when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm);
if (err)
return err;
}
return sprintf(buf, "%i\n", rpm);
}
static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
int err;
u32 value;
u32 pwm_mode;
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
if (pwm_mode != AMD_FAN_CTRL_MANUAL)
return -ENODATA;
/* Can't adjust fan when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
err = kstrtou32(buf, 10, &value);
if (err)
return err;
if (adev->powerplay.pp_funcs->set_fan_speed_rpm) {
err = amdgpu_dpm_set_fan_speed_rpm(adev, value);
if (err)
return err;
}
return count;
}
static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
u32 pwm_mode = 0;
if (!adev->powerplay.pp_funcs->get_fan_control_mode)
return -EINVAL;
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1);
}
static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
int err;
int value;
u32 pwm_mode;
/* Can't adjust fan when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
if (!adev->powerplay.pp_funcs->set_fan_control_mode)
return -EINVAL;
err = kstrtoint(buf, 10, &value);
if (err)
return err;
if (value == 0)
pwm_mode = AMD_FAN_CTRL_AUTO;
else if (value == 1)
pwm_mode = AMD_FAN_CTRL_MANUAL;
else
return -EINVAL;
amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
return count;
}
static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -1360,8 +1555,16 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
*
* - pwm1_max: pulse width modulation fan control maximum level (255)
*
* - fan1_min: an minimum value Unit: revolution/min (RPM)
*
* - fan1_max: an maxmum value Unit: revolution/max (RPM)
*
* - fan1_input: fan speed in RPM
*
* - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM)
*
* - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable
*
* You can use hwmon tools like sensors to view this information on your system.
*
*/
@ -1374,6 +1577,10 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);
static SENSOR_DEVICE_ATTR(fan1_min, S_IRUGO, amdgpu_hwmon_get_fan1_min, NULL, 0);
static SENSOR_DEVICE_ATTR(fan1_max, S_IRUGO, amdgpu_hwmon_get_fan1_max, NULL, 0);
static SENSOR_DEVICE_ATTR(fan1_target, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_target, amdgpu_hwmon_set_fan1_target, 0);
static SENSOR_DEVICE_ATTR(fan1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_enable, amdgpu_hwmon_set_fan1_enable, 0);
static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0);
static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0);
static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0);
@ -1392,6 +1599,10 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_pwm1_min.dev_attr.attr,
&sensor_dev_attr_pwm1_max.dev_attr.attr,
&sensor_dev_attr_fan1_input.dev_attr.attr,
&sensor_dev_attr_fan1_min.dev_attr.attr,
&sensor_dev_attr_fan1_max.dev_attr.attr,
&sensor_dev_attr_fan1_target.dev_attr.attr,
&sensor_dev_attr_fan1_enable.dev_attr.attr,
&sensor_dev_attr_in0_input.dev_attr.attr,
&sensor_dev_attr_in0_label.dev_attr.attr,
&sensor_dev_attr_in1_input.dev_attr.attr,
@ -1410,13 +1621,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
/* Skip fan attributes if fan is not present */
if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
return 0;
/* Skip limit attributes if DPM is not enabled */
@ -1426,7 +1640,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
return 0;
/* mask fan attributes if we have no bindings for this asic to expose */
@ -1451,10 +1670,18 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
/* hide max/min values if we can't both query and manage the fan */
if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
!adev->powerplay.pp_funcs->get_fan_speed_percent) &&
(!adev->powerplay.pp_funcs->set_fan_speed_rpm &&
!adev->powerplay.pp_funcs->get_fan_speed_rpm) &&
(attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
return 0;
if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm &&
!adev->powerplay.pp_funcs->get_fan_speed_rpm) &&
(attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_min.dev_attr.attr))
return 0;
/* only APUs have vddnb */
if (!(adev->flags & AMD_IS_APU) &&
(attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
@ -1719,18 +1946,6 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
mutex_lock(&adev->pm.mutex);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.uvd_active = true;
adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
mutex_unlock(&adev->pm.mutex);
} else {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.uvd_active = false;
mutex_unlock(&adev->pm.mutex);
}
amdgpu_pm_compute_clocks(adev);
}
}
@ -1741,29 +1956,6 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
mutex_lock(&adev->pm.mutex);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.vce_active = true;
/* XXX select vce level based on ring/task */
adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
mutex_unlock(&adev->pm.mutex);
amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_UNGATE);
amdgpu_pm_compute_clocks(adev);
} else {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_GATE);
amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_GATE);
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.vce_active = false;
mutex_unlock(&adev->pm.mutex);
amdgpu_pm_compute_clocks(adev);
}
}
}
@ -1965,6 +2157,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
{
uint32_t value;
uint64_t value64;
uint32_t query = 0;
int size;
@ -2003,6 +2196,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
seq_printf(m, "GPU Load: %u %%\n", value);
seq_printf(m, "\n");
/* SMC feature mask */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size))
seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64);
/* UVD clocks */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
if (!value) {

View File

@ -35,6 +35,7 @@
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
@ -43,10 +44,10 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops;
/**
* amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
* implementation
* @obj: GEM buffer object
* @obj: GEM buffer object (BO)
*
* Returns:
* A scatter/gather table for the pinned pages of the buffer object's memory.
* A scatter/gather table for the pinned pages of the BO's memory.
*/
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
@ -58,9 +59,9 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
/**
* amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
* @obj: GEM buffer object
* @obj: GEM BO
*
* Sets up an in-kernel virtual mapping of the buffer object's memory.
* Sets up an in-kernel virtual mapping of the BO's memory.
*
* Returns:
* The virtual address of the mapping or an error pointer.
@ -80,10 +81,10 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
/**
* amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
* @obj: GEM buffer object
* @vaddr: virtual address (unused)
* @obj: GEM BO
* @vaddr: Virtual address (unused)
*
* Tears down the in-kernel virtual mapping of the buffer object's memory.
* Tears down the in-kernel virtual mapping of the BO's memory.
*/
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
{
@ -94,14 +95,14 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
/**
* amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
* @obj: GEM buffer object
* @vma: virtual memory area
* @obj: GEM BO
* @vma: Virtual memory area
*
* Sets up a userspace mapping of the buffer object's memory in the given
* Sets up a userspace mapping of the BO's memory in the given
* virtual memory area.
*
* Returns:
* 0 on success or negative error code.
* 0 on success or a negative error code on failure.
*/
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
@ -144,10 +145,10 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
* @attach: DMA-buf attachment
* @sg: Scatter/gather table
*
* Import shared DMA buffer memory exported by another device.
* Imports shared DMA buffer memory exported by another device.
*
* Returns:
* A new GEM buffer object of the given DRM device, representing the memory
* A new GEM BO of the given DRM device, representing the memory
* described by the given DMA-buf attachment and scatter/gather table.
*/
struct drm_gem_object *
@ -190,7 +191,7 @@ error:
/**
* amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: shared DMA buffer
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
@ -198,7 +199,7 @@ error:
* all DMA devices.
*
* Returns:
* 0 on success or negative error code.
* 0 on success or a negative error code on failure.
*/
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
@ -250,11 +251,11 @@ error_detach:
/**
* amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: shared DMA buffer
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* the other device. For now, simply unpins the buffer from GTT.
* another device. For now, simply unpins the buffer from GTT.
*/
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
@ -279,10 +280,10 @@ error:
/**
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
* @obj: GEM buffer object
* @obj: GEM BO
*
* Returns:
* The buffer object's reservation object.
* The BO's reservation object.
*/
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
@ -293,15 +294,15 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
/**
* amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: shared DMA buffer
* @direction: direction of DMA transfer
* @dma_buf: Shared DMA buffer
* @direction: Direction of DMA transfer
*
* This is called before CPU access to the shared DMA buffer's memory. If it's
* a read access, the buffer is moved to the GTT domain if possible, for optimal
* CPU read performance.
*
* Returns:
* 0 on success or negative error code.
* 0 on success or a negative error code on failure.
*/
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
@ -348,14 +349,14 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
* @dev: DRM device
* @gobj: GEM buffer object
* @flags: flags like DRM_CLOEXEC and DRM_RDWR
* @gobj: GEM BO
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
*
* The main work is done by the &drm_gem_prime_export helper, which in turn
* uses &amdgpu_gem_prime_res_obj.
*
* Returns:
* Shared DMA buffer representing the GEM buffer object from the given device.
* Shared DMA buffer representing the GEM BO from the given device.
*/
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
@ -386,7 +387,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
* uses &amdgpu_gem_prime_import_sg_table.
*
* Returns:
* GEM buffer object representing the shared DMA buffer for the given device.
* GEM BO representing the shared DMA buffer for the given device.
*/
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)

View File

@ -31,6 +31,7 @@
#include "soc15_common.h"
#include "psp_v3_1.h"
#include "psp_v10_0.h"
#include "psp_v11_0.h"
static void psp_set_funcs(struct amdgpu_device *adev);
@ -52,12 +53,14 @@ static int psp_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
psp_v3_1_set_psp_funcs(psp);
break;
case CHIP_RAVEN:
psp_v10_0_set_psp_funcs(psp);
break;
case CHIP_VEGA20:
psp_v11_0_set_psp_funcs(psp);
break;
default:
return -EINVAL;
}
@ -131,6 +134,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
msleep(1);
}
/* the status field must be 0 after FW is loaded */
if (ucode && psp->cmd_buf_mem->resp.status) {
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
psp->cmd_buf_mem->resp.status, ucode->ucode_id);
return -EINVAL;
}
if (ucode) {
ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
@ -160,7 +170,7 @@ static int psp_tmr_init(struct psp_context *psp)
* Note: this memory need be reserved till the driver
* uninitializes.
*/
ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000,
ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x100000,
AMDGPU_GEM_DOMAIN_VRAM,
&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@ -176,7 +186,9 @@ static int psp_tmr_load(struct psp_context *psp)
if (!cmd)
return -ENOMEM;
psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000);
psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
PSP_TMR_SIZE, psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 1);
@ -440,8 +452,6 @@ static int psp_hw_fini(void *handle)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
amdgpu_ucode_fini_bo(adev);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@ -594,3 +604,12 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
.rev = 0,
.funcs = &psp_ip_funcs,
};
const struct amdgpu_ip_block_version psp_v11_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_PSP,
.major = 11,
.minor = 0,
.rev = 0,
.funcs = &psp_ip_funcs,
};

View File

@ -32,8 +32,10 @@
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE 0x400000
struct psp_context;
struct psp_xgmi_topology_info;
enum psp_ring_type
{
@ -63,18 +65,27 @@ struct psp_funcs
int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
int (*ring_stop)(struct psp_context *psp,
enum psp_ring_type ring_type);
int (*ring_destroy)(struct psp_context *psp,
enum psp_ring_type ring_type);
int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
int (*cmd_submit)(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
int index);
bool (*compare_sram_data)(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
};
struct psp_context
@ -83,11 +94,11 @@ struct psp_context
struct psp_ring km_ring;
struct psp_gfx_cmd_resp *cmd;
const struct psp_funcs *funcs;
const struct psp_funcs *funcs;
/* fence buffer */
struct amdgpu_bo *fw_pri_bo;
uint64_t fw_pri_mc_addr;
struct amdgpu_bo *fw_pri_bo;
uint64_t fw_pri_mc_addr;
void *fw_pri_buf;
/* sos firmware */
@ -100,8 +111,8 @@ struct psp_context
uint8_t *sos_start_addr;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
uint64_t tmr_mc_addr;
struct amdgpu_bo *tmr_bo;
uint64_t tmr_mc_addr;
void *tmr_buf;
/* asd firmware and buffer */
@ -110,13 +121,13 @@ struct psp_context
uint32_t asd_feature_version;
uint32_t asd_ucode_size;
uint8_t *asd_start_addr;
struct amdgpu_bo *asd_shared_bo;
uint64_t asd_shared_mc_addr;
struct amdgpu_bo *asd_shared_bo;
uint64_t asd_shared_mc_addr;
void *asd_shared_buf;
/* fence buffer */
struct amdgpu_bo *fence_buf_bo;
uint64_t fence_buf_mc_addr;
struct amdgpu_bo *fence_buf_bo;
uint64_t fence_buf_mc_addr;
void *fence_buf;
/* cmd buffer */
@ -130,6 +141,23 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
struct psp_xgmi_topology_info {
/* Generated by PSP to identify the GPU instance within xgmi connection */
uint64_t device_id;
/*
* If all bits set to 0 , driver indicates it wants to retrieve the xgmi
* connection vector topology, but not access enable the connections
* if some or all bits are set to 1, driver indicates it want to retrieve the
* current xgmi topology and access enable the link to GPU[i] associated
* with the bit position in the vector.
* On return,: bits indicated which xgmi links are present/active depending
* on the value passed in. The relative bit offset for the relative GPU index
* within the hive is always marked active.
*/
uint32_t connection_mask;
uint32_t reserved; /* must be 0 */
};
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@ -149,6 +177,18 @@ struct amdgpu_psp_funcs {
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
#define psp_xgmi_get_device_id(psp) \
((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
#define psp_xgmi_get_hive_id(psp) \
((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
#define psp_xgmi_get_topology_info(psp, num_device, topology) \
((psp)->funcs->xgmi_get_topology_info ? \
(psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL)
#define psp_xgmi_set_topology_info(psp, num_device, topology) \
((psp)->funcs->xgmi_set_topology_info ? \
(psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
extern const struct amd_ip_funcs psp_ip_funcs;
@ -159,5 +199,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
int psp_gpu_reset(struct amdgpu_device *adev);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
#endif

View File

@ -1,316 +0,0 @@
/*
* Copyright 2017 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Andres Rodriguez
*/
#include "amdgpu.h"
#include "amdgpu_ring.h"
static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
int hw_ip)
{
if (!mapper)
return -EINVAL;
if (hw_ip > AMDGPU_MAX_IP_NUM)
return -EINVAL;
mapper->hw_ip = hw_ip;
mutex_init(&mapper->lock);
memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
return 0;
}
static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
int ring)
{
return mapper->queue_map[ring];
}
static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
int ring, struct amdgpu_ring *pring)
{
if (WARN_ON(mapper->queue_map[ring])) {
DRM_ERROR("Un-expected ring re-map\n");
return -EINVAL;
}
mapper->queue_map[ring] = pring;
return 0;
}
static int amdgpu_identity_map(struct amdgpu_device *adev,
struct amdgpu_queue_mapper *mapper,
u32 ring,
struct amdgpu_ring **out_ring)
{
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
*out_ring = &adev->gfx.gfx_ring[ring];
break;
case AMDGPU_HW_IP_COMPUTE:
*out_ring = &adev->gfx.compute_ring[ring];
break;
case AMDGPU_HW_IP_DMA:
*out_ring = &adev->sdma.instance[ring].ring;
break;
case AMDGPU_HW_IP_UVD:
*out_ring = &adev->uvd.inst[0].ring;
break;
case AMDGPU_HW_IP_VCE:
*out_ring = &adev->vce.ring[ring];
break;
case AMDGPU_HW_IP_UVD_ENC:
*out_ring = &adev->uvd.inst[0].ring_enc[ring];
break;
case AMDGPU_HW_IP_VCN_DEC:
*out_ring = &adev->vcn.ring_dec;
break;
case AMDGPU_HW_IP_VCN_ENC:
*out_ring = &adev->vcn.ring_enc[ring];
break;
case AMDGPU_HW_IP_VCN_JPEG:
*out_ring = &adev->vcn.ring_jpeg;
break;
default:
*out_ring = NULL;
DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
return -EINVAL;
}
return amdgpu_update_cached_map(mapper, ring, *out_ring);
}
static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
{
switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
return AMDGPU_RING_TYPE_GFX;
case AMDGPU_HW_IP_COMPUTE:
return AMDGPU_RING_TYPE_COMPUTE;
case AMDGPU_HW_IP_DMA:
return AMDGPU_RING_TYPE_SDMA;
case AMDGPU_HW_IP_UVD:
return AMDGPU_RING_TYPE_UVD;
case AMDGPU_HW_IP_VCE:
return AMDGPU_RING_TYPE_VCE;
default:
DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
return -1;
}
}
static int amdgpu_lru_map(struct amdgpu_device *adev,
struct amdgpu_queue_mapper *mapper,
u32 user_ring, bool lru_pipe_order,
struct amdgpu_ring **out_ring)
{
int r, i, j;
int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
int ring_blacklist[AMDGPU_MAX_RINGS];
struct amdgpu_ring *ring;
/* 0 is a valid ring index, so initialize to -1 */
memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
ring = mapper->queue_map[i];
if (ring)
ring_blacklist[j++] = ring->idx;
}
r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
j, lru_pipe_order, out_ring);
if (r)
return r;
return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
}
/**
* amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
*
* Initialize the the selected @mgr (all asics).
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr)
{
int i, r;
if (!adev || !mgr)
return -EINVAL;
memset(mgr, 0, sizeof(*mgr));
for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
if (r)
return r;
}
return 0;
}
/**
* amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
*
* De-initialize the the selected @mgr (all asics).
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr)
{
return 0;
}
/**
* amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
*
* @adev: amdgpu_device pointer
* @mgr: amdgpu_queue_mgr structure holding queue information
* @hw_ip: HW IP enum
* @instance: HW instance
* @ring: user ring id
* @our_ring: pointer to mapped amdgpu_ring
*
* Map a userspace ring id to an appropriate kernel ring. Different
* policies are configurable at a HW IP level.
*
* Returns 0 on success, error on failure.
*/
int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
struct amdgpu_queue_mgr *mgr,
u32 hw_ip, u32 instance, u32 ring,
struct amdgpu_ring **out_ring)
{
int i, r, ip_num_rings = 0;
struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
if (!adev || !mgr || !out_ring)
return -EINVAL;
if (hw_ip >= AMDGPU_MAX_IP_NUM)
return -EINVAL;
if (ring >= AMDGPU_MAX_RINGS)
return -EINVAL;
/* Right now all IPs have only one instance - multiple rings. */
if (instance != 0) {
DRM_DEBUG("invalid ip instance: %d\n", instance);
return -EINVAL;
}
switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
ip_num_rings = adev->gfx.num_gfx_rings;
break;
case AMDGPU_HW_IP_COMPUTE:
ip_num_rings = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
ip_num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (!(adev->uvd.harvest_config & (1 << i)))
ip_num_rings++;
}
break;
case AMDGPU_HW_IP_VCE:
ip_num_rings = adev->vce.num_rings;
break;
case AMDGPU_HW_IP_UVD_ENC:
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (!(adev->uvd.harvest_config & (1 << i)))
ip_num_rings++;
}
ip_num_rings =
adev->uvd.num_enc_rings * ip_num_rings;
break;
case AMDGPU_HW_IP_VCN_DEC:
ip_num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_ENC:
ip_num_rings = adev->vcn.num_enc_rings;
break;
case AMDGPU_HW_IP_VCN_JPEG:
ip_num_rings = 1;
break;
default:
DRM_DEBUG("unknown ip type: %d\n", hw_ip);
return -EINVAL;
}
if (ring >= ip_num_rings) {
DRM_DEBUG("Ring index:%d exceeds maximum:%d for ip:%d\n",
ring, ip_num_rings, hw_ip);
return -EINVAL;
}
mutex_lock(&mapper->lock);
*out_ring = amdgpu_get_cached_map(mapper, ring);
if (*out_ring) {
/* cache hit */
r = 0;
goto out_unlock;
}
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
case AMDGPU_HW_IP_UVD:
case AMDGPU_HW_IP_VCE:
case AMDGPU_HW_IP_UVD_ENC:
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
case AMDGPU_HW_IP_VCN_JPEG:
r = amdgpu_identity_map(adev, mapper, ring, out_ring);
break;
case AMDGPU_HW_IP_DMA:
r = amdgpu_lru_map(adev, mapper, ring, false, out_ring);
break;
case AMDGPU_HW_IP_COMPUTE:
r = amdgpu_lru_map(adev, mapper, ring, true, out_ring);
break;
default:
*out_ring = NULL;
r = -EINVAL;
DRM_DEBUG("unknown HW IP type: %d\n", mapper->hw_ip);
}
out_unlock:
mutex_unlock(&mapper->lock);
return r;
}

View File

@ -135,9 +135,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
if (ring->funcs->end_use)
ring->funcs->end_use(ring);
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)
amdgpu_ring_lru_touch(ring->adev, ring);
}
/**
@ -320,8 +317,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->max_dw = max_dw;
ring->priority = DRM_SCHED_PRIORITY_NORMAL;
mutex_init(&ring->priority_mutex);
INIT_LIST_HEAD(&ring->lru_list);
amdgpu_ring_lru_touch(adev, ring);
for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
atomic_set(&ring->num_jobs[i], 0);
@ -368,99 +363,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
ring->adev->rings[ring->idx] = NULL;
}
static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
/* list_move_tail handles the case where ring isn't part of the list */
list_move_tail(&ring->lru_list, &adev->ring_lru_list);
}
static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
int *blacklist, int num_blacklist)
{
int i;
for (i = 0; i < num_blacklist; i++) {
if (ring->idx == blacklist[i])
return true;
}
return false;
}
/**
* amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
*
* @adev: amdgpu_device pointer
* @type: amdgpu_ring_type enum
* @blacklist: blacklisted ring ids array
* @num_blacklist: number of entries in @blacklist
* @lru_pipe_order: find a ring from the least recently used pipe
* @ring: output ring
*
* Retrieve the amdgpu_ring structure for the least recently used ring of
* a specific IP block (all asics).
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
int *blacklist, int num_blacklist,
bool lru_pipe_order, struct amdgpu_ring **ring)
{
struct amdgpu_ring *entry;
/* List is sorted in LRU order, find first entry corresponding
* to the desired HW IP */
*ring = NULL;
spin_lock(&adev->ring_lru_list_lock);
list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
if (entry->funcs->type != type)
continue;
if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
continue;
if (!*ring) {
*ring = entry;
/* We are done for ring LRU */
if (!lru_pipe_order)
break;
}
/* Move all rings on the same pipe to the end of the list */
if (entry->pipe == (*ring)->pipe)
amdgpu_ring_lru_touch_locked(adev, entry);
}
/* Move the ring we found to the end of the list */
if (*ring)
amdgpu_ring_lru_touch_locked(adev, *ring);
spin_unlock(&adev->ring_lru_list_lock);
if (!*ring) {
DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
return -EINVAL;
}
return 0;
}
/**
* amdgpu_ring_lru_touch - mark a ring as recently being used
*
* @adev: amdgpu_device pointer
* @ring: ring to touch
*
* Move @ring to the tail of the lru list
*/
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
spin_lock(&adev->ring_lru_list_lock);
amdgpu_ring_lru_touch_locked(adev, ring);
spin_unlock(&adev->ring_lru_list_lock);
}
/**
* amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
*
@ -481,6 +383,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
/**
* amdgpu_ring_soft_recovery - try to soft recover a ring lockup
*
* @ring: ring to try the recovery on
* @vmid: VMID we try to get going again
* @fence: timedout fence
*
* Tries to get a ring proceeding again when it is stuck.
*/
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
if (!ring->funcs->soft_recovery)
return false;
atomic_inc(&ring->adev->gpu_reset_counter);
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
ring->funcs->soft_recovery(ring, vmid);
return dma_fence_is_signaled(fence);
}
/*
* Debugfs info
*/

View File

@ -97,7 +97,7 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
unsigned flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
void amdgpu_fence_process(struct amdgpu_ring *ring);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
@ -168,6 +168,8 @@ struct amdgpu_ring_funcs {
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring,
enum drm_sched_priority priority);
/* Try to soft recover the ring to make the fence signal */
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
};
struct amdgpu_ring {
@ -175,7 +177,6 @@ struct amdgpu_ring {
const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv;
struct drm_gpu_scheduler sched;
struct list_head lru_list;
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
@ -221,6 +222,30 @@ struct amdgpu_ring {
#endif
};
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
@ -234,13 +259,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
int *blacklist, int num_blacklist,
bool lru_pipe_order, struct amdgpu_ring **ring);
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t val0,
uint32_t reg1, uint32_t val1);
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence);
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{

View File

@ -226,6 +226,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
struct amdgpu_sa_bo *sa_bo;
fences[i] = NULL;
if (list_empty(&sa_manager->flist[i]))
continue;
@ -296,10 +298,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
spin_lock(&sa_manager->wq.lock);
do {
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
fences[i] = NULL;
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
tries[i] = 0;
}
do {
amdgpu_sa_bo_try_free(sa_manager);

View File

@ -0,0 +1,44 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_sdma.h"
/*
* GPU SDMA IP block helpers function.
*/
struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
if (&adev->sdma.instance[i].ring == ring)
break;
if (i < AMDGPU_MAX_SDMA_INSTANCES)
return &adev->sdma.instance[i];
else
return NULL;
}

View File

@ -0,0 +1,97 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_SDMA_H__
#define __AMDGPU_SDMA_H__
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_TRAP0 = 0,
AMDGPU_SDMA_IRQ_TRAP1,
AMDGPU_SDMA_IRQ_LAST
};
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
uint32_t fw_version;
uint32_t feature_version;
struct amdgpu_ring ring;
bool burst_nop;
};
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
uint32_t srbm_soft_reset;
};
/*
* Provided by hw blocks that can move/clear data. e.g., gfx or sdma
* But currently, we use sdma to move data.
*/
struct amdgpu_buffer_funcs {
/* maximum bytes in a single operation */
uint32_t copy_max_bytes;
/* number of dw to reserve per operation */
unsigned copy_num_dw;
/* used for buffer migration */
void (*emit_copy_buffer)(struct amdgpu_ib *ib,
/* src addr in bytes */
uint64_t src_offset,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
/* number of dw to reserve per operation */
unsigned fill_num_dw;
/* used for buffer clearing */
void (*emit_fill_buffer)(struct amdgpu_ib *ib,
/* value to write to memory */
uint32_t src_data,
/* dst addr in bytes */
uint64_t dst_offset,
/* number of byte to fill */
uint32_t byte_count);
};
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
struct amdgpu_sdma_instance *
amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
#endif

View File

@ -103,7 +103,7 @@ TRACE_EVENT(amdgpu_iv,
__entry->src_data[2] = iv->src_data[2];
__entry->src_data[3] = iv->src_data[3];
),
TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
__entry->client_id, __entry->src_id,
__entry->ring_id, __entry->vmid,
__entry->timestamp, __entry->pasid,
@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
__entry->ring = p->ring->idx;
__entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
__entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted(
p->ring);
to_amdgpu_ring(p->entity->rq->sched));
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
@ -462,6 +462,30 @@ TRACE_EVENT(amdgpu_bo_move,
__entry->new_placement, __entry->bo_size)
);
TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
__field(const char *,name)
__field(uint64_t, id)
__field(struct dma_fence *, fence)
__field(uint64_t, ctx)
__field(unsigned, seqno)
),
TP_fast_assign(
__entry->name = sched_job->base.sched->name;
__entry->id = sched_job->base.id;
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
__entry->name, __entry->id,
__entry->fence, __entry->ctx,
__entry->seqno)
);
#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif

View File

@ -1,4 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: MIT
/* Copyright Red Hat Inc 2010.
*
* Permission is hereby granted, free of charge, to any person obtaining a

View File

@ -47,6 +47,7 @@
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@ -255,6 +256,13 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
@ -282,6 +290,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case TTM_PL_TT:
default:
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
break;
}
*placement = abo->placement;
}
@ -344,7 +353,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
{
uint64_t addr = 0;
if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
addr = mm_node->start << PAGE_SHIFT;
addr += bo->bdev->man[mem->mem_type].gpu_offset;
}
@ -432,8 +441,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
/* Map only what needs to be accessed. Map src to window 0 and
* dst to window 1
*/
if (src->mem->mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) {
r = amdgpu_map_buffer(src->bo, src->mem,
PFN_UP(cur_size + src_page_offset),
src_node_start, 0, ring,
@ -446,8 +454,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
from += src_page_offset;
}
if (dst->mem->mem_type == TTM_PL_TT &&
!amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) {
r = amdgpu_map_buffer(dst->bo, dst->mem,
PFN_UP(cur_size + dst_page_offset),
dst_node_start, 1, ring,
@ -525,7 +532,11 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
if (r)
goto error;
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
/* Always block for VM page tables before committing the new location */
if (bo->type == ttm_bo_type_kernel)
r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
else
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
dma_fence_put(fence);
return r;
@ -676,6 +687,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
amdgpu_move_null(bo, new_mem);
return 0;
}
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
new_mem->mem_type == AMDGPU_PL_OA) {
/* Nothing to save here */
amdgpu_move_null(bo, new_mem);
return 0;
}
if (!adev->mman.buffer_funcs_enabled)
goto memcpy;
@ -1082,42 +1103,48 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
struct ttm_mem_reg tmp;
struct ttm_placement placement;
struct ttm_place placements;
uint64_t flags;
uint64_t addr, flags;
int r;
if (bo->mem.mem_type != TTM_PL_TT ||
amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
return 0;
/* allocate GTT space */
tmp = bo->mem;
tmp.mm_node = NULL;
placement.num_placement = 1;
placement.placement = &placements;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
TTM_PL_FLAG_TT;
addr = amdgpu_gmc_agp_addr(bo);
if (addr != AMDGPU_BO_INVALID_OFFSET) {
bo->mem.start = addr >> PAGE_SHIFT;
} else {
r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
if (unlikely(r))
return r;
/* allocate GART space */
tmp = bo->mem;
tmp.mm_node = NULL;
placement.num_placement = 1;
placement.placement = &placements;
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
TTM_PL_FLAG_TT;
/* compute PTE flags for this buffer object */
flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
if (unlikely(r))
return r;
/* Bind pages */
gtt->offset = (u64)tmp.start << PAGE_SHIFT;
r = amdgpu_ttm_gart_bind(adev, bo, flags);
if (unlikely(r)) {
ttm_bo_mem_put(bo, &tmp);
return r;
/* compute PTE flags for this buffer object */
flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
/* Bind pages */
gtt->offset = (u64)tmp.start << PAGE_SHIFT;
r = amdgpu_ttm_gart_bind(adev, bo, flags);
if (unlikely(r)) {
ttm_bo_mem_put(bo, &tmp);
return r;
}
ttm_bo_mem_put(bo, &bo->mem);
bo->mem = tmp;
}
ttm_bo_mem_put(bo, &bo->mem);
bo->mem = tmp;
bo->offset = (bo->mem.start << PAGE_SHIFT) +
bo->bdev->man[bo->mem.mem_type].gpu_offset;
@ -1427,13 +1454,14 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
}
/**
* amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
* amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
*
* @ttm: The ttm_tt object to compute the flags for
* @mem: The memory registry backing this ttm_tt object
*
* Figure out the flags to use for a VM PDE (Page Directory Entry).
*/
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem)
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
uint64_t flags = 0;
@ -1447,6 +1475,22 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
flags |= AMDGPU_PTE_SNOOPED;
}
return flags;
}
/**
* amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
*
* @ttm: The ttm_tt object to compute the flags for
* @mem: The memory registry backing this ttm_tt object
* Figure out the flags to use for a VM PTE (Page Table Entry).
*/
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem)
{
uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
flags |= adev->gart.gart_pte_flags;
flags |= AMDGPU_PTE_READABLE;
@ -1769,14 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
* and driver. */
if (adev->gmc.stolen_size) {
r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->stolen_vga_memory,
NULL, NULL);
if (r)
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->stolen_vga_memory,
NULL, NULL);
if (r)
return r;
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@ -1803,45 +1845,45 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
(unsigned)(gtt_size / (1024 * 1024)));
/* Initialize various on-chip memory pools */
adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
/* GDS Memory */
if (adev->gds.mem.total_size) {
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
adev->gds.mem.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
adev->gds.mem.total_size);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
/* GWS */
if (adev->gds.gws.total_size) {
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
adev->gds.gws.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
&adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
adev->gds.gws.total_size);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
}
/* OA */
if (adev->gds.oa.total_size) {
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
adev->gds.oa.total_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
&adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
adev->gds.oa.total_size);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
&adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
/* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {
@ -1876,12 +1918,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
if (adev->gds.mem.total_size)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
if (adev->gds.gws.total_size)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
if (adev->gds.oa.total_size)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_device_release(&adev->mman.bdev);
amdgpu_ttm_global_fini(adev);
adev->mman.initialized = false;
@ -1987,7 +2026,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr;
dst_addr = adev->gart.table_addr;
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes);
@ -2047,7 +2086,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
if (r)
return r;
job->vm_needs_flush = vm_needs_flush;
if (vm_needs_flush) {
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
}
if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED,
@ -2183,7 +2225,7 @@ error_free:
static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
unsigned ttm_pl = *(int *)node->info_ent->data;
unsigned ttm_pl = (uintptr_t)node->info_ent->data;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
@ -2193,12 +2235,12 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
return 0;
}
static int ttm_pl_vram = TTM_PL_VRAM;
static int ttm_pl_tt = TTM_PL_TT;
static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
{"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
{"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
{"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
#ifdef CONFIG_SWIOTLB
{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}

View File

@ -116,6 +116,7 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem);

View File

@ -277,6 +277,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
case CHIP_HAINAN:
return AMDGPU_FW_LOAD_DIRECT;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
@ -296,19 +297,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_SMU;
return AMDGPU_FW_LOAD_SMU;
case CHIP_VEGA10:
case CHIP_RAVEN:
case CHIP_VEGA12:
case CHIP_VEGA20:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
case CHIP_VEGA20:
return AMDGPU_FW_LOAD_DIRECT;
default:
DRM_ERROR("Unknown firmware load type\n");
}
@ -322,6 +319,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
{
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
if (NULL == ucode->fw)
return 0;
@ -333,8 +331,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
return 0;
header = (const struct common_firmware_header *)ucode->fw->data;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
@ -343,7 +341,9 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@ -365,6 +365,20 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(cp_hdr->jt_offset) * 4),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_ERAM) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
le32_to_cpu(dmcu_hdr->intv_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes)),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_INTV) {
ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
@ -406,32 +420,41 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
return 0;
}
int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
{
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
if (!adev->firmware.fw_buf) {
dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n");
return -ENOMEM;
} else if (amdgpu_sriov_vf(adev)) {
memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
}
}
return 0;
}
void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
{
if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
}
int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
{
uint64_t fw_offset = 0;
int i, err;
int i;
struct amdgpu_firmware_info *ucode = NULL;
const struct common_firmware_header *header = NULL;
if (!adev->firmware.fw_size) {
dev_warn(adev->dev, "No ip firmware need to load\n");
/* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */
if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend))
return 0;
}
if (!adev->in_gpu_reset) {
err = amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
if (err) {
dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n");
goto failed;
}
}
memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
/*
* if SMU loaded firmware, it needn't add SMC, UVD, and VCE
* ucode info here
@ -448,7 +471,6 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
header = (const struct common_firmware_header *)ucode->fw->data;
amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
adev->firmware.fw_buf_ptr + fw_offset);
if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
@ -463,33 +485,4 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
}
}
return 0;
failed:
if (err)
adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
return err;
}
int amdgpu_ucode_fini_bo(struct amdgpu_device *adev)
{
int i;
struct amdgpu_firmware_info *ucode = NULL;
if (!adev->firmware.fw_size)
return 0;
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
ucode->mc_addr = 0;
ucode->kaddr = NULL;
}
}
amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
return 0;
}

View File

@ -157,6 +157,13 @@ struct gpu_info_firmware_header_v1_0 {
uint16_t version_minor; /* version */
};
/* version_major=1, version_minor=0 */
struct dmcu_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t intv_offset_bytes; /* interrupt vectors offset from end of header, in bytes */
uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
};
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@ -170,6 +177,7 @@ union amdgpu_firmware_header {
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
uint8_t raw[0x100];
};
@ -193,8 +201,11 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
AMDGPU_UCODE_ID_UVD1,
AMDGPU_UCODE_ID_VCE,
AMDGPU_UCODE_ID_VCN,
AMDGPU_UCODE_ID_DMCU_ERAM,
AMDGPU_UCODE_ID_DMCU_INTV,
AMDGPU_UCODE_ID_MAXIMUM,
};
@ -205,6 +216,12 @@ enum AMDGPU_UCODE_STATUS {
AMDGPU_UCODE_STATUS_LOADED,
};
enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_DIRECT = 0,
AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP,
};
/* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@ -232,6 +249,24 @@ struct amdgpu_firmware_info {
uint32_t tmr_mc_addr_hi;
};
struct amdgpu_firmware {
struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
enum amdgpu_firmware_load_type load_type;
struct amdgpu_bo *fw_buf;
unsigned int fw_size;
unsigned int max_ucodes;
/* firmwares are loaded by psp instead of smu from vega10 */
const struct amdgpu_psp_funcs *funcs;
struct amdgpu_bo *rbuf;
struct mutex mutex;
/* gpu info firmware data pointer */
const struct firmware *gpu_info_fw;
void *fw_buf_ptr;
uint64_t fw_buf_mc;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
@ -241,8 +276,10 @@ void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
int amdgpu_ucode_validate(const struct firmware *fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
int amdgpu_ucode_fini_bo(struct amdgpu_device *adev);
int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);

View File

@ -36,14 +36,19 @@
#include "soc15_common.h"
#include "vcn/vcn_1_0_offset.h"
#include "vcn/vcn_1_0_sh_mask.h"
/* 1 second timeout */
#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@ -59,7 +64,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_RAVEN:
fw_name = FIRMWARE_RAVEN;
if (adev->rev_id >= 8)
fw_name = FIRMWARE_RAVEN2;
else if (adev->pdev->device == 0x15d8)
fw_name = FIRMWARE_PICASSO;
else
fw_name = FIRMWARE_RAVEN;
break;
default:
return -EINVAL;
@ -111,8 +121,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
version_major, version_minor, family_id);
}
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
+ AMDGPU_VCN_SESSION_SIZE * 40;
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
@ -203,20 +212,164 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
return 0;
}
static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
struct dpg_pause_state *new_state)
{
int ret_code;
uint32_t reg_data = 0;
uint32_t reg_data2 = 0;
struct amdgpu_ring *ring;
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
/* pause DPG non-jpeg */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
ring = &adev->vcn.ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
ring = &adev->vcn.ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg non-jpeg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.fw_based = new_state->fw_based;
}
/* pause/unpause if state is changed */
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
new_state->fw_based, new_state->jpeg);
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) {
/* Make sure JPRG Snoop is disabled before sending the pause */
reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
/* pause DPG jpeg */
reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
ring = &adev->vcn.ring_jpeg;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
}
} else {
/* unpause dpg jpeg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
}
adev->vcn.pause_state.jpeg = new_state->jpeg;
}
return 0;
}
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vcn.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
unsigned i;
unsigned int fences = 0;
unsigned int i;
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
if (fences)
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
}
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
else
@ -233,12 +386,29 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = adev->vcn.pause_state.fw_based;
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
else
new_state.jpeg = adev->vcn.pause_state.jpeg;
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
}
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
@ -253,7 +423,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@ -261,11 +431,11 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
@ -605,7 +775,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
@ -615,12 +785,12 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
}
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0));
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
@ -654,7 +824,7 @@ static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
ib = &job->ibs[0];
ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0);
ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0);
ib->ptr[1] = 0xDEADBEEF;
for (i = 2; i < 16; i += 2) {
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@ -703,7 +873,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH));
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);

View File

@ -24,9 +24,9 @@
#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__
#define AMDGPU_VCN_STACK_SIZE (200*1024)
#define AMDGPU_VCN_HEAP_SIZE (256*1024)
#define AMDGPU_VCN_SESSION_SIZE (50*1024)
#define AMDGPU_VCN_STACK_SIZE (128*1024)
#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
@ -56,6 +56,16 @@ enum engine_status_constants {
UVD_STATUS__RBC_BUSY = 0x1,
};
enum internal_dpg_state {
VCN_DPG_STATE__UNPAUSE = 0,
VCN_DPG_STATE__PAUSE,
};
struct dpg_pause_state {
enum internal_dpg_state fw_based;
enum internal_dpg_state jpeg;
};
struct amdgpu_vcn {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
@ -69,6 +79,8 @@ struct amdgpu_vcn {
struct amdgpu_ring ring_jpeg;
struct amdgpu_irq_src irq;
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
struct dpg_pause_state pause_state;
};
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);

View File

@ -22,18 +22,13 @@
*/
#include "amdgpu.h"
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 20
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
addr -= AMDGPU_VA_RESERVED_SIZE;
if (addr >= AMDGPU_VA_HOLE_START)
addr |= AMDGPU_VA_HOLE_END;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
}
@ -76,7 +71,7 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va)
{
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;

File diff suppressed because it is too large Load Diff

View File

@ -29,6 +29,8 @@
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <linux/chash.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@ -48,9 +50,6 @@ struct amdgpu_bo_list_entry;
/* number of entries in page table */
#define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
/* PTBs (Page Table Blocks) need to be aligned to 32K */
#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
#define AMDGPU_PTE_VALID (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_SNOOPED (1ULL << 2)
@ -103,19 +102,6 @@ struct amdgpu_bo_list_entry;
/* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20)
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL
#define AMDGPU_VA_HOLE_END 0xffff800000000000ULL
/*
* Hardware is programmed as if the hole doesn't exists with start and end
* address values.
*
* This mask is used to remove the upper 16bits of the VA and so come up with
* the linear addr value.
*/
#define AMDGPU_VA_HOLE_MASK 0x0000ffffffffffffULL
/* max vmids dedicated for process */
#define AMDGPU_VM_MAX_RESERVED_VMID 1
@ -143,7 +129,7 @@ struct amdgpu_vm_bo_base {
struct amdgpu_bo *bo;
/* protected by bo being reserved */
struct list_head bo_list;
struct amdgpu_vm_bo_base *next;
/* protected by spinlock */
struct list_head vm_status;
@ -160,6 +146,27 @@ struct amdgpu_vm_pt {
struct amdgpu_vm_pt *entries;
};
/* provided by hw blocks that can write ptes, e.g., sdma */
struct amdgpu_vm_pte_funcs {
/* number of dw to reserve per operation */
unsigned copy_pte_num_dw;
/* copy pte entries from GART */
void (*copy_pte)(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count);
/* write pte one entry at a time with addr mapping */
void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
uint64_t value, unsigned count,
uint32_t incr);
/* for linear pte/pde updates without addr mapping */
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint64_t flags);
};
#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)
@ -172,6 +179,13 @@ struct amdgpu_task_info {
pid_t tgid;
};
#define AMDGPU_PAGEFAULT_HASH_BITS 8
struct amdgpu_retryfault_hashtable {
DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
spinlock_t lock;
int count;
};
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@ -182,13 +196,16 @@ struct amdgpu_vm {
/* PT BOs which relocated and their parent need an update */
struct list_head relocated;
/* BOs moved, but not yet updated in the PT */
/* per VM BOs moved, but not yet updated in the PT */
struct list_head moved;
spinlock_t moved_lock;
/* All BOs of this VM not currently in the state machine */
struct list_head idle;
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
spinlock_t invalidated_lock;
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
@ -226,6 +243,12 @@ struct amdgpu_vm {
/* Some basic info about the task */
struct amdgpu_task_info task_info;
/* Store positions of group of BOs */
struct ttm_lru_bulk_move lru_bulk_move;
/* mark whether can do the bulk move */
bool bulk_moveable;
struct amdgpu_retryfault_hashtable *fault_hash;
};
struct amdgpu_vm_manager {
@ -244,10 +267,9 @@ struct amdgpu_vm_manager {
/* vram base address for page table entry */
u64 vram_base_offset;
/* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
unsigned vm_pte_num_rings;
atomic_t vm_pte_next_ring;
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS];
unsigned vm_pte_num_rqs;
/* partial resident texture handling */
spinlock_t prt_lock;
@ -266,11 +288,16 @@ struct amdgpu_vm_manager {
spinlock_t pasid_lock;
};
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid);
@ -330,8 +357,15 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info);
struct amdgpu_task_info *task_info);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
#endif

View File

@ -124,6 +124,28 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
return usage;
}
/**
* amdgpu_vram_mgr_virt_start - update virtual start address
*
* @mem: ttm_mem_reg to update
* @node: just allocated node
*
* Calculate a virtual BO start address to easily check if everything is CPU
* accessible.
*/
static void amdgpu_vram_mgr_virt_start(struct ttm_mem_reg *mem,
struct drm_mm_node *node)
{
unsigned long start;
start = node->start + node->size;
if (start > mem->num_pages)
start -= mem->num_pages;
else
start = 0;
mem->start = max(mem->start, start);
}
/**
* amdgpu_vram_mgr_new - allocate new ranges
*
@ -176,10 +198,25 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
pages_left = mem->num_pages;
spin_lock(&mgr->lock);
for (i = 0; i < num_nodes; ++i) {
for (i = 0; pages_left >= pages_per_node; ++i) {
unsigned long pages = rounddown_pow_of_two(pages_left);
r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
pages_per_node, 0,
place->fpfn, lpfn,
mode);
if (unlikely(r))
break;
usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
}
for (; pages_left; ++i) {
unsigned long pages = min(pages_left, pages_per_node);
uint32_t alignment = mem->page_alignment;
unsigned long start;
if (pages == pages_per_node)
alignment = pages_per_node;
@ -193,16 +230,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
/* Calculate a virtual BO start address to easily check if
* everything is CPU accessible.
*/
start = nodes[i].start + nodes[i].size;
if (start > mem->num_pages)
start -= mem->num_pages;
else
start = 0;
mem->start = max(mem->start, start);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
}
spin_unlock(&mgr->lock);

View File

@ -0,0 +1,119 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*
*/
#include <linux/list.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
static DEFINE_MUTEX(xgmi_mutex);
#define AMDGPU_MAX_XGMI_HIVE 8
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
};
static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
static unsigned hive_count = 0;
static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
{
int i;
struct amdgpu_hive_info *tmp;
if (!adev->gmc.xgmi.hive_id)
return NULL;
for (i = 0 ; i < hive_count; ++i) {
tmp = &xgmi_hives[i];
if (tmp->hive_id == adev->gmc.xgmi.hive_id)
return tmp;
}
if (i >= AMDGPU_MAX_XGMI_HIVE)
return NULL;
/* initialize new hive if not exist */
tmp = &xgmi_hives[hive_count++];
tmp->hive_id = adev->gmc.xgmi.hive_id;
INIT_LIST_HEAD(&tmp->device_list);
return tmp;
}
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev;
int count = 0, ret = -EINVAL;
if ((adev->asic_type < CHIP_VEGA20) ||
(adev->flags & AMD_IS_APU) )
return 0;
adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
memset(&tmp_topology[0], 0, sizeof(tmp_topology));
mutex_lock(&xgmi_mutex);
hive = amdgpu_get_xgmi_hive(adev);
if (!hive)
goto exit;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
tmp_topology[count++].device_id = entry->device_id;
ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
if (ret) {
dev_err(adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.device_id,
adev->gmc.xgmi.hive_id, ret);
goto exit;
}
/* Each psp need to set the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.device_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* To do : continue with some node failed or disable the whole hive */
break;
}
}
if (!ret)
dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id,
adev->gmc.xgmi.hive_id);
exit:
mutex_unlock(&xgmi_mutex);
return ret;
}

View File

@ -28,6 +28,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "atom.h"
#include "atombios_encoders.h"
#include "atombios_dp.h"

View File

@ -6277,12 +6277,12 @@ static int ci_dpm_sw_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230,
ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230,
&adev->pm.dpm.thermal.irq);
if (ret)
return ret;
ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231,
ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231,
&adev->pm.dpm.thermal.irq);
if (ret)
return ret;

View File

@ -2002,6 +2002,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
if (amdgpu_dpm == -1)
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
else
@ -2014,8 +2016,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;
@ -2023,6 +2023,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
if (amdgpu_dpm == -1)
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
else
@ -2035,8 +2037,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;
@ -2044,6 +2044,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@ -2053,8 +2055,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;
@ -2063,6 +2064,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@ -2072,8 +2075,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;

View File

@ -276,7 +276,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
@ -318,7 +318,7 @@ static int cik_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
@ -332,7 +332,7 @@ static int cik_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
amdgpu_ih_ring_fini(adev);
amdgpu_ih_ring_fini(adev, &adev->irq.ih);
amdgpu_irq_remove_domain(adev);
return 0;
@ -468,8 +468,7 @@ static const struct amdgpu_ih_funcs cik_ih_funcs = {
static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
if (adev->irq.ih_funcs == NULL)
adev->irq.ih_funcs = &cik_ih_funcs;
adev->irq.ih_funcs = &cik_ih_funcs;
}
const struct amdgpu_ip_block_version cik_ih_ip_block =

View File

@ -970,19 +970,19 @@ static int cik_sdma_sw_init(void *handle)
}
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA Privileged inst */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
/* SDMA Privileged inst */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
@ -1370,10 +1370,8 @@ static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
{
if (adev->mman.buffer_funcs == NULL) {
adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
@ -1386,16 +1384,16 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
struct drm_gpu_scheduler *sched;
unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++)
adev->vm_manager.vm_pte_rings[i] =
&adev->sdma.instance[i].ring;
adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
sched = &adev->sdma.instance[i].ring.sched;
adev->vm_manager.vm_pte_rqs[i] =
&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
}
adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version cik_sdma_ip_block =

View File

@ -255,7 +255,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
@ -297,7 +297,7 @@ static int cz_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
@ -311,7 +311,7 @@ static int cz_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
amdgpu_ih_ring_fini(adev);
amdgpu_ih_ring_fini(adev, &adev->irq.ih);
amdgpu_irq_remove_domain(adev);
return 0;
@ -449,8 +449,7 @@ static const struct amdgpu_ih_funcs cz_ih_funcs = {
static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
if (adev->irq.ih_funcs == NULL)
adev->irq.ih_funcs = &cz_ih_funcs;
adev->irq.ih_funcs = &cz_ih_funcs;
}
const struct amdgpu_ip_block_version cz_ih_ip_block =

View File

@ -31,6 +31,7 @@
#include "atombios_encoders.h"
#include "amdgpu_pll.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "dce_v10_0.h"
#include "dce/dce_10_0_d.h"
@ -1942,6 +1943,17 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
#ifdef __BIG_ENDIAN
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
ENDIAN_8IN32);
#endif
break;
default:
DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name));
@ -2734,19 +2746,19 @@ static int dce_v10_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r)
return r;
@ -3558,8 +3570,7 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev)
{
if (adev->mode_info.funcs == NULL)
adev->mode_info.funcs = &dce_v10_0_display_funcs;
adev->mode_info.funcs = &dce_v10_0_display_funcs;
}
static const struct amdgpu_irq_src_funcs dce_v10_0_crtc_irq_funcs = {

View File

@ -31,6 +31,7 @@
#include "atombios_encoders.h"
#include "amdgpu_pll.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
#include "dce_v11_0.h"
#include "dce/dce_11_0_d.h"
@ -1984,6 +1985,17 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
#ifdef __BIG_ENDIAN
fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
ENDIAN_8IN32);
#endif
break;
default:
DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name));
@ -2855,19 +2867,19 @@ static int dce_v11_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r)
return r;
@ -3690,8 +3702,7 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
{
if (adev->mode_info.funcs == NULL)
adev->mode_info.funcs = &dce_v11_0_display_funcs;
adev->mode_info.funcs = &dce_v11_0_display_funcs;
}
static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {

Some files were not shown because too many files have changed in this diff Show More