From 38b8f823864707eb1cf331d2247608c419ed388c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 3 May 2017 11:13:46 +0800 Subject: [PATCH 001/341] clk: sunxi-ng: a31: Correct lcd1-ch1 clock register offset The register offset for the lcd1-ch1 clock was incorrectly pointing to the lcd0-ch1 clock. This resulted in the lcd0-ch1 clock being disabled when the clk core disables unused clocks. This then stops the simplefb HDMI output path. Reported-by: Bob Ham Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Cc: stable@vger.kernel.org # 4.9.x- Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 89e68d29bf45..df97e25aec76 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -556,7 +556,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(lcd0_ch1_clk, "lcd0-ch1", lcd_ch1_parents, 0x12c, 0, 4, 24, 3, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_M_WITH_MUX_GATE(lcd1_ch1_clk, "lcd1-ch1", lcd_ch1_parents, - 0x12c, 0, 4, 24, 3, BIT(31), + 0x130, 0, 4, 24, 3, BIT(31), CLK_SET_RATE_PARENT); static const char * const csi_sclk_parents[] = { "pll-video0", "pll-video1", From 7ffc781ec46ef1e9aedb482f5f04425bd8bb2753 Mon Sep 17 00:00:00 2001 From: Yong Deng Date: Fri, 5 May 2017 18:31:57 +0800 Subject: [PATCH 002/341] clk: sunxi-ng: v3s: Fix usb otg device reset bit V3S's usb otg device reset bit should be 24, not 23. 
Cc: stable@vger.kernel.org Signed-off-by: Yong Deng Reviewed-By: Icenowy Zheng Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c index e58706b40ae9..6297add857b5 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c @@ -537,7 +537,7 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = { [RST_BUS_EMAC] = { 0x2c0, BIT(17) }, [RST_BUS_HSTIMER] = { 0x2c0, BIT(19) }, [RST_BUS_SPI0] = { 0x2c0, BIT(20) }, - [RST_BUS_OTG] = { 0x2c0, BIT(23) }, + [RST_BUS_OTG] = { 0x2c0, BIT(24) }, [RST_BUS_EHCI0] = { 0x2c0, BIT(26) }, [RST_BUS_OHCI0] = { 0x2c0, BIT(29) }, From dbed87a9d3a857a86f602775b5845f5f6d9652b5 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Mon, 24 Apr 2017 12:05:42 +0200 Subject: [PATCH 003/341] clk: meson: gxbb: fix build error without RESET_CONTROLLER With CONFIG_RESET_CONTROLLER=n we see the following link error in the meson gxbb clk driver: drivers/built-in.o: In function 'gxbb_aoclkc_probe': drivers/clk/meson/gxbb-aoclk.c:161: undefined reference to 'devm_reset_controller_register' Fix this by selecting the reset controller subsystem. Fixes: f8c11f79912d ("clk: meson: Add GXBB AO Clock and Reset controller driver") Signed-off-by: Tobias Regnery Acked-by: Neil Armstrong [narmstrong: Added fixes-by tag] Signed-off-by: Neil Armstrong --- drivers/clk/meson/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/meson/Kconfig b/drivers/clk/meson/Kconfig index 19480bcc7046..2f29ee1a4d00 100644 --- a/drivers/clk/meson/Kconfig +++ b/drivers/clk/meson/Kconfig @@ -14,6 +14,7 @@ config COMMON_CLK_MESON8B config COMMON_CLK_GXBB bool depends on COMMON_CLK_AMLOGIC + select RESET_CONTROLLER help Support for the clock controller on AmLogic S905 devices, aka gxbb. Say Y if you want peripherals and CPU frequency scaling to work. 
From f36afd38c5372a7cd5b363786fd62076c8b28427 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 May 2017 23:19:01 +0200 Subject: [PATCH 004/341] clk: sunxi-ng: enable SUNXI_CCU_MP for PRCM The newly added PRCM CCU driver uses SUNXI_CCU_MP_WITH_MUX_GATE, which causes a link error when no other driver enables SUNXI_CCU_MP: drivers/clk/built-in.o:(.data+0x5c8c8): undefined reference to `ccu_mp_ops' This adds an explicit 'select' statement for it. Signed-off-by: Arnd Bergmann Reviewed-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig index b0d551a8efe4..eb89c7801f00 100644 --- a/drivers/clk/sunxi-ng/Kconfig +++ b/drivers/clk/sunxi-ng/Kconfig @@ -156,6 +156,7 @@ config SUN8I_R_CCU bool "Support for Allwinner SoCs' PRCM CCUs" select SUNXI_CCU_DIV select SUNXI_CCU_GATE + select SUNXI_CCU_MP default MACH_SUN8I || (ARCH_SUNXI && ARM64) endif From 370d9192719e6c174167888cf9240df2542e3b4b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 24 May 2017 18:34:29 +0200 Subject: [PATCH 005/341] clk: sunxi-ng: sun5i: Fix ahb_bist_clk definition AHB BIST gate is actually controlled with bit 7. This bug was detected while trying to use the NAND controller which is using the DMA engine to transfer data to the NAND. Since the ahb_bist_clk gate bit conflicts with the ahb_dma_clk gate bit, the core was disabling the DMA engine clock as part of its 'disable unused clks' procedure, which was causing all DMA transfers to fail after this point. 
Fixes: 5e73761786d6 ("clk: sunxi-ng: Add sun5i CCU driver") Cc: stable@vger.kernel.org Reported-by: Angus Ainslie Signed-off-by: Boris Brezillon Tested-by: Angus Ainslie Reviewed-by: Chen-Yu Tsai Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/1495643669-28221-1-git-send-email-boris.brezillon@free-electrons.com --- drivers/clk/sunxi-ng/ccu-sun5i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c index 5c476f966a72..5372bf8be5e6 100644 --- a/drivers/clk/sunxi-ng/ccu-sun5i.c +++ b/drivers/clk/sunxi-ng/ccu-sun5i.c @@ -243,7 +243,7 @@ static SUNXI_CCU_GATE(ahb_ss_clk, "ahb-ss", "ahb", static SUNXI_CCU_GATE(ahb_dma_clk, "ahb-dma", "ahb", 0x060, BIT(6), 0); static SUNXI_CCU_GATE(ahb_bist_clk, "ahb-bist", "ahb", - 0x060, BIT(6), 0); + 0x060, BIT(7), 0); static SUNXI_CCU_GATE(ahb_mmc0_clk, "ahb-mmc0", "ahb", 0x060, BIT(8), 0); static SUNXI_CCU_GATE(ahb_mmc1_clk, "ahb-mmc1", "ahb", From 2f2724630f7a8d582470f03ee56b96746767d270 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 May 2017 16:25:14 +1000 Subject: [PATCH 006/341] KVM: PPC: Book3S HV: Cope with host using large decrementer mode POWER9 introduces a new mode for the decrementer register, called large decrementer mode, in which the decrementer counter is 56 bits wide rather than 32, and reads are sign-extended rather than zero-extended. For the decrementer, this new mode is optional and controlled by a bit in the LPCR. The hypervisor decrementer (HDEC) is 56 bits wide on POWER9 and has no mode control. Since KVM code reads and writes the decrementer and hypervisor decrementer registers in a few places, it needs to be aware of the need to treat the decrementer value as a 64-bit quantity, and only do a 32-bit sign extension when large decrementer mode is not in effect. Similarly, the HDEC should always be treated as a 64-bit quantity on POWER9. 
We define a new EXTEND_HDEC macro to encapsulate the feature test for POWER9 and the sign extension. To enable the sign extension to be removed in large decrementer mode, we test the LPCR_LD bit in the host LPCR image stored in the struct kvm for the guest. If it is set then large decrementer mode is enabled and the sign extension should be skipped. This is partly based on an earlier patch by Oliver O'Halloran. Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_interrupts.S | 12 +++++++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 +++++++++++++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 0fdc4a28970b..404deb512844 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * Put whatever is in the decrementer into the * hypervisor decrementer. */ +BEGIN_FTR_SECTION + ld r5, HSTATE_KVM_VCORE(r13) + ld r6, VCORE_KVM(r5) + ld r9, KVM_HOST_LPCR(r6) + andis. 
r9, r9, LPCR_LD@h +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mfspr r8,SPRN_DEC mftb r7 - mtspr SPRN_HDEC,r8 +BEGIN_FTR_SECTION + /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */ + bne 32f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r8,r8 +32: mtspr SPRN_HDEC,r8 add r8,r8,r7 std r8,HSTATE_DECEXP(r13) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bdb3f76ceb6b..e390b383b4d6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,6 +32,12 @@ #include #include +/* Sign-extend HDEC if not on POWER9 */ +#define EXTEND_HDEC(reg) \ +BEGIN_FTR_SECTION; \ + extsw reg, reg; \ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) /* Values in HSTATE_NAPPING(r13) */ @@ -214,6 +220,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ + /* HDEC may be larger than DEC for arch >= v3.00, but since the */ + /* HDEC value came from DEC in the first place, it will fit */ mfspr r3, SPRN_HDEC mtspr SPRN_DEC, r3 /* @@ -295,8 +303,9 @@ kvm_novcpu_wakeup: /* See if our timeslice has expired (HDEC is negative) */ mfspr r0, SPRN_HDEC + EXTEND_HDEC(r0) li r12, BOOK3S_INTERRUPT_HV_DECREMENTER - cmpwi r0, 0 + cmpdi r0, 0 blt kvm_novcpu_exit /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ @@ -390,8 +399,8 @@ kvm_secondary_got_guest: lbz r4, HSTATE_PTID(r13) cmpwi r4, 0 bne 63f - lis r6, 0x7fff - ori r6, r6, 0xffff + LOAD_REG_ADDR(r6, decrementer_max) + ld r6, 0(r6) mtspr SPRN_HDEC, r6 /* and set per-LPAR registers, if doing dynamic micro-threading */ ld r6, HSTATE_SPLIT_MODE(r13) @@ -968,7 +977,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC - cmpwi r3, 512 /* 1 microsecond */ + EXTEND_HDEC(r3) + cmpdi r3, 512 /* 1 
microsecond */ blt hdec_soon #ifdef CONFIG_KVM_XICS @@ -2366,12 +2376,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) mfspr r3, SPRN_DEC mfspr r4, SPRN_HDEC mftb r5 - cmpw r3, r4 + extsw r3, r3 + EXTEND_HDEC(r4) + cmpd r3, r4 ble 67f mtspr SPRN_DEC, r4 67: /* save expiry time of guest decrementer */ - extsw r3, r3 add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13) From 0051c10acabb631cfd439eae73289e6e4c39b2b7 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Tue, 23 May 2017 14:58:11 -0700 Subject: [PATCH 007/341] drm/i915: Disable decoupled MMIO The decoupled MMIO feature doesn't work as intended by HW team. Enabling it with forcewake will only make debugging efforts more difficult, so let's disable it. Fixes: 85ee17ebeedd ("drm/i915/bxt: Broxton decoupled MMIO") Cc: Zhe Wang Cc: Praveen Paneri Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.10+ Signed-off-by: Kai Chen Reviewed-by: Tvrtko Ursulin Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170523215812.18328-2-kai.chen@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f80db2ccd92f..cf43dc1d539f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -385,7 +385,6 @@ static const struct intel_device_info intel_skylake_gt3_info = { .has_gmbus_irq = 1, \ .has_logical_ring_contexts = 1, \ .has_guc = 1, \ - .has_decoupled_mmio = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ .has_full_48bit_ppgtt = 1, \ From d8197317f172193b12fbaa75a653e7caa0614738 Mon Sep 17 00:00:00 2001 From: Kai Chen Date: Tue, 23 May 2017 14:58:12 -0700 Subject: [PATCH 008/341] drm/i915: Remove decoupled MMIO code This is a follow-up patch to the previous patch ([PATCH 1/2] drm/i915: Disable decoupled MMIO) to remove the dead code for decoupled MMIO implementation, as it won't be used any longer on 
GEN9LP. Therefore, this patch reverts: commit 85ee17ebeedd1af0dccd98f82ab4e644e29d84c0 Author: Praveen Paneri Date: Tue Nov 15 22:49:20 2016 +0530 drm/i915/bxt: Broxton decoupled MMIO Signed-off-by: Kai Chen Reviewed-by: Tvrtko Ursulin Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170523215812.18328-3-kai.chen@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 - drivers/gpu/drm/i915/i915_reg.h | 7 -- drivers/gpu/drm/i915/intel_uncore.c | 126 ---------------------------- 3 files changed, 136 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 35e161b5b90e..a3b2674a4b7d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -751,7 +751,6 @@ struct intel_csr { func(has_aliasing_ppgtt); \ func(has_csr); \ func(has_ddi); \ - func(has_decoupled_mmio); \ func(has_dp_mst); \ func(has_fbc); \ func(has_fpga_dbg); \ @@ -2995,8 +2994,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 -#define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio) - #include "i915_trace.h" static inline bool intel_vtd_active(void) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 89888adb9af1..231ee86625cd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7792,13 +7792,6 @@ enum { #define SKL_FUSE_PG1_DIST_STATUS (1<<26) #define SKL_FUSE_PG2_DIST_STATUS (1<<25) -/* Decoupled MMIO register pair for kernel driver */ -#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) -#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) -#define GEN9_DECOUPLED_DW1_GO (1<<31) -#define GEN9_DECOUPLED_PD_SHIFT 28 -#define GEN9_DECOUPLED_OP_SHIFT 24 - /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 #define _TRANS_DDI_FUNC_CTL_B 0x61400 diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 
47d7ee1b5d86..9882724bc2b6 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -404,8 +404,6 @@ check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, bool restore_forcewake) { - struct intel_device_info *info = mkwrite_device_info(dev_priv); - /* clear out unclaimed reg detection bit */ if (check_for_unclaimed_mmio(dev_priv)) DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); @@ -418,9 +416,6 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, GT_FIFO_CTL_RC6_POLICY_STALL); } - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST)) - info->has_decoupled_mmio = false; - intel_uncore_forcewake_reset(dev_priv, restore_forcewake); } @@ -810,78 +805,6 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv, __unclaimed_reg_debug(dev_priv, reg, read, before); } -enum decoupled_power_domain { - GEN9_DECOUPLED_PD_BLITTER = 0, - GEN9_DECOUPLED_PD_RENDER, - GEN9_DECOUPLED_PD_MEDIA, - GEN9_DECOUPLED_PD_ALL -}; - -enum decoupled_ops { - GEN9_DECOUPLED_OP_WRITE = 0, - GEN9_DECOUPLED_OP_READ -}; - -static const enum decoupled_power_domain fw2dpd_domain[] = { - GEN9_DECOUPLED_PD_RENDER, - GEN9_DECOUPLED_PD_BLITTER, - GEN9_DECOUPLED_PD_ALL, - GEN9_DECOUPLED_PD_MEDIA, - GEN9_DECOUPLED_PD_ALL, - GEN9_DECOUPLED_PD_ALL, - GEN9_DECOUPLED_PD_ALL -}; - -/* - * Decoupled MMIO access for only 1 DWORD - */ -static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv, - u32 reg, - enum forcewake_domains fw_domain, - enum decoupled_ops operation) -{ - enum decoupled_power_domain dp_domain; - u32 ctrl_reg_data = 0; - - dp_domain = fw2dpd_domain[fw_domain - 1]; - - ctrl_reg_data |= reg; - ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); - ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); - ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; - __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data); - - if 
(wait_for_atomic((__raw_i915_read32(dev_priv, - GEN9_DECOUPLED_REG0_DW1) & - GEN9_DECOUPLED_DW1_GO) == 0, - FORCEWAKE_ACK_TIMEOUT_MS)) - DRM_ERROR("Decoupled MMIO wait timed out\n"); -} - -static inline u32 -__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, - u32 reg, - enum forcewake_domains fw_domain) -{ - __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, - GEN9_DECOUPLED_OP_READ); - - return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); -} - -static inline void -__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, - u32 reg, u32 data, - enum forcewake_domains fw_domain) -{ - - __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); - - __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, - GEN9_DECOUPLED_OP_WRITE); -} - - #define GEN2_READ_HEADER(x) \ u##x val = 0; \ assert_rpm_wakelock_held(dev_priv); @@ -978,28 +901,6 @@ func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { #define __gen6_read(x) __gen_read(gen6, x) #define __fwtable_read(x) __gen_read(fwtable, x) -#define __gen9_decoupled_read(x) \ -static u##x \ -gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_READ_HEADER(x); \ - fw_engine = __fwtable_reg_read_fw_domains(offset); \ - if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ - unsigned i; \ - u32 *ptr_data = (u32 *) &val; \ - for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ - *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ - offset, \ - fw_engine); \ - } else { \ - val = __raw_i915_read##x(dev_priv, reg); \ - } \ - GEN6_READ_FOOTER; \ -} - -__gen9_decoupled_read(32) -__gen9_decoupled_read(64) __fwtable_read(8) __fwtable_read(16) __fwtable_read(32) @@ -1086,25 +987,6 @@ func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, boo #define __gen8_write(x) __gen_write(gen8, x) #define __fwtable_write(x) __gen_write(fwtable, x) -#define 
__gen9_decoupled_write(x) \ -static void \ -gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, \ - bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_WRITE_HEADER; \ - fw_engine = __fwtable_reg_write_fw_domains(offset); \ - if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ - __gen9_decoupled_mmio_write(dev_priv, \ - offset, \ - val, \ - fw_engine); \ - else \ - __raw_i915_write##x(dev_priv, reg, val); \ - GEN6_WRITE_FOOTER; \ -} - -__gen9_decoupled_write(32) __fwtable_write(8) __fwtable_write(16) __fwtable_write(32) @@ -1341,14 +1223,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); - if (HAS_DECOUPLED_MMIO(dev_priv)) { - dev_priv->uncore.funcs.mmio_readl = - gen9_decoupled_read32; - dev_priv->uncore.funcs.mmio_readq = - gen9_decoupled_read64; - dev_priv->uncore.funcs.mmio_writel = - gen9_decoupled_write32; - } } iosf_mbi_register_pmic_bus_access_notifier( From eead06dff9c34699f0e6c16fc6d6d4f105008336 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:37:55 +0200 Subject: [PATCH 009/341] drm/i915: Use atomic scaling_mode instead of panel.fitting_mode The first step in converting connector properties to atomic is wiring up the atomic state. We're still not completely supporting the scaling mode in the atomic case, but this is the first step towards it. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-4-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 13 +++++-------- drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_dsi.c | 11 +++++------ drivers/gpu/drm/i915/intel_lvds.c | 11 +++++------ 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4a6feb6a69bd..921b9bb3a09e 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1686,10 +1686,10 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (HAS_GMCH_DISPLAY(dev_priv)) intel_gmch_panel_fitting(intel_crtc, pipe_config, - intel_connector->panel.fitting_mode); + conn_state->scaling_mode); else intel_pch_panel_fitting(intel_crtc, pipe_config, - intel_connector->panel.fitting_mode); + conn_state->scaling_mode); } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) @@ -4832,7 +4832,6 @@ intel_dp_set_property(struct drm_connector *connector, uint64_t val) { struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_encoder *intel_encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); int ret; @@ -4901,11 +4900,11 @@ intel_dp_set_property(struct drm_connector *connector, return -EINVAL; } - if (intel_connector->panel.fitting_mode == val) { + if (connector->state->scaling_mode == val) { /* the eDP scaling property is not changed */ return 0; } - intel_connector->panel.fitting_mode = val; + connector->state->scaling_mode = val; goto done; } @@ -5183,8 +5182,6 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port) static void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) { - struct intel_connector *intel_connector = 
to_intel_connector(connector); - intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); intel_dp->color_range_auto = true; @@ -5195,7 +5192,7 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect &connector->base, connector->dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_ASPECT); - intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT; + connector->state->scaling_mode = DRM_MODE_SCALE_ASPECT; } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index bd500977b3fc..28d30f96b59e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -266,7 +266,6 @@ struct intel_encoder { struct intel_panel { struct drm_display_mode *fixed_mode; struct drm_display_mode *downclock_mode; - int fitting_mode; /* backlight */ struct { diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index fc0ef492252a..ec141eb59e6d 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -320,10 +320,10 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, if (HAS_GMCH_DISPLAY(dev_priv)) intel_gmch_panel_fitting(crtc, pipe_config, - intel_connector->panel.fitting_mode); + conn_state->scaling_mode); else intel_pch_panel_fitting(crtc, pipe_config, - intel_connector->panel.fitting_mode); + conn_state->scaling_mode); } /* DSI uses short packets for sync events, so clear mode flags for DSI */ @@ -1592,7 +1592,6 @@ static int intel_dsi_set_property(struct drm_connector *connector, uint64_t val) { struct drm_device *dev = connector->dev; - struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_crtc *crtc; int ret; @@ -1611,10 +1610,10 @@ static int intel_dsi_set_property(struct drm_connector *connector, return -EINVAL; } - if (intel_connector->panel.fitting_mode == val) + if (connector->state->scaling_mode == val) return 0; - 
intel_connector->panel.fitting_mode = val; + connector->state->scaling_mode = val; } crtc = connector->state->crtc; @@ -1680,7 +1679,7 @@ static void intel_dsi_add_properties(struct intel_connector *connector) drm_object_attach_property(&connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_ASPECT); - connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT; + connector->base.state->scaling_mode = DRM_MODE_SCALE_ASPECT; } } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 8b942ef2b3ec..a14bdefdcb8f 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -433,10 +433,10 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, pipe_config->has_pch_encoder = true; intel_pch_panel_fitting(intel_crtc, pipe_config, - intel_connector->panel.fitting_mode); + conn_state->scaling_mode); } else { intel_gmch_panel_fitting(intel_crtc, pipe_config, - intel_connector->panel.fitting_mode); + conn_state->scaling_mode); } @@ -602,7 +602,6 @@ static int intel_lvds_set_property(struct drm_connector *connector, struct drm_property *property, uint64_t value) { - struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_device *dev = connector->dev; if (property == dev->mode_config.scaling_mode_property) { @@ -613,11 +612,11 @@ static int intel_lvds_set_property(struct drm_connector *connector, return -EINVAL; } - if (intel_connector->panel.fitting_mode == value) { + if (connector->state->scaling_mode == value) { /* the LVDS scaling property is not changed */ return 0; } - intel_connector->panel.fitting_mode = value; + connector->state->scaling_mode = value; crtc = intel_attached_encoder(connector)->base.crtc; if (crtc && crtc->state->enable) { @@ -1087,7 +1086,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) drm_object_attach_property(&connector->base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_ASPECT); - 
intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT; + connector->state->scaling_mode = DRM_MODE_SCALE_ASPECT; intel_lvds_pps_get_hw_state(dev_priv, &lvds_encoder->init_pps); lvds_encoder->init_lvds_val = lvds; From 8b45330ad30193a6e4925ae4d36565fb817533fc Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:37:56 +0200 Subject: [PATCH 010/341] drm/i915: Use per-connector scaling mode property None of the intel connectors can use all types of scaling modes, so only try the ones that are possible. This is another preparation for connectors towards conversion to atomic. Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-5-maarten.lankhorst@linux.intel.com [mlankhorst: Use renamed drm_connector_attach_scaling_mode_property function] --- drivers/gpu/drm/i915/intel_dp.c | 29 ++++++++++++----------------- drivers/gpu/drm/i915/intel_dsi.c | 28 +++++++++++----------------- drivers/gpu/drm/i915/intel_lvds.c | 17 ++++++----------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 921b9bb3a09e..476cac07da7c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4888,18 +4888,7 @@ intel_dp_set_property(struct drm_connector *connector, goto done; } - if (is_edp(intel_dp) && - property == connector->dev->mode_config.scaling_mode_property) { - if (val == DRM_MODE_SCALE_NONE) { - DRM_DEBUG_KMS("no scaling not supported\n"); - return -EINVAL; - } - if (HAS_GMCH_DISPLAY(dev_priv) && - val == DRM_MODE_SCALE_CENTER) { - DRM_DEBUG_KMS("centering not supported\n"); - return -EINVAL; - } - + if (property == connector->scaling_mode_property) { if (connector->state->scaling_mode == val) { /* the eDP scaling property is not changed */ return 0; @@ -5182,17 +5171,23 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port) static void 
intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); + intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); intel_dp->color_range_auto = true; if (is_edp(intel_dp)) { - drm_mode_create_scaling_mode_property(connector->dev); - drm_object_attach_property( - &connector->base, - connector->dev->mode_config.scaling_mode_property, - DRM_MODE_SCALE_ASPECT); + u32 allowed_scalers; + + allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | BIT(DRM_MODE_SCALE_FULLSCREEN); + if (!HAS_GMCH_DISPLAY(dev_priv)) + allowed_scalers |= BIT(DRM_MODE_SCALE_CENTER); + + drm_connector_attach_scaling_mode_property(connector, allowed_scalers); + connector->state->scaling_mode = DRM_MODE_SCALE_ASPECT; + } } diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index ec141eb59e6d..77b8dad5fa41 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1591,7 +1591,6 @@ static int intel_dsi_set_property(struct drm_connector *connector, struct drm_property *property, uint64_t val) { - struct drm_device *dev = connector->dev; struct drm_crtc *crtc; int ret; @@ -1599,17 +1598,7 @@ static int intel_dsi_set_property(struct drm_connector *connector, if (ret) return ret; - if (property == dev->mode_config.scaling_mode_property) { - if (val == DRM_MODE_SCALE_NONE) { - DRM_DEBUG_KMS("no scaling not supported\n"); - return -EINVAL; - } - if (HAS_GMCH_DISPLAY(to_i915(dev)) && - val == DRM_MODE_SCALE_CENTER) { - DRM_DEBUG_KMS("centering not supported\n"); - return -EINVAL; - } - + if (property == connector->scaling_mode_property) { if (connector->state->scaling_mode == val) return 0; @@ -1672,13 +1661,18 @@ static const struct drm_connector_funcs intel_dsi_connector_funcs = { static void intel_dsi_add_properties(struct intel_connector *connector) { - struct drm_device *dev = connector->base.dev; + struct drm_i915_private 
*dev_priv = to_i915(connector->base.dev); if (connector->panel.fixed_mode) { - drm_mode_create_scaling_mode_property(dev); - drm_object_attach_property(&connector->base.base, - dev->mode_config.scaling_mode_property, - DRM_MODE_SCALE_ASPECT); + u32 allowed_scalers; + + allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | BIT(DRM_MODE_SCALE_FULLSCREEN); + if (!HAS_GMCH_DISPLAY(dev_priv)) + allowed_scalers |= BIT(DRM_MODE_SCALE_CENTER); + + drm_connector_attach_scaling_mode_property(&connector->base, + allowed_scalers); + connector->base.state->scaling_mode = DRM_MODE_SCALE_ASPECT; } } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index a14bdefdcb8f..3bcd9695ef2e 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -602,16 +602,10 @@ static int intel_lvds_set_property(struct drm_connector *connector, struct drm_property *property, uint64_t value) { - struct drm_device *dev = connector->dev; - if (property == dev->mode_config.scaling_mode_property) { + if (property == connector->scaling_mode_property) { struct drm_crtc *crtc; - if (value == DRM_MODE_SCALE_NONE) { - DRM_DEBUG_KMS("no scaling not supported\n"); - return -EINVAL; - } - if (connector->state->scaling_mode == value) { /* the LVDS scaling property is not changed */ return 0; @@ -987,6 +981,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) u32 lvds; int pipe; u8 pin; + u32 allowed_scalers; if (!intel_lvds_supported(dev_priv)) return; @@ -1082,10 +1077,10 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) lvds_encoder->reg = lvds_reg; /* create the scaling mode property */ - drm_mode_create_scaling_mode_property(dev); - drm_object_attach_property(&connector->base, - dev->mode_config.scaling_mode_property, - DRM_MODE_SCALE_ASPECT); + allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT); + allowed_scalers |= BIT(DRM_MODE_SCALE_FULLSCREEN); + allowed_scalers |= BIT(DRM_MODE_SCALE_CENTER); + 
drm_connector_attach_scaling_mode_property(connector, allowed_scalers); connector->state->scaling_mode = DRM_MODE_SCALE_ASPECT; intel_lvds_pps_get_hw_state(dev_priv, &lvds_encoder->init_pps); From 11c1a9ec25e0bfb59c240f6a43b31defd6d6e821 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:37:57 +0200 Subject: [PATCH 011/341] drm/i915: Add plumbing for digital connector state, v3. Some atomic properties are common between the various kinds of connectors, for example a lot of them use panel fitting mode. It makes sense to put a lot of it in a common place, so each connector can use it while they're being converted. Implement the properties required for the connectors: - scaling mode property - force audio property - broadcast rgb - aspect ratio While at it, make clear that intel_digital_connector_atomic_get_property is a hack that has to be removed when all connector properties are converted to atomic. Changes since v1: - Scaling mode and aspect ratio are partly handled in core now. Changes since v2: - Split out the scaling mode / aspect ratio changes to a preparation patch. - Use mode_changed for panel fitter, changes to this property are checked by fastset. - Allowed_scaling_modes is removed, handled through core now. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-6-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 131 +++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_display.c | 14 ++- drivers/gpu/drm/i915/intel_drv.h | 23 +++++ 3 files changed, 159 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 50fb1f76cc5f..182909f266f5 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -36,7 +36,7 @@ #include "intel_drv.h" /** - * intel_connector_atomic_get_property - fetch connector property value + * intel_connector_atomic_get_property - fetch legacy connector property value * @connector: connector to fetch property for * @state: state containing the property value * @property: property to look up @@ -45,12 +45,14 @@ * The DRM core does not store shadow copies of properties for * atomic-capable drivers. This entrypoint is used to fetch * the current value of a driver-specific connector property. + * + * This is a intermediary solution until all connectors are + * converted to support full atomic properties. */ -int -intel_connector_atomic_get_property(struct drm_connector *connector, - const struct drm_connector_state *state, - struct drm_property *property, - uint64_t *val) +int intel_connector_atomic_get_property(struct drm_connector *connector, + const struct drm_connector_state *state, + struct drm_property *property, + uint64_t *val) { int i; @@ -73,7 +75,122 @@ intel_connector_atomic_get_property(struct drm_connector *connector, return -EINVAL; } -/* +/** + * intel_digital_connector_atomic_get_property - hook for connector->atomic_get_property. + * @connector: Connector to get the property for. + * @state: Connector state to retrieve the property from. + * @property: Property to retrieve. + * @val: Return value for the property. 
+ * + * Returns the atomic property value for a digital connector. + */ +int intel_digital_connector_atomic_get_property(struct drm_connector *connector, + const struct drm_connector_state *state, + struct drm_property *property, + uint64_t *val) +{ + struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(state); + + if (property == dev_priv->force_audio_property) + *val = intel_conn_state->force_audio; + else if (property == dev_priv->broadcast_rgb_property) + *val = intel_conn_state->broadcast_rgb; + else { + DRM_DEBUG_ATOMIC("Unknown property %s\n", property->name); + return -EINVAL; + } + + return 0; +} + +/** + * intel_digital_connector_atomic_set_property - hook for connector->atomic_set_property. + * @connector: Connector to set the property for. + * @state: Connector state to set the property on. + * @property: Property to set. + * @val: New value for the property. + * + * Sets the atomic property value for a digital connector. 
+ */ +int intel_digital_connector_atomic_set_property(struct drm_connector *connector, + struct drm_connector_state *state, + struct drm_property *property, + uint64_t val) +{ + struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(state); + + if (property == dev_priv->force_audio_property) { + intel_conn_state->force_audio = val; + return 0; + } + + if (property == dev_priv->broadcast_rgb_property) { + intel_conn_state->broadcast_rgb = val; + return 0; + } + + DRM_DEBUG_ATOMIC("Unknown property %s\n", property->name); + return -EINVAL; +} + +int intel_digital_connector_atomic_check(struct drm_connector *conn, + struct drm_connector_state *new_state) +{ + struct intel_digital_connector_state *new_conn_state = + to_intel_digital_connector_state(new_state); + struct drm_connector_state *old_state = + drm_atomic_get_old_connector_state(new_state->state, conn); + struct intel_digital_connector_state *old_conn_state = + to_intel_digital_connector_state(old_state); + struct drm_crtc_state *crtc_state; + + if (!new_state->crtc) + return 0; + + crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc); + + /* + * These properties are handled by fastset, and might not end + * up in a modeset. + */ + if (new_conn_state->force_audio != old_conn_state->force_audio || + new_conn_state->broadcast_rgb != old_conn_state->broadcast_rgb || + new_conn_state->base.picture_aspect_ratio != old_conn_state->base.picture_aspect_ratio || + new_conn_state->base.scaling_mode != old_conn_state->base.scaling_mode) + crtc_state->mode_changed = true; + + return 0; +} + +/** + * intel_digital_connector_duplicate_state - duplicate connector state + * @connector: digital connector + * + * Allocates and returns a copy of the connector state (both common and + * digital connector specific) for the specified connector. 
+ * + * Returns: The newly allocated connector state, or NULL on failure. + */ +struct drm_connector_state * +intel_digital_connector_duplicate_state(struct drm_connector *connector) +{ + struct intel_digital_connector_state *state; + + state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_connector_duplicate_state(connector, &state->base); + return &state->base; +} + +/** * intel_crtc_duplicate_state - duplicate crtc state * @crtc: drm crtc * diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7fa21df5bcd7..8a115588864d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5974,11 +5974,21 @@ static void intel_connector_verify_state(struct drm_crtc_state *crtc_state, int intel_connector_init(struct intel_connector *connector) { - drm_atomic_helper_connector_reset(&connector->base); + struct intel_digital_connector_state *conn_state; - if (!connector->base.state) + /* + * Allocate enough memory to hold intel_digital_connector_state, + * This might be a few bytes too many, but for connectors that don't + * need it we'll free the state and allocate a smaller one on the first + * succesful commit anyway. 
+ */ + conn_state = kzalloc(sizeof(*conn_state), GFP_KERNEL); + if (!conn_state) return -ENOMEM; + __drm_atomic_helper_connector_reset(&connector->base, + &conn_state->base); + return 0; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 28d30f96b59e..5548a418e0ef 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -330,6 +330,15 @@ struct intel_connector { struct work_struct modeset_retry_work; }; +struct intel_digital_connector_state { + struct drm_connector_state base; + + enum hdmi_force_audio force_audio; + int broadcast_rgb; +}; + +#define to_intel_digital_connector_state(x) container_of(x, struct intel_digital_connector_state, base) + struct dpll { /* given values */ int n; @@ -1912,6 +1921,20 @@ int intel_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, uint64_t *val); + +int intel_digital_connector_atomic_get_property(struct drm_connector *connector, + const struct drm_connector_state *state, + struct drm_property *property, + uint64_t *val); +int intel_digital_connector_atomic_set_property(struct drm_connector *connector, + struct drm_connector_state *state, + struct drm_property *property, + uint64_t val); +int intel_digital_connector_atomic_check(struct drm_connector *conn, + struct drm_connector_state *new_state); +struct drm_connector_state * +intel_digital_connector_duplicate_state(struct drm_connector *connector); + struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc); void intel_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state); From ba14a1adae2f786cf6f14b815330558efacdc0df Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:37:58 +0200 Subject: [PATCH 012/341] drm/i915: Convert DSI connector properties to atomic. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-7-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 38 +++++--------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 77b8dad5fa41..54030b68406a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1587,36 +1587,6 @@ static int intel_dsi_get_modes(struct drm_connector *connector) return 1; } -static int intel_dsi_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) -{ - struct drm_crtc *crtc; - int ret; - - ret = drm_object_property_set_value(&connector->base, property, val); - if (ret) - return ret; - - if (property == connector->scaling_mode_property) { - if (connector->state->scaling_mode == val) - return 0; - - connector->state->scaling_mode = val; - } - - crtc = connector->state->crtc; - if (crtc && crtc->state->enable) { - /* - * If the CRTC is enabled, the display will be changed - * according to the new panel fitting mode. 
- */ - intel_crtc_restore_mode(crtc); - } - - return 0; -} - static void intel_dsi_connector_destroy(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); @@ -1645,6 +1615,7 @@ static const struct drm_encoder_funcs intel_dsi_funcs = { static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = { .get_modes = intel_dsi_get_modes, .mode_valid = intel_dsi_mode_valid, + .atomic_check = intel_digital_connector_atomic_check, }; static const struct drm_connector_funcs intel_dsi_connector_funcs = { @@ -1653,10 +1624,11 @@ static const struct drm_connector_funcs intel_dsi_connector_funcs = { .early_unregister = intel_connector_unregister, .destroy = intel_dsi_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_dsi_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, + .atomic_get_property = intel_digital_connector_atomic_get_property, + .atomic_set_property = intel_digital_connector_atomic_set_property, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_digital_connector_duplicate_state, }; static void intel_dsi_add_properties(struct intel_connector *connector) From ca937582d9b2e2d041fb8a9f087a93cd62f00c2f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:37:59 +0200 Subject: [PATCH 013/341] drm/i915: Convert LVDS connector properties to atomic. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-8-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_lvds.c | 35 +++++-------------------------- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 3bcd9695ef2e..d2c2bca1b327 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -598,49 +598,24 @@ static void intel_lvds_destroy(struct drm_connector *connector) kfree(connector); } -static int intel_lvds_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t value) -{ - - if (property == connector->scaling_mode_property) { - struct drm_crtc *crtc; - - if (connector->state->scaling_mode == value) { - /* the LVDS scaling property is not changed */ - return 0; - } - connector->state->scaling_mode = value; - - crtc = intel_attached_encoder(connector)->base.crtc; - if (crtc && crtc->state->enable) { - /* - * If the CRTC is enabled, the display will be changed - * according to the new panel fitting mode. 
- */ - intel_crtc_restore_mode(crtc); - } - } - - return 0; -} - static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs = { .get_modes = intel_lvds_get_modes, .mode_valid = intel_lvds_mode_valid, + .atomic_check = intel_digital_connector_atomic_check, }; static const struct drm_connector_funcs intel_lvds_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = intel_lvds_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_lvds_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, + .atomic_get_property = intel_digital_connector_atomic_get_property, + .atomic_set_property = intel_digital_connector_atomic_set_property, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_lvds_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_digital_connector_duplicate_state, }; static const struct drm_encoder_funcs intel_lvds_enc_funcs = { From e6b72c949376ca7a594ea585e6c8d85053fbe440 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:38:00 +0200 Subject: [PATCH 014/341] drm/i915: Make intel_dp->has_audio reflect hw state only Always detect if audio is available during edid detection. With less magic switching it's easier to convert the dp connector properties to atomic. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-9-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 38 ++++++++++++++------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 476cac07da7c..22822c26d0a7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1671,7 +1671,12 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->has_drrs = false; - pipe_config->has_audio = intel_dp->has_audio && port != PORT_A; + if (port == PORT_A) + pipe_config->has_audio = false; + else if (intel_dp->force_audio == HDMI_AUDIO_AUTO) + pipe_config->has_audio = intel_dp->has_audio; + else + pipe_config->has_audio = intel_dp->force_audio == HDMI_AUDIO_ON; if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) { intel_fixed_panel_mode(intel_connector->panel.fixed_mode, @@ -4602,10 +4607,7 @@ intel_dp_set_edid(struct intel_dp *intel_dp) edid = intel_dp_get_edid(intel_dp); intel_connector->detect_edid = edid; - if (intel_dp->force_audio != HDMI_AUDIO_AUTO) - intel_dp->has_audio = intel_dp->force_audio == HDMI_AUDIO_ON; - else - intel_dp->has_audio = drm_detect_monitor_audio(edid); + intel_dp->has_audio = drm_detect_monitor_audio(edid); } static void @@ -4813,19 +4815,6 @@ static int intel_dp_get_modes(struct drm_connector *connector) return 0; } -static bool -intel_dp_detect_audio(struct drm_connector *connector) -{ - bool has_audio = false; - struct edid *edid; - - edid = to_intel_connector(connector)->detect_edid; - if (edid) - has_audio = drm_detect_monitor_audio(edid); - - return has_audio; -} - static int intel_dp_set_property(struct drm_connector *connector, struct drm_property *property, @@ -4842,22 +4831,27 @@ intel_dp_set_property(struct drm_connector *connector, if (property == 
dev_priv->force_audio_property) { int i = val; - bool has_audio; + bool has_audio, old_has_audio; + int old_force_audio = intel_dp->force_audio; if (i == intel_dp->force_audio) return 0; + if (old_force_audio == HDMI_AUDIO_AUTO) + old_has_audio = intel_dp->has_audio; + else + old_has_audio = old_force_audio; + intel_dp->force_audio = i; if (i == HDMI_AUDIO_AUTO) - has_audio = intel_dp_detect_audio(connector); + has_audio = intel_dp->has_audio; else has_audio = (i == HDMI_AUDIO_ON); - if (has_audio == intel_dp->has_audio) + if (has_audio == old_has_audio) return 0; - intel_dp->has_audio = has_audio; goto done; } From 8f647a0148aab626e51a9df3a6ba0a77e7ea6b81 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:38:01 +0200 Subject: [PATCH 015/341] drm/i915: Convert intel_dp properties to atomic, v2. intel_dp supports 3 properties, scaling mode, broadcast rgb and force_audio. intel_digital_connector handles the plumbing, so we only have to hook this up in compute_config and init. Changes since v1: - Remove limited_color_range too, unused. 
(danvet) Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-10-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 105 ++++--------------------------- drivers/gpu/drm/i915/intel_drv.h | 3 - 2 files changed, 11 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 22822c26d0a7..49a1db3787c5 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1648,6 +1648,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_connector *intel_connector = intel_dp->attached_connector; + struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); int lane_count, clock; int min_lane_count = 1; int max_lane_count = intel_dp_max_lane_count(intel_dp); @@ -1673,10 +1675,10 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_drrs = false; if (port == PORT_A) pipe_config->has_audio = false; - else if (intel_dp->force_audio == HDMI_AUDIO_AUTO) + else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) pipe_config->has_audio = intel_dp->has_audio; else - pipe_config->has_audio = intel_dp->force_audio == HDMI_AUDIO_ON; + pipe_config->has_audio = intel_conn_state->force_audio == HDMI_AUDIO_ON; if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) { intel_fixed_panel_mode(intel_connector->panel.fixed_mode, @@ -1763,7 +1765,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, return false; found: - if (intel_dp->color_range_auto) { + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* * See: * CEA-861-E - 5.1 Default Encoding Parameters @@ -1775,7 +1777,7 @@ found: HDMI_QUANTIZATION_RANGE_LIMITED; } else { pipe_config->limited_color_range = - 
intel_dp->limited_color_range; + intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; } pipe_config->lane_count = lane_count; @@ -4815,92 +4817,6 @@ static int intel_dp_get_modes(struct drm_connector *connector) return 0; } -static int -intel_dp_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) -{ - struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_encoder *intel_encoder = intel_attached_encoder(connector); - struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); - int ret; - - ret = drm_object_property_set_value(&connector->base, property, val); - if (ret) - return ret; - - if (property == dev_priv->force_audio_property) { - int i = val; - bool has_audio, old_has_audio; - int old_force_audio = intel_dp->force_audio; - - if (i == intel_dp->force_audio) - return 0; - - if (old_force_audio == HDMI_AUDIO_AUTO) - old_has_audio = intel_dp->has_audio; - else - old_has_audio = old_force_audio; - - intel_dp->force_audio = i; - - if (i == HDMI_AUDIO_AUTO) - has_audio = intel_dp->has_audio; - else - has_audio = (i == HDMI_AUDIO_ON); - - if (has_audio == old_has_audio) - return 0; - - goto done; - } - - if (property == dev_priv->broadcast_rgb_property) { - bool old_auto = intel_dp->color_range_auto; - bool old_range = intel_dp->limited_color_range; - - switch (val) { - case INTEL_BROADCAST_RGB_AUTO: - intel_dp->color_range_auto = true; - break; - case INTEL_BROADCAST_RGB_FULL: - intel_dp->color_range_auto = false; - intel_dp->limited_color_range = false; - break; - case INTEL_BROADCAST_RGB_LIMITED: - intel_dp->color_range_auto = false; - intel_dp->limited_color_range = true; - break; - default: - return -EINVAL; - } - - if (old_auto == intel_dp->color_range_auto && - old_range == intel_dp->limited_color_range) - return 0; - - goto done; - } - - if (property == connector->scaling_mode_property) { - if (connector->state->scaling_mode == val) { - /* the eDP scaling property is not 
changed */ - return 0; - } - connector->state->scaling_mode = val; - - goto done; - } - - return -EINVAL; - -done: - if (intel_encoder->base.crtc) - intel_crtc_restore_mode(intel_encoder->base.crtc); - - return 0; -} - static int intel_dp_connector_register(struct drm_connector *connector) { @@ -5059,19 +4975,21 @@ static const struct drm_connector_funcs intel_dp_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .force = intel_dp_force, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_dp_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, + .atomic_get_property = intel_digital_connector_atomic_get_property, + .atomic_set_property = intel_digital_connector_atomic_set_property, .late_register = intel_dp_connector_register, .early_unregister = intel_dp_connector_unregister, .destroy = intel_dp_connector_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_digital_connector_duplicate_state, }; static const struct drm_connector_helper_funcs intel_dp_connector_helper_funcs = { .detect_ctx = intel_dp_detect, .get_modes = intel_dp_get_modes, .mode_valid = intel_dp_mode_valid, + .atomic_check = intel_digital_connector_atomic_check, }; static const struct drm_encoder_funcs intel_dp_enc_funcs = { @@ -5169,7 +5087,6 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - intel_dp->color_range_auto = true; if (is_edp(intel_dp)) { u32 allowed_scalers; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 5548a418e0ef..15aa31bf2f2c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -982,9 +982,6 @@ struct intel_dp { bool detect_done; bool 
channel_eq_status; bool reset_link_params; - enum hdmi_force_audio force_audio; - bool limited_color_range; - bool color_range_auto; uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; From 7a5ca19f8baf12d617b34d9a2004d18b84d05fb5 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:38:02 +0200 Subject: [PATCH 016/341] drm/i915: Convert intel_hdmi connector properties to atomic intel_hdmi supports 3 properties, force_audio, broadcast rgb and scaling mode. The last one is only created for eDP, so the is_eDP in set_property is not required. Panel fitting and broadcast rgb are straightforward and only require changing compute_config. force_audio is also used to force DVI mode, which means changes to compute_config and mode_valid. mode_valid is called with connection_mutex held, so it can safely dereference connector->state. Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-11-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 3 - drivers/gpu/drm/i915/intel_hdmi.c | 149 +++++++----------------------- 2 files changed, 33 insertions(+), 119 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 15aa31bf2f2c..6020d8340bf3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -904,11 +904,8 @@ struct intel_hdmi { enum drm_dp_dual_mode_type type; int max_tmds_clock; } dp_dual_mode; - bool limited_color_range; - bool color_range_auto; bool has_hdmi_sink; bool has_audio; - enum hdmi_force_audio force_audio; bool rgb_quant_range_selectable; struct intel_connector *attached_connector; void (*write_infoframe)(struct drm_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 58d690393b29..41267ffb3624 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1218,7 +1218,8 @@ static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv) } static int hdmi_port_clock_limit(struct intel_hdmi *hdmi, - bool respect_downstream_limits) + bool respect_downstream_limits, + bool force_dvi) { struct drm_device *dev = intel_hdmi_to_dev(hdmi); int max_tmds_clock = intel_hdmi_source_max_tmds_clock(to_i915(dev)); @@ -1234,7 +1235,7 @@ static int hdmi_port_clock_limit(struct intel_hdmi *hdmi, if (info->max_tmds_clock) max_tmds_clock = min(max_tmds_clock, info->max_tmds_clock); - else if (!hdmi->has_hdmi_sink) + else if (!hdmi->has_hdmi_sink || force_dvi) max_tmds_clock = min(max_tmds_clock, 165000); } @@ -1243,13 +1244,14 @@ static int hdmi_port_clock_limit(struct intel_hdmi *hdmi, static enum drm_mode_status hdmi_port_clock_valid(struct intel_hdmi *hdmi, - int clock, bool respect_downstream_limits) + int clock, bool respect_downstream_limits, + bool force_dvi) { struct drm_i915_private *dev_priv = to_i915(intel_hdmi_to_dev(hdmi)); if (clock < 25000) return MODE_CLOCK_LOW; - if (clock > hdmi_port_clock_limit(hdmi, respect_downstream_limits)) + if (clock > hdmi_port_clock_limit(hdmi, respect_downstream_limits, force_dvi)) return MODE_CLOCK_HIGH; /* BXT DPLL can't generate 223-240 MHz */ @@ -1273,6 +1275,8 @@ intel_hdmi_mode_valid(struct drm_connector *connector, enum drm_mode_status status; int clock; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + bool force_dvi = + READ_ONCE(to_intel_digital_connector_state(connector->state)->force_audio) == HDMI_AUDIO_OFF_DVI; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; @@ -1289,11 +1293,11 @@ intel_hdmi_mode_valid(struct drm_connector *connector, clock *= 2; /* check if we can do 8bpc */ - status = hdmi_port_clock_valid(hdmi, clock, true); + status = hdmi_port_clock_valid(hdmi, clock, true, force_dvi); /* if we can't do 8bpc we may still be able to do 12bpc */ - if (!HAS_GMCH_DISPLAY(dev_priv) && status 
!= MODE_OK) - status = hdmi_port_clock_valid(hdmi, clock * 3 / 2, true); + if (!HAS_GMCH_DISPLAY(dev_priv) && status != MODE_OK && hdmi->has_hdmi_sink && !force_dvi) + status = hdmi_port_clock_valid(hdmi, clock * 3 / 2, true, force_dvi); return status; } @@ -1343,16 +1347,19 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct drm_scdc *scdc = &conn_state->connector->display_info.hdmi.scdc; + struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock; int clock_12bpc = clock_8bpc * 3 / 2; int desired_bpp; + bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI; - pipe_config->has_hdmi_sink = intel_hdmi->has_hdmi_sink; + pipe_config->has_hdmi_sink = !force_dvi && intel_hdmi->has_hdmi_sink; if (pipe_config->has_hdmi_sink) pipe_config->has_infoframe = true; - if (intel_hdmi->color_range_auto) { + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* See CEA-861-E - 5.1 Default Encoding Parameters */ pipe_config->limited_color_range = pipe_config->has_hdmi_sink && @@ -1360,7 +1367,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, HDMI_QUANTIZATION_RANGE_LIMITED; } else { pipe_config->limited_color_range = - intel_hdmi->limited_color_range; + intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) { @@ -1372,8 +1379,13 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv)) pipe_config->has_pch_encoder = true; - if (pipe_config->has_hdmi_sink && intel_hdmi->has_audio) - pipe_config->has_audio = true; + if (pipe_config->has_hdmi_sink) { + if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) + pipe_config->has_audio = intel_hdmi->has_audio; + else 
+ pipe_config->has_audio = + intel_conn_state->force_audio == HDMI_AUDIO_ON; + } /* * HDMI is either 12 or 8, so if the display lets 10bpc sneak @@ -1381,8 +1393,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, * outputs. We also need to check that the higher clock still fits * within limits. */ - if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink && - hdmi_port_clock_valid(intel_hdmi, clock_12bpc, true) == MODE_OK && + if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink && !force_dvi && + hdmi_port_clock_valid(intel_hdmi, clock_12bpc, true, force_dvi) == MODE_OK && hdmi_12bpc_possible(pipe_config)) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; @@ -1402,7 +1414,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, } if (hdmi_port_clock_valid(intel_hdmi, pipe_config->port_clock, - false) != MODE_OK) { + false, force_dvi) != MODE_OK) { DRM_DEBUG_KMS("unsupported HDMI clock, rejecting mode\n"); return false; } @@ -1509,13 +1521,7 @@ intel_hdmi_set_edid(struct drm_connector *connector) drm_rgb_quant_range_selectable(edid); intel_hdmi->has_audio = drm_detect_monitor_audio(edid); - if (intel_hdmi->force_audio != HDMI_AUDIO_AUTO) - intel_hdmi->has_audio = - intel_hdmi->force_audio == HDMI_AUDIO_ON; - - if (intel_hdmi->force_audio != HDMI_AUDIO_OFF_DVI) - intel_hdmi->has_hdmi_sink = - drm_detect_hdmi_monitor(edid); + intel_hdmi->has_hdmi_sink = drm_detect_hdmi_monitor(edid); connected = true; } @@ -1577,96 +1583,6 @@ static int intel_hdmi_get_modes(struct drm_connector *connector) return intel_connector_update_modes(connector, edid); } -static bool -intel_hdmi_detect_audio(struct drm_connector *connector) -{ - bool has_audio = false; - struct edid *edid; - - edid = to_intel_connector(connector)->detect_edid; - if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) - has_audio = drm_detect_monitor_audio(edid); - - return has_audio; -} - -static int -intel_hdmi_set_property(struct drm_connector 
*connector, - struct drm_property *property, - uint64_t val) -{ - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); - struct intel_digital_port *intel_dig_port = - hdmi_to_dig_port(intel_hdmi); - struct drm_i915_private *dev_priv = to_i915(connector->dev); - int ret; - - ret = drm_object_property_set_value(&connector->base, property, val); - if (ret) - return ret; - - if (property == dev_priv->force_audio_property) { - enum hdmi_force_audio i = val; - bool has_audio; - - if (i == intel_hdmi->force_audio) - return 0; - - intel_hdmi->force_audio = i; - - if (i == HDMI_AUDIO_AUTO) - has_audio = intel_hdmi_detect_audio(connector); - else - has_audio = (i == HDMI_AUDIO_ON); - - if (i == HDMI_AUDIO_OFF_DVI) - intel_hdmi->has_hdmi_sink = 0; - - intel_hdmi->has_audio = has_audio; - goto done; - } - - if (property == dev_priv->broadcast_rgb_property) { - bool old_auto = intel_hdmi->color_range_auto; - bool old_range = intel_hdmi->limited_color_range; - - switch (val) { - case INTEL_BROADCAST_RGB_AUTO: - intel_hdmi->color_range_auto = true; - break; - case INTEL_BROADCAST_RGB_FULL: - intel_hdmi->color_range_auto = false; - intel_hdmi->limited_color_range = false; - break; - case INTEL_BROADCAST_RGB_LIMITED: - intel_hdmi->color_range_auto = false; - intel_hdmi->limited_color_range = true; - break; - default: - return -EINVAL; - } - - if (old_auto == intel_hdmi->color_range_auto && - old_range == intel_hdmi->limited_color_range) - return 0; - - goto done; - } - - if (property == connector->dev->mode_config.aspect_ratio_property) { - connector->state->picture_aspect_ratio = val; - goto done; - } - - return -EINVAL; - -done: - if (intel_dig_port->base.base.crtc) - intel_crtc_restore_mode(intel_dig_port->base.base.crtc); - - return 0; -} - static void intel_hdmi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -1791,18 +1707,20 @@ static const struct drm_connector_funcs 
intel_hdmi_connector_funcs = { .detect = intel_hdmi_detect, .force = intel_hdmi_force, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_hdmi_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, + .atomic_get_property = intel_digital_connector_atomic_get_property, + .atomic_set_property = intel_digital_connector_atomic_set_property, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_hdmi_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_digital_connector_duplicate_state, }; static const struct drm_connector_helper_funcs intel_hdmi_connector_helper_funcs = { .get_modes = intel_hdmi_get_modes, .mode_valid = intel_hdmi_mode_valid, + .atomic_check = intel_digital_connector_atomic_check, }; static const struct drm_encoder_funcs intel_hdmi_enc_funcs = { @@ -1814,7 +1732,6 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c { intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - intel_hdmi->color_range_auto = true; intel_attach_aspect_ratio_property(connector); connector->state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; } From b32962f87acdc52a9734d15e9053e0f8fa1dcc9e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:38:03 +0200 Subject: [PATCH 017/341] drm/i915: Handle force_audio correctly in intel_sdvo Do the same as other connectors, attempt to detect hdmi audio in the detect() callback, and only use the force_audio property as override. Compute has_audio in pipe_config, and use that value instead of the probed value directly. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-12-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_sdvo.c | 51 +++++++++++-------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 6cc181203135..1567fe679300 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1122,6 +1122,8 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_sdvo *intel_sdvo = to_sdvo(encoder); + struct intel_sdvo_connector *intel_sdvo_connector = + to_intel_sdvo_connector(conn_state->connector); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct drm_display_mode *mode = &pipe_config->base.mode; @@ -1160,7 +1162,12 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, pipe_config->pixel_multiplier = intel_sdvo_get_pixel_multiplier(adjusted_mode); - pipe_config->has_hdmi_sink = intel_sdvo->has_hdmi_monitor; + if (intel_sdvo_connector->force_audio != HDMI_AUDIO_OFF_DVI) + pipe_config->has_hdmi_sink = intel_sdvo->has_hdmi_monitor; + + if (intel_sdvo_connector->force_audio == HDMI_AUDIO_ON || + (intel_sdvo_connector->force_audio == HDMI_AUDIO_AUTO && intel_sdvo->has_hdmi_audio)) + pipe_config->has_audio = true; if (intel_sdvo->color_range_auto) { /* See CEA-861-E - 5.1 Default Encoding Parameters */ @@ -1285,7 +1292,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, else sdvox |= SDVO_PIPE_SEL(crtc->pipe); - if (intel_sdvo->has_hdmi_audio) + if (crtc_state->has_audio) sdvox |= SDVO_AUDIO_ENABLE; if (INTEL_GEN(dev_priv) >= 4) { @@ -1694,12 +1701,6 @@ intel_sdvo_tmds_sink_detect(struct drm_connector *connector) kfree(edid); } - if (status == connector_status_connected) { - struct intel_sdvo_connector *intel_sdvo_connector 
= to_intel_sdvo_connector(connector); - if (intel_sdvo_connector->force_audio != HDMI_AUDIO_AUTO) - intel_sdvo->has_hdmi_audio = (intel_sdvo_connector->force_audio == HDMI_AUDIO_ON); - } - return status; } @@ -1978,23 +1979,6 @@ static void intel_sdvo_destroy(struct drm_connector *connector) kfree(intel_sdvo_connector); } -static bool intel_sdvo_detect_hdmi_audio(struct drm_connector *connector) -{ - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); - struct edid *edid; - bool has_audio = false; - - if (!intel_sdvo->is_hdmi) - return false; - - edid = intel_sdvo_get_edid(connector); - if (edid != NULL && edid->input & DRM_EDID_INPUT_DIGITAL) - has_audio = drm_detect_monitor_audio(edid); - kfree(edid); - - return has_audio; -} - static int intel_sdvo_set_property(struct drm_connector *connector, struct drm_property *property, @@ -2013,22 +1997,23 @@ intel_sdvo_set_property(struct drm_connector *connector, if (property == dev_priv->force_audio_property) { int i = val; - bool has_audio; + bool has_audio, old_audio; - if (i == intel_sdvo_connector->force_audio) - return 0; - - intel_sdvo_connector->force_audio = i; + if (intel_sdvo_connector->force_audio == HDMI_AUDIO_AUTO) + old_audio = intel_sdvo->has_hdmi_audio; + else + old_audio = intel_sdvo_connector->force_audio == HDMI_AUDIO_ON; if (i == HDMI_AUDIO_AUTO) - has_audio = intel_sdvo_detect_hdmi_audio(connector); + has_audio = intel_sdvo->has_hdmi_audio; else has_audio = (i == HDMI_AUDIO_ON); - if (has_audio == intel_sdvo->has_hdmi_audio) + intel_sdvo_connector->force_audio = i; + + if (has_audio == old_audio) return 0; - intel_sdvo->has_hdmi_audio = has_audio; goto done; } From 630d30a4ee27997323bc0b38843e5b159dd5b2ed Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 May 2017 15:38:04 +0200 Subject: [PATCH 018/341] drm/i915: Convert intel_sdvo connector properties to atomic. SDVO was the last connector still using the legacy paths for properties, and for good reason!
This connector implements a lot of properties dynamically, and some of them are shared with the digital connector state, so sdvo_connector_state subclasses intel_digital_connector_state. set_property had a lot of validation, but this is handled in the drm core, so most of the validation can die off. The properties are written right before enabling the connector, since there is no good way to update the properties without a crtc. Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170501133804.8116-13-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 40 -- drivers/gpu/drm/i915/intel_display.c | 37 -- drivers/gpu/drm/i915/intel_drv.h | 6 - drivers/gpu/drm/i915/intel_sdvo.c | 528 +++++++++++++++------------ 4 files changed, 285 insertions(+), 326 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 182909f266f5..d791b3ef89b5 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -35,46 +35,6 @@ #include #include "intel_drv.h" -/** - * intel_connector_atomic_get_property - fetch legacy connector property value - * @connector: connector to fetch property for - * @state: state containing the property value - * @property: property to look up - * @val: pointer to write property value into - * - * The DRM core does not store shadow copies of properties for - * atomic-capable drivers. This entrypoint is used to fetch - * the current value of a driver-specific connector property. - * - * This is a intermediary solution until all connectors are - * converted to support full atomic properties. - */ -int intel_connector_atomic_get_property(struct drm_connector *connector, - const struct drm_connector_state *state, - struct drm_property *property, - uint64_t *val) -{ - int i; - - /* - * TODO: We only have atomic modeset for planes at the moment, so the - * crtc/connector code isn't quite ready yet.
Until it's ready, - * continue to look up all property values in the DRM's shadow copy - * in obj->properties->values[]. - * - * When the crtc/connector state work matures, this function should - * be updated to read the values out of the state structure instead. - */ - for (i = 0; i < connector->base.properties->count; i++) { - if (connector->base.properties->properties[i] == property) { - *val = connector->base.properties->values[i]; - return 0; - } - } - - return -EINVAL; -} - /** * intel_digital_connector_atomic_get_property - hook for connector->atomic_get_property. * @connector: Connector to get the property for. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8a115588864d..a8ececfc759e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13262,43 +13262,6 @@ static int intel_atomic_commit(struct drm_device *dev, return 0; } -void intel_crtc_restore_mode(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_atomic_state *state; - struct drm_crtc_state *crtc_state; - int ret; - - state = drm_atomic_state_alloc(dev); - if (!state) { - DRM_DEBUG_KMS("[CRTC:%d:%s] crtc restore failed, out of memory", - crtc->base.id, crtc->name); - return; - } - - state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; - -retry: - crtc_state = drm_atomic_get_crtc_state(state, crtc); - ret = PTR_ERR_OR_ZERO(crtc_state); - if (!ret) { - if (!crtc_state->active) - goto out; - - crtc_state->mode_changed = true; - ret = drm_atomic_commit(state); - } - - if (ret == -EDEADLK) { - drm_atomic_state_clear(state); - drm_modeset_backoff(state->acquire_ctx); - goto retry; - } - -out: - drm_atomic_state_put(state); -} - static const struct drm_crtc_funcs intel_crtc_funcs = { .gamma_set = drm_atomic_helper_legacy_gamma_set, .set_config = drm_atomic_helper_set_config, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6020d8340bf3..f63e8aa76e9a 
100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1349,7 +1349,6 @@ unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv); void intel_mark_busy(struct drm_i915_private *dev_priv); void intel_mark_idle(struct drm_i915_private *dev_priv); -void intel_crtc_restore_mode(struct drm_crtc *crtc); int intel_display_suspend(struct drm_device *dev); void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); void intel_encoder_destroy(struct drm_encoder *encoder); @@ -1911,11 +1910,6 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work void intel_tv_init(struct drm_i915_private *dev_priv); /* intel_atomic.c */ -int intel_connector_atomic_get_property(struct drm_connector *connector, - const struct drm_connector_state *state, - struct drm_property *property, - uint64_t *val); - int intel_digital_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 1567fe679300..f4329d20b6f6 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -99,13 +99,6 @@ struct intel_sdvo { */ uint16_t hotplug_active; - /** - * This is used to select the color range of RBG outputs in HDMI mode. - * It is only valid when using TMDS encoding and 8 bit per color mode. - */ - uint32_t color_range; - bool color_range_auto; - /** * This is set if we're going to treat the device as TV-out. * @@ -117,9 +110,6 @@ struct intel_sdvo { enum port port; - /* This is for current tv format name */ - int tv_format_index; - /** * This is set if we treat the device as HDMI, instead of DVI. 
*/ @@ -154,8 +144,6 @@ struct intel_sdvo_connector { /* Mark the type of connector */ uint16_t output_flag; - enum hdmi_force_audio force_audio; - /* This contains all current supported TV format */ u8 tv_format_supported[TV_FORMAT_NUM]; int format_supported_num; @@ -182,24 +170,19 @@ struct intel_sdvo_connector { /* add the property for the SDVO-TV/LVDS */ struct drm_property *brightness; - /* Add variable to record current setting for the above property */ - u32 left_margin, right_margin, top_margin, bottom_margin; - /* this is to get the range of margin.*/ - u32 max_hscan, max_vscan; - u32 max_hpos, cur_hpos; - u32 max_vpos, cur_vpos; - u32 cur_brightness, max_brightness; - u32 cur_contrast, max_contrast; - u32 cur_saturation, max_saturation; - u32 cur_hue, max_hue; - u32 cur_sharpness, max_sharpness; - u32 cur_flicker_filter, max_flicker_filter; - u32 cur_flicker_filter_adaptive, max_flicker_filter_adaptive; - u32 cur_flicker_filter_2d, max_flicker_filter_2d; - u32 cur_tv_chroma_filter, max_tv_chroma_filter; - u32 cur_tv_luma_filter, max_tv_luma_filter; - u32 cur_dot_crawl, max_dot_crawl; + u32 max_hscan, max_vscan; +}; + +struct intel_sdvo_connector_state { + /* base.base: tv.saturation/contrast/hue/brightness */ + struct intel_digital_connector_state base; + + struct { + unsigned overscan_h, overscan_v, hpos, vpos, sharpness; + unsigned flicker_filter, flicker_filter_2d, flicker_filter_adaptive; + unsigned chroma_filter, luma_filter, dot_crawl; + } tv; }; static struct intel_sdvo *to_sdvo(struct intel_encoder *encoder) @@ -212,9 +195,16 @@ static struct intel_sdvo *intel_attached_sdvo(struct drm_connector *connector) return to_sdvo(intel_attached_encoder(connector)); } -static struct intel_sdvo_connector *to_intel_sdvo_connector(struct drm_connector *connector) +static struct intel_sdvo_connector * +to_intel_sdvo_connector(struct drm_connector *connector) { - return container_of(to_intel_connector(connector), struct intel_sdvo_connector, base); + return 
container_of(connector, struct intel_sdvo_connector, base.base); +} + +static struct intel_sdvo_connector_state * +to_intel_sdvo_connector_state(struct drm_connector_state *conn_state) +{ + return container_of(conn_state, struct intel_sdvo_connector_state, base.base); } static bool @@ -1030,12 +1020,13 @@ static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, sdvo_data, sizeof(sdvo_data)); } -static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo) +static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, + struct drm_connector_state *conn_state) { struct intel_sdvo_tv_format format; uint32_t format_map; - format_map = 1 << intel_sdvo->tv_format_index; + format_map = 1 << conn_state->tv.mode; memset(&format, 0, sizeof(format)); memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map))); @@ -1122,8 +1113,8 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_sdvo_connector *intel_sdvo_connector = - to_intel_sdvo_connector(conn_state->connector); + struct intel_sdvo_connector_state *intel_sdvo_state = + to_intel_sdvo_connector_state(conn_state); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct drm_display_mode *mode = &pipe_config->base.mode; @@ -1162,14 +1153,14 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, pipe_config->pixel_multiplier = intel_sdvo_get_pixel_multiplier(adjusted_mode); - if (intel_sdvo_connector->force_audio != HDMI_AUDIO_OFF_DVI) + if (intel_sdvo_state->base.force_audio != HDMI_AUDIO_OFF_DVI) pipe_config->has_hdmi_sink = intel_sdvo->has_hdmi_monitor; - if (intel_sdvo_connector->force_audio == HDMI_AUDIO_ON || - (intel_sdvo_connector->force_audio == HDMI_AUDIO_AUTO && intel_sdvo->has_hdmi_audio)) + if (intel_sdvo_state->base.force_audio == HDMI_AUDIO_ON || + (intel_sdvo_state->base.force_audio == HDMI_AUDIO_AUTO && 
intel_sdvo->has_hdmi_audio)) pipe_config->has_audio = true; - if (intel_sdvo->color_range_auto) { + if (intel_sdvo_state->base.broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* See CEA-861-E - 5.1 Default Encoding Parameters */ /* FIXME: This bit is only valid when using TMDS encoding and 8 * bit per color mode. */ @@ -1178,7 +1169,7 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = true; } else { if (pipe_config->has_hdmi_sink && - intel_sdvo->color_range == HDMI_COLOR_RANGE_16_235) + intel_sdvo_state->base.broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED) pipe_config->limited_color_range = true; } @@ -1193,6 +1184,68 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, return true; } +#define UPDATE_PROPERTY(input, NAME) \ + do { \ + val = input; \ + intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_##NAME, &val, sizeof(val)); \ + } while (0) + +static void intel_sdvo_update_props(struct intel_sdvo *intel_sdvo, + struct intel_sdvo_connector_state *sdvo_state) +{ + struct drm_connector_state *conn_state = &sdvo_state->base.base; + struct intel_sdvo_connector *intel_sdvo_conn = + to_intel_sdvo_connector(conn_state->connector); + uint16_t val; + + if (intel_sdvo_conn->left) + UPDATE_PROPERTY(sdvo_state->tv.overscan_h, OVERSCAN_H); + + if (intel_sdvo_conn->top) + UPDATE_PROPERTY(sdvo_state->tv.overscan_v, OVERSCAN_V); + + if (intel_sdvo_conn->hpos) + UPDATE_PROPERTY(sdvo_state->tv.hpos, HPOS); + + if (intel_sdvo_conn->vpos) + UPDATE_PROPERTY(sdvo_state->tv.vpos, VPOS); + + if (intel_sdvo_conn->saturation) + UPDATE_PROPERTY(conn_state->tv.saturation, SATURATION); + + if (intel_sdvo_conn->contrast) + UPDATE_PROPERTY(conn_state->tv.contrast, CONTRAST); + + if (intel_sdvo_conn->hue) + UPDATE_PROPERTY(conn_state->tv.hue, HUE); + + if (intel_sdvo_conn->brightness) + UPDATE_PROPERTY(conn_state->tv.brightness, BRIGHTNESS); + + if (intel_sdvo_conn->sharpness) + UPDATE_PROPERTY(sdvo_state->tv.sharpness, 
SHARPNESS); + + if (intel_sdvo_conn->flicker_filter) + UPDATE_PROPERTY(sdvo_state->tv.flicker_filter, FLICKER_FILTER); + + if (intel_sdvo_conn->flicker_filter_2d) + UPDATE_PROPERTY(sdvo_state->tv.flicker_filter_2d, FLICKER_FILTER_2D); + + if (intel_sdvo_conn->flicker_filter_adaptive) + UPDATE_PROPERTY(sdvo_state->tv.flicker_filter_adaptive, FLICKER_FILTER_ADAPTIVE); + + if (intel_sdvo_conn->tv_chroma_filter) + UPDATE_PROPERTY(sdvo_state->tv.chroma_filter, TV_CHROMA_FILTER); + + if (intel_sdvo_conn->tv_luma_filter) + UPDATE_PROPERTY(sdvo_state->tv.luma_filter, TV_LUMA_FILTER); + + if (intel_sdvo_conn->dot_crawl) + UPDATE_PROPERTY(sdvo_state->tv.dot_crawl, DOT_CRAWL); + +#undef UPDATE_PROPERTY +} + static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) @@ -1200,6 +1253,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(conn_state); struct drm_display_mode *mode = &crtc_state->base.mode; struct intel_sdvo *intel_sdvo = to_sdvo(intel_encoder); u32 sdvox; @@ -1207,6 +1261,8 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, struct intel_sdvo_dtd input_dtd, output_dtd; int rate; + intel_sdvo_update_props(intel_sdvo, sdvo_state); + /* First, set the input mapping for the first input to our controlled * output. 
This is only correct if we're a single-input device, in * which case the first input is the output from the appropriate SDVO @@ -1248,7 +1304,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, intel_sdvo_set_encode(intel_sdvo, SDVO_ENCODE_DVI); if (intel_sdvo->is_tv && - !intel_sdvo_set_tv_format(intel_sdvo)) + !intel_sdvo_set_tv_format(intel_sdvo, conn_state)) return; intel_sdvo_get_dtd_from_mode(&input_dtd, adjusted_mode); @@ -1880,6 +1936,7 @@ static const struct drm_display_mode sdvo_tv_modes[] = { static void intel_sdvo_get_tv_modes(struct drm_connector *connector) { struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); + const struct drm_connector_state *conn_state = connector->state; struct intel_sdvo_sdtv_resolution_request tv_res; uint32_t reply = 0, format_map = 0; int i; @@ -1890,7 +1947,7 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector) /* Read the list of supported input resolutions for the selected TV * format. */ - format_map = 1 << intel_sdvo->tv_format_index; + format_map = 1 << conn_state->tv.mode; memcpy(&tv_res, &format_map, min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request))); @@ -1980,175 +2037,120 @@ static void intel_sdvo_destroy(struct drm_connector *connector) } static int -intel_sdvo_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) +intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, + const struct drm_connector_state *state, + struct drm_property *property, + uint64_t *val) { - struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); - struct drm_i915_private *dev_priv = to_i915(connector->dev); - uint16_t temp_value; - uint8_t cmd; - int ret; - - ret = drm_object_property_set_value(&connector->base, property, val); - if (ret) - return ret; - - if (property == dev_priv->force_audio_property) { - 
int i = val; - bool has_audio, old_audio; - - if (intel_sdvo_connector->force_audio == HDMI_AUDIO_AUTO) - old_audio = intel_sdvo->has_hdmi_audio; - else - old_audio = intel_sdvo_connector->force_audio == HDMI_AUDIO_ON; - - if (i == HDMI_AUDIO_AUTO) - has_audio = intel_sdvo->has_hdmi_audio; - else - has_audio = (i == HDMI_AUDIO_ON); - - intel_sdvo_connector->force_audio = i; - - if (has_audio == old_audio) - return 0; - - goto done; - } - - if (property == dev_priv->broadcast_rgb_property) { - bool old_auto = intel_sdvo->color_range_auto; - uint32_t old_range = intel_sdvo->color_range; - - switch (val) { - case INTEL_BROADCAST_RGB_AUTO: - intel_sdvo->color_range_auto = true; - break; - case INTEL_BROADCAST_RGB_FULL: - intel_sdvo->color_range_auto = false; - intel_sdvo->color_range = 0; - break; - case INTEL_BROADCAST_RGB_LIMITED: - intel_sdvo->color_range_auto = false; - /* FIXME: this bit is only valid when using TMDS - * encoding and 8 bit per color mode. */ - intel_sdvo->color_range = HDMI_COLOR_RANGE_16_235; - break; - default: - return -EINVAL; - } - - if (old_auto == intel_sdvo->color_range_auto && - old_range == intel_sdvo->color_range) - return 0; - - goto done; - } - - if (property == connector->dev->mode_config.aspect_ratio_property) { - connector->state->picture_aspect_ratio = val; - goto done; - } - -#define CHECK_PROPERTY(name, NAME) \ - if (intel_sdvo_connector->name == property) { \ - if (intel_sdvo_connector->cur_##name == temp_value) return 0; \ - if (intel_sdvo_connector->max_##name < temp_value) return -EINVAL; \ - cmd = SDVO_CMD_SET_##NAME; \ - intel_sdvo_connector->cur_##name = temp_value; \ - goto set_value; \ - } + const struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state((void *)state); if (property == intel_sdvo_connector->tv_format) { - if (val >= TV_FORMAT_NUM) - return -EINVAL; + int i; - if (intel_sdvo->tv_format_index == - intel_sdvo_connector->tv_format_supported[val]) - return 0; + for (i = 0; i < 
intel_sdvo_connector->format_supported_num; i++) + if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) { + *val = i; - intel_sdvo->tv_format_index = intel_sdvo_connector->tv_format_supported[val]; - goto done; - } else if (IS_TV_OR_LVDS(intel_sdvo_connector)) { - temp_value = val; - if (intel_sdvo_connector->left == property) { - drm_object_property_set_value(&connector->base, - intel_sdvo_connector->right, val); - if (intel_sdvo_connector->left_margin == temp_value) return 0; + } - intel_sdvo_connector->left_margin = temp_value; - intel_sdvo_connector->right_margin = temp_value; - temp_value = intel_sdvo_connector->max_hscan - - intel_sdvo_connector->left_margin; - cmd = SDVO_CMD_SET_OVERSCAN_H; - goto set_value; - } else if (intel_sdvo_connector->right == property) { - drm_object_property_set_value(&connector->base, - intel_sdvo_connector->left, val); - if (intel_sdvo_connector->right_margin == temp_value) - return 0; - - intel_sdvo_connector->left_margin = temp_value; - intel_sdvo_connector->right_margin = temp_value; - temp_value = intel_sdvo_connector->max_hscan - - intel_sdvo_connector->left_margin; - cmd = SDVO_CMD_SET_OVERSCAN_H; - goto set_value; - } else if (intel_sdvo_connector->top == property) { - drm_object_property_set_value(&connector->base, - intel_sdvo_connector->bottom, val); - if (intel_sdvo_connector->top_margin == temp_value) - return 0; - - intel_sdvo_connector->top_margin = temp_value; - intel_sdvo_connector->bottom_margin = temp_value; - temp_value = intel_sdvo_connector->max_vscan - - intel_sdvo_connector->top_margin; - cmd = SDVO_CMD_SET_OVERSCAN_V; - goto set_value; - } else if (intel_sdvo_connector->bottom == property) { - drm_object_property_set_value(&connector->base, - intel_sdvo_connector->top, val); - if (intel_sdvo_connector->bottom_margin == temp_value) - return 0; - - intel_sdvo_connector->top_margin = temp_value; - intel_sdvo_connector->bottom_margin = temp_value; - temp_value = intel_sdvo_connector->max_vscan - 
- intel_sdvo_connector->top_margin; - cmd = SDVO_CMD_SET_OVERSCAN_V; - goto set_value; - } - CHECK_PROPERTY(hpos, HPOS) - CHECK_PROPERTY(vpos, VPOS) - CHECK_PROPERTY(saturation, SATURATION) - CHECK_PROPERTY(contrast, CONTRAST) - CHECK_PROPERTY(hue, HUE) - CHECK_PROPERTY(brightness, BRIGHTNESS) - CHECK_PROPERTY(sharpness, SHARPNESS) - CHECK_PROPERTY(flicker_filter, FLICKER_FILTER) - CHECK_PROPERTY(flicker_filter_2d, FLICKER_FILTER_2D) - CHECK_PROPERTY(flicker_filter_adaptive, FLICKER_FILTER_ADAPTIVE) - CHECK_PROPERTY(tv_chroma_filter, TV_CHROMA_FILTER) - CHECK_PROPERTY(tv_luma_filter, TV_LUMA_FILTER) - CHECK_PROPERTY(dot_crawl, DOT_CRAWL) - } - - return -EINVAL; /* unknown property */ - -set_value: - if (!intel_sdvo_set_value(intel_sdvo, cmd, &temp_value, 2)) - return -EIO; - - -done: - if (intel_sdvo->base.base.crtc) - intel_crtc_restore_mode(intel_sdvo->base.base.crtc); + WARN_ON(1); + *val = 0; + } else if (property == intel_sdvo_connector->top || + property == intel_sdvo_connector->bottom) + *val = intel_sdvo_connector->max_vscan - sdvo_state->tv.overscan_v; + else if (property == intel_sdvo_connector->left || + property == intel_sdvo_connector->right) + *val = intel_sdvo_connector->max_hscan - sdvo_state->tv.overscan_h; + else if (property == intel_sdvo_connector->hpos) + *val = sdvo_state->tv.hpos; + else if (property == intel_sdvo_connector->vpos) + *val = sdvo_state->tv.vpos; + else if (property == intel_sdvo_connector->saturation) + *val = state->tv.saturation; + else if (property == intel_sdvo_connector->contrast) + *val = state->tv.contrast; + else if (property == intel_sdvo_connector->hue) + *val = state->tv.hue; + else if (property == intel_sdvo_connector->brightness) + *val = state->tv.brightness; + else if (property == intel_sdvo_connector->sharpness) + *val = sdvo_state->tv.sharpness; + else if (property == intel_sdvo_connector->flicker_filter) + *val = sdvo_state->tv.flicker_filter; + else if (property == intel_sdvo_connector->flicker_filter_2d) + 
*val = sdvo_state->tv.flicker_filter_2d; + else if (property == intel_sdvo_connector->flicker_filter_adaptive) + *val = sdvo_state->tv.flicker_filter_adaptive; + else if (property == intel_sdvo_connector->tv_chroma_filter) + *val = sdvo_state->tv.chroma_filter; + else if (property == intel_sdvo_connector->tv_luma_filter) + *val = sdvo_state->tv.luma_filter; + else if (property == intel_sdvo_connector->dot_crawl) + *val = sdvo_state->tv.dot_crawl; + else + return intel_digital_connector_atomic_get_property(connector, state, property, val); + + return 0; +} + +static int +intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, + struct drm_connector_state *state, + struct drm_property *property, + uint64_t val) +{ + struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); + struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state); + + if (property == intel_sdvo_connector->tv_format) { + state->tv.mode = intel_sdvo_connector->tv_format_supported[val]; + + if (state->crtc) { + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state->state, state->crtc); + + crtc_state->connectors_changed = true; + } + } else if (property == intel_sdvo_connector->top || + property == intel_sdvo_connector->bottom) + /* Cannot set these independent from each other */ + sdvo_state->tv.overscan_v = intel_sdvo_connector->max_vscan - val; + else if (property == intel_sdvo_connector->left || + property == intel_sdvo_connector->right) + /* Cannot set these independent from each other */ + sdvo_state->tv.overscan_h = intel_sdvo_connector->max_hscan - val; + else if (property == intel_sdvo_connector->hpos) + sdvo_state->tv.hpos = val; + else if (property == intel_sdvo_connector->vpos) + sdvo_state->tv.vpos = val; + else if (property == intel_sdvo_connector->saturation) + state->tv.saturation = val; + else if (property == intel_sdvo_connector->contrast) + state->tv.contrast = val; + else if (property == 
intel_sdvo_connector->hue) + state->tv.hue = val; + else if (property == intel_sdvo_connector->brightness) + state->tv.brightness = val; + else if (property == intel_sdvo_connector->sharpness) + sdvo_state->tv.sharpness = val; + else if (property == intel_sdvo_connector->flicker_filter) + sdvo_state->tv.flicker_filter = val; + else if (property == intel_sdvo_connector->flicker_filter_2d) + sdvo_state->tv.flicker_filter_2d = val; + else if (property == intel_sdvo_connector->flicker_filter_adaptive) + sdvo_state->tv.flicker_filter_adaptive = val; + else if (property == intel_sdvo_connector->tv_chroma_filter) + sdvo_state->tv.chroma_filter = val; + else if (property == intel_sdvo_connector->tv_luma_filter) + sdvo_state->tv.luma_filter = val; + else if (property == intel_sdvo_connector->dot_crawl) + sdvo_state->tv.dot_crawl = val; + else + return intel_digital_connector_atomic_set_property(connector, state, property, val); return 0; -#undef CHECK_PROPERTY } static int @@ -2176,22 +2178,61 @@ intel_sdvo_connector_unregister(struct drm_connector *connector) intel_connector_unregister(connector); } +static struct drm_connector_state * +intel_sdvo_connector_duplicate_state(struct drm_connector *connector) +{ + struct intel_sdvo_connector_state *state; + + state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_connector_duplicate_state(connector, &state->base.base); + return &state->base.base; +} + static const struct drm_connector_funcs intel_sdvo_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = intel_sdvo_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_sdvo_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, + .atomic_get_property = intel_sdvo_connector_atomic_get_property, + .atomic_set_property = intel_sdvo_connector_atomic_set_property, .late_register = 
intel_sdvo_connector_register, .early_unregister = intel_sdvo_connector_unregister, .destroy = intel_sdvo_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_sdvo_connector_duplicate_state, }; +static int intel_sdvo_atomic_check(struct drm_connector *conn, + struct drm_connector_state *new_conn_state) +{ + struct drm_atomic_state *state = new_conn_state->state; + struct drm_connector_state *old_conn_state = + drm_atomic_get_old_connector_state(state, conn); + struct intel_sdvo_connector_state *old_state = + to_intel_sdvo_connector_state(old_conn_state); + struct intel_sdvo_connector_state *new_state = + to_intel_sdvo_connector_state(new_conn_state); + + if (new_conn_state->crtc && + (memcmp(&old_state->tv, &new_state->tv, sizeof(old_state->tv)) || + memcmp(&old_conn_state->tv, &new_conn_state->tv, sizeof(old_conn_state->tv)))) { + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(new_conn_state->state, + new_conn_state->crtc); + + crtc_state->connectors_changed = true; + } + + return intel_digital_connector_atomic_check(conn, new_conn_state); +} + static const struct drm_connector_helper_funcs intel_sdvo_connector_helper_funcs = { .get_modes = intel_sdvo_get_modes, .mode_valid = intel_sdvo_mode_valid, + .atomic_check = intel_sdvo_atomic_check, }; static void intel_sdvo_enc_destroy(struct drm_encoder *encoder) @@ -2383,7 +2424,6 @@ intel_sdvo_add_hdmi_properties(struct intel_sdvo *intel_sdvo, intel_attach_force_audio_property(&connector->base.base); if (INTEL_GEN(dev_priv) >= 4 && IS_MOBILE(dev_priv)) { intel_attach_broadcast_rgb_property(&connector->base.base); - intel_sdvo->color_range_auto = true; } intel_attach_aspect_ratio_property(&connector->base.base); connector->base.base.state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; @@ -2392,16 +2432,21 @@ intel_sdvo_add_hdmi_properties(struct intel_sdvo *intel_sdvo, 
static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void) { struct intel_sdvo_connector *sdvo_connector; + struct intel_sdvo_connector_state *conn_state; sdvo_connector = kzalloc(sizeof(*sdvo_connector), GFP_KERNEL); if (!sdvo_connector) return NULL; - if (intel_connector_init(&sdvo_connector->base) < 0) { + conn_state = kzalloc(sizeof(*conn_state), GFP_KERNEL); + if (!conn_state) { kfree(sdvo_connector); return NULL; } + __drm_atomic_helper_connector_reset(&sdvo_connector->base.base, + &conn_state->base.base); + return sdvo_connector; } @@ -2693,31 +2738,30 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, intel_sdvo_connector->tv_format, i, i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); - intel_sdvo->tv_format_index = intel_sdvo_connector->tv_format_supported[0]; - drm_object_attach_property(&intel_sdvo_connector->base.base.base, - intel_sdvo_connector->tv_format, 0); + intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; + drm_object_attach_property(&intel_sdvo_connector->base.base.base, 0, 0); return true; } -#define ENHANCEMENT(name, NAME) do { \ +#define _ENHANCEMENT(state_assignment, name, NAME) do { \ if (enhancements.name) { \ if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_MAX_##NAME, &data_value, 4) || \ !intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_##NAME, &response, 2)) \ return false; \ - intel_sdvo_connector->max_##name = data_value[0]; \ - intel_sdvo_connector->cur_##name = response; \ intel_sdvo_connector->name = \ drm_property_create_range(dev, 0, #name, 0, data_value[0]); \ if (!intel_sdvo_connector->name) return false; \ + state_assignment = response; \ drm_object_attach_property(&connector->base, \ - intel_sdvo_connector->name, \ - intel_sdvo_connector->cur_##name); \ + intel_sdvo_connector->name, 0); \ DRM_DEBUG_KMS(#name ": max %d, default %d, current %d\n", \ data_value[0], data_value[1], response); \ } \ } while (0) +#define 
ENHANCEMENT(state, name, NAME) _ENHANCEMENT((state)->name, name, NAME) + static bool intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, struct intel_sdvo_connector *intel_sdvo_connector, @@ -2725,6 +2769,9 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, { struct drm_device *dev = intel_sdvo->base.base.dev; struct drm_connector *connector = &intel_sdvo_connector->base.base; + struct drm_connector_state *conn_state = connector->state; + struct intel_sdvo_connector_state *sdvo_state = + to_intel_sdvo_connector_state(conn_state); uint16_t response, data_value[2]; /* when horizontal overscan is supported, Add the left/right property */ @@ -2739,17 +2786,16 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, &response, 2)) return false; + sdvo_state->tv.overscan_h = response; + intel_sdvo_connector->max_hscan = data_value[0]; - intel_sdvo_connector->left_margin = data_value[0] - response; - intel_sdvo_connector->right_margin = intel_sdvo_connector->left_margin; intel_sdvo_connector->left = drm_property_create_range(dev, 0, "left_margin", 0, data_value[0]); if (!intel_sdvo_connector->left) return false; drm_object_attach_property(&connector->base, - intel_sdvo_connector->left, - intel_sdvo_connector->left_margin); + intel_sdvo_connector->left, 0); intel_sdvo_connector->right = drm_property_create_range(dev, 0, "right_margin", 0, data_value[0]); @@ -2757,8 +2803,7 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, return false; drm_object_attach_property(&connector->base, - intel_sdvo_connector->right, - intel_sdvo_connector->right_margin); + intel_sdvo_connector->right, 0); DRM_DEBUG_KMS("h_overscan: max %d, " "default %d, current %d\n", data_value[0], data_value[1], response); @@ -2775,9 +2820,9 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, &response, 2)) return false; + sdvo_state->tv.overscan_v = response; + intel_sdvo_connector->max_vscan = data_value[0]; - 
intel_sdvo_connector->top_margin = data_value[0] - response; - intel_sdvo_connector->bottom_margin = intel_sdvo_connector->top_margin; intel_sdvo_connector->top = drm_property_create_range(dev, 0, "top_margin", 0, data_value[0]); @@ -2785,8 +2830,7 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, return false; drm_object_attach_property(&connector->base, - intel_sdvo_connector->top, - intel_sdvo_connector->top_margin); + intel_sdvo_connector->top, 0); intel_sdvo_connector->bottom = drm_property_create_range(dev, 0, @@ -2795,40 +2839,37 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, return false; drm_object_attach_property(&connector->base, - intel_sdvo_connector->bottom, - intel_sdvo_connector->bottom_margin); + intel_sdvo_connector->bottom, 0); DRM_DEBUG_KMS("v_overscan: max %d, " "default %d, current %d\n", data_value[0], data_value[1], response); } - ENHANCEMENT(hpos, HPOS); - ENHANCEMENT(vpos, VPOS); - ENHANCEMENT(saturation, SATURATION); - ENHANCEMENT(contrast, CONTRAST); - ENHANCEMENT(hue, HUE); - ENHANCEMENT(sharpness, SHARPNESS); - ENHANCEMENT(brightness, BRIGHTNESS); - ENHANCEMENT(flicker_filter, FLICKER_FILTER); - ENHANCEMENT(flicker_filter_adaptive, FLICKER_FILTER_ADAPTIVE); - ENHANCEMENT(flicker_filter_2d, FLICKER_FILTER_2D); - ENHANCEMENT(tv_chroma_filter, TV_CHROMA_FILTER); - ENHANCEMENT(tv_luma_filter, TV_LUMA_FILTER); + ENHANCEMENT(&sdvo_state->tv, hpos, HPOS); + ENHANCEMENT(&sdvo_state->tv, vpos, VPOS); + ENHANCEMENT(&conn_state->tv, saturation, SATURATION); + ENHANCEMENT(&conn_state->tv, contrast, CONTRAST); + ENHANCEMENT(&conn_state->tv, hue, HUE); + ENHANCEMENT(&conn_state->tv, brightness, BRIGHTNESS); + ENHANCEMENT(&sdvo_state->tv, sharpness, SHARPNESS); + ENHANCEMENT(&sdvo_state->tv, flicker_filter, FLICKER_FILTER); + ENHANCEMENT(&sdvo_state->tv, flicker_filter_adaptive, FLICKER_FILTER_ADAPTIVE); + ENHANCEMENT(&sdvo_state->tv, flicker_filter_2d, FLICKER_FILTER_2D); + 
_ENHANCEMENT(sdvo_state->tv.chroma_filter, tv_chroma_filter, TV_CHROMA_FILTER); + _ENHANCEMENT(sdvo_state->tv.luma_filter, tv_luma_filter, TV_LUMA_FILTER); if (enhancements.dot_crawl) { if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_DOT_CRAWL, &response, 2)) return false; - intel_sdvo_connector->max_dot_crawl = 1; - intel_sdvo_connector->cur_dot_crawl = response & 0x1; + sdvo_state->tv.dot_crawl = response & 0x1; intel_sdvo_connector->dot_crawl = drm_property_create_range(dev, 0, "dot_crawl", 0, 1); if (!intel_sdvo_connector->dot_crawl) return false; drm_object_attach_property(&connector->base, - intel_sdvo_connector->dot_crawl, - intel_sdvo_connector->cur_dot_crawl); + intel_sdvo_connector->dot_crawl, 0); DRM_DEBUG_KMS("dot crawl: current %d\n", response); } @@ -2844,11 +2885,12 @@ intel_sdvo_create_enhance_property_lvds(struct intel_sdvo *intel_sdvo, struct drm_connector *connector = &intel_sdvo_connector->base.base; uint16_t response, data_value[2]; - ENHANCEMENT(brightness, BRIGHTNESS); + ENHANCEMENT(&connector->state->tv, brightness, BRIGHTNESS); return true; } #undef ENHANCEMENT +#undef _ENHANCEMENT static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, struct intel_sdvo_connector *intel_sdvo_connector) From b5a824250e4aac4fb76c5076b6a5e460e52c831a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 May 2017 21:48:18 +0100 Subject: [PATCH 019/341] drm/i915: Add kerneldoc to describe i915_gem_object.vma_list Add kerneldoc for the vma_list stored on the i915_gem_object, in particular, documenting the expected ordering of elements -- i.e. that we do expect GGTT VMA first followed by the ppGTT VMA. 
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170525204818.12044-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_object.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 35e1a27729dc..915057824284 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -68,8 +68,23 @@ struct drm_i915_gem_object { const struct drm_i915_gem_object_ops *ops; - /** List of VMAs backed by this object */ + /** + * @vma_list: List of VMAs backed by this object + * + * The VMA on this list are ordered by type, all GGTT vma are placed + * at the head and all ppGTT vma are placed at the tail. The different + * types of GGTT vma are unordered between themselves, use the + * @vma_tree (which has a defined order between all VMA) to find an + * exact match. + */ struct list_head vma_list; + /** + * @vma_tree: Ordered tree of VMAs backed by this object + * + * All VMA created for this object are placed in the @vma_tree for + * fast retrieval via a binary search in i915_vma_instance(). + * They are also added to @vma_list for easy iteration. + */ struct rb_root vma_tree; /** Stolen memory for this object, instead of being backed by shmem. */ From 00c26cf9ce4eadf3680e431a374eb4cdc6e824cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 May 2017 17:26:53 +0100 Subject: [PATCH 020/341] drm/i915: Remove toplevel struct_mutex locking from debugfs/i915_drop_caches I have a plan to write a quick test to exercise concurrent usage of i915_gem_shrink(), the simplest way looks to be to have multiple threads using debugfs/i915_drop_caches. However, we currently take one lock over the entire function, serialising the calls into i915_gem_shrink() so reduce the lock coverage. 
Testcase: igt/gem_shrink/reclaim Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170524162653.5446-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_debugfs.c | 30 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7e0816ccdc21..3b088685a553 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4289,26 +4289,27 @@ i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; struct drm_device *dev = &dev_priv->drm; - int ret; + int ret = 0; DRM_DEBUG("Dropping caches: 0x%08llx\n", val); /* No need to check and wait for gpu resets, only libdrm auto-restarts * on ioctls on -EAGAIN. */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - if (val & DROP_ACTIVE) { - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + if (val & (DROP_ACTIVE | DROP_RETIRE)) { + ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) - goto unlock; - } + return ret; - if (val & DROP_RETIRE) - i915_gem_retire_requests(dev_priv); + if (val & DROP_ACTIVE) + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + + if (val & DROP_RETIRE) + i915_gem_retire_requests(dev_priv); + + mutex_unlock(&dev->struct_mutex); + } lockdep_set_current_reclaim_state(GFP_KERNEL); if (val & DROP_BOUND) @@ -4321,9 +4322,6 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(dev_priv); lockdep_clear_current_reclaim_state(); -unlock: - mutex_unlock(&dev->struct_mutex); - if (val & DROP_FREED) { synchronize_rcu(); i915_gem_drain_freed_objects(dev_priv); From 67b7f33eeeaa6368289c6d25b283432e02dcc1c1 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Sat, 27 May 2017 17:44:17 +0800 Subject: [PATCH 021/341] drm/i915/gvt: Add gvt 
options sanitize function The intel_gvt_sanitize_options will sanitize the GVT related options before doing initialize the GVT. Suggested-by: Joonas Lahtinen Signed-off-by: Chuanxiao Dong Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Cc: Chris Wilson Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/intel_gvt.c | 36 +++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_gvt.h | 5 +++++ 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7b8c72776f46..3536f1a7f5da 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -997,6 +997,8 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores)); intel_uc_sanitize_options(dev_priv); + + intel_gvt_sanitize_options(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index e1ab6432a914..dde9c78325e4 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -50,6 +50,32 @@ static bool is_supported_device(struct drm_i915_private *dev_priv) return false; } +/** + * intel_gvt_sanitize_options - sanitize GVT related options + * @dev_priv: drm i915 private data + * + * This function is called at the i915 options sanitize stage. + */ +void intel_gvt_sanitize_options(struct drm_i915_private *dev_priv) +{ + if (!i915.enable_gvt) + return; + + if (intel_vgpu_active(dev_priv)) { + DRM_INFO("GVT-g is disabled for guest\n"); + goto bail; + } + + if (!is_supported_device(dev_priv)) { + DRM_INFO("Unsupported device. 
GVT-g is disabled\n"); + goto bail; + } + + return; +bail: + i915.enable_gvt = 0; +} + /** * intel_gvt_init - initialize GVT components * @dev_priv: drm i915 private data @@ -69,16 +95,6 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return 0; } - if (intel_vgpu_active(dev_priv)) { - DRM_DEBUG_DRIVER("GVT-g is disabled for guest\n"); - goto bail; - } - - if (!is_supported_device(dev_priv)) { - DRM_DEBUG_DRIVER("Unsupported device. GVT-g is disabled\n"); - goto bail; - } - if (!i915.enable_execlists) { DRM_INFO("GPU guest virtualisation [GVT-g] disabled due to disabled execlist submission [i915.enable_execlists module parameter]\n"); goto bail; diff --git a/drivers/gpu/drm/i915/intel_gvt.h b/drivers/gpu/drm/i915/intel_gvt.h index 25df2d65b985..61b246470282 100644 --- a/drivers/gpu/drm/i915/intel_gvt.h +++ b/drivers/gpu/drm/i915/intel_gvt.h @@ -32,6 +32,7 @@ void intel_gvt_cleanup(struct drm_i915_private *dev_priv); int intel_gvt_init_device(struct drm_i915_private *dev_priv); void intel_gvt_clean_device(struct drm_i915_private *dev_priv); int intel_gvt_init_host(void); +void intel_gvt_sanitize_options(struct drm_i915_private *dev_priv); #else static inline int intel_gvt_init(struct drm_i915_private *dev_priv) { @@ -40,6 +41,10 @@ static inline int intel_gvt_init(struct drm_i915_private *dev_priv) static inline void intel_gvt_cleanup(struct drm_i915_private *dev_priv) { } + +static inline void intel_gvt_sanitize_options(struct drm_i915_private *dev_priv) +{ +} #endif #endif /* _INTEL_GVT_H_ */ From 36ccc4f89fe595567d91e3246c5d81952ba3d8b2 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Sat, 27 May 2017 17:44:18 +0800 Subject: [PATCH 022/341] drm/i915/gvt: Return -EIO if host enable_execlists not enabled when loading GVT-g GVT-g relies on the enable_execlists parameter in i915. If this option is not enabled for GVT-g, should return -EIO to make i915 driver loading failed. v2: - Use DRM_ERROR as it is a fatal message.
(Chris) Suggested-by: Joonas Lahtinen Signed-off-by: Chuanxiao Dong Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_gvt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index dde9c78325e4..e2a3f0af1418 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -96,8 +96,8 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) } if (!i915.enable_execlists) { - DRM_INFO("GPU guest virtualisation [GVT-g] disabled due to disabled execlist submission [i915.enable_execlists module parameter]\n"); - goto bail; + DRM_ERROR("i915 GVT-g loading failed due to disabled execlists mode\n"); + return -EIO; } /* From ecef814c994d7d00dfa7bef5aa87dd66e8574946 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Sat, 27 May 2017 17:44:19 +0800 Subject: [PATCH 023/341] drm/i915/gvt: Return -EIO if host GuC submission is enabled when loading GVT-g Currently GVT-g cannot work properly when host GuC submission is enabled, so make the driver loading failed in this case. v2: - use DRM_ERROR as it is a fatal message. 
(Chris) Suggested-by: Chris Wilson Signed-off-by: Chuanxiao Dong Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Zhenyu Wang Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1495878259-7733-2-git-send-email-chuanxiao.dong@intel.com --- drivers/gpu/drm/i915/intel_gvt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index e2a3f0af1418..52d5b82790d9 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -100,6 +100,11 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return -EIO; } + if (i915.enable_guc_submission) { + DRM_ERROR("i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n"); + return -EIO; + } + /* * We're not in host or fail to find a MPT module, disable GVT-g */ From 863e9fde1a7061dad09bb299c65bed5f1ccb44ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 30 May 2017 13:13:32 +0100 Subject: [PATCH 024/341] drm/i915: Short-circuit i915_gem_wait_for_idle() if already idle If the device is asleep (no GT wakeref), we know the GPU is already idle. If we add an early return, we can avoid touching registers and checking hw state outside of the assumed GT wakelock. 
This prevents causing such errors whilst debugging: [ 2613.401647] RPM wakelock ref not held during HW access [ 2613.401684] ------------[ cut here ]------------ [ 2613.401720] WARNING: CPU: 5 PID: 7739 at drivers/gpu/drm/i915/intel_drv.h:1787 gen6_read32+0x21f/0x2b0 [i915] [ 2613.401731] Modules linked in: snd_hda_intel i915 vgem snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_realtek coretemp snd_hda_codec_generic crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm r8169 mii mei_me lpc_ich mei prime_numbers [last unloaded: i915] [ 2613.401823] CPU: 5 PID: 7739 Comm: drv_missed_irq Tainted: G U 4.12.0-rc2-CI-CI_DRM_421+ #1 [ 2613.401825] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS V1.12 02/15/2016 [ 2613.401840] task: ffff880409e3a740 task.stack: ffffc900084dc000 [ 2613.401861] RIP: 0010:gen6_read32+0x21f/0x2b0 [i915] [ 2613.401863] RSP: 0018:ffffc900084dfce8 EFLAGS: 00010292 [ 2613.401869] RAX: 000000000000002a RBX: ffff8804016a8000 RCX: 0000000000000006 [ 2613.401871] RDX: 0000000000000006 RSI: ffffffff81cbf2d9 RDI: ffffffff81c9e3a7 [ 2613.401874] RBP: ffffc900084dfd18 R08: ffff880409e3afc8 R09: 0000000000000000 [ 2613.401877] R10: 000000008a1c483f R11: 0000000000000000 R12: 000000000000209c [ 2613.401879] R13: 0000000000000001 R14: ffff8804016a8000 R15: ffff8804016ac150 [ 2613.401882] FS: 00007f39ef3dd8c0(0000) GS:ffff88041fb40000(0000) knlGS:0000000000000000 [ 2613.401885] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2613.401887] CR2: 00000000023717c8 CR3: 00000002e7b34000 CR4: 00000000001406e0 [ 2613.401889] Call Trace: [ 2613.401912] intel_engine_is_idle+0x76/0x90 [i915] [ 2613.401931] i915_gem_wait_for_idle+0xe6/0x1e0 [i915] [ 2613.401951] fault_irq_set+0x40/0x90 [i915] [ 2613.401970] i915_ring_test_irq_set+0x42/0x50 [i915] [ 2613.401976] simple_attr_write+0xc7/0xe0 [ 2613.401981] full_proxy_write+0x4f/0x70 [ 2613.401987] __vfs_write+0x23/0x120 [ 2613.401992] ? 
rcu_read_lock_sched_held+0x75/0x80 [ 2613.401996] ? rcu_sync_lockdep_assert+0x2a/0x50 [ 2613.401999] ? __sb_start_write+0xfa/0x1f0 [ 2613.402004] vfs_write+0xc5/0x1d0 [ 2613.402008] ? trace_hardirqs_on_caller+0xe7/0x1c0 [ 2613.402013] SyS_write+0x44/0xb0 [ 2613.402020] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 2613.402022] RIP: 0033:0x7f39eded6670 [ 2613.402025] RSP: 002b:00007fffdcdcb1a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 2613.402030] RAX: ffffffffffffffda RBX: ffffffff81470203 RCX: 00007f39eded6670 [ 2613.402033] RDX: 0000000000000001 RSI: 000000000041bc33 RDI: 0000000000000006 [ 2613.402036] RBP: ffffc900084dff88 R08: 00007f39ef3dd8c0 R09: 0000000000000001 [ 2613.402038] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000041bc33 [ 2613.402041] R13: 0000000000000006 R14: 0000000000000000 R15: 0000000000000000 [ 2613.402046] ? __this_cpu_preempt_check+0x13/0x20 [ 2613.402052] Code: 01 9b fa e0 0f ff e9 28 fe ff ff 80 3d 6a dd 0e 00 00 0f 85 29 fe ff ff 48 c7 c7 48 19 29 a0 c6 05 56 dd 0e 00 01 e8 da 9a fa e0 <0f> ff e9 0f fe ff ff b9 01 00 00 00 ba 01 00 00 00 44 89 e6 48 [ 2613.402199] ---[ end trace 31f0cfa93ab632bf ]--- Fixes: 25112b64b3d2 ("drm/i915: Wait for all engines to be idle as part of i915_gem_wait_for_idle()") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170530121334.17364-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7ab47a84671f..7b676fd1f075 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3348,6 +3348,10 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { int ret; + /* If the device is asleep, we have no requests outstanding */ + if (!READ_ONCE(i915->gt.awake)) + return 0; + if (flags & I915_WAIT_LOCKED) { struct 
i915_gem_timeline *tl; From a091d4ee931b16ce4fef945d39a20b851a7e17b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 30 May 2017 13:13:33 +0100 Subject: [PATCH 025/341] drm/i915: Hold a wakeref for probing the ring registers Allow intel_engine_is_idle() to be called outside of the GT wakeref by acquiring the device runtime pm for ourselves. This allows the function to act as check after we assume the engine is idle and we release the GT wakeref held whilst we have requests. At the moment, we do not call it outside of an awake context but taking the wakeref as required makes it more convenient to use for quick debugging in future. [ 2613.401647] RPM wakelock ref not held during HW access [ 2613.401684] ------------[ cut here ]------------ [ 2613.401720] WARNING: CPU: 5 PID: 7739 at drivers/gpu/drm/i915/intel_drv.h:1787 gen6_read32+0x21f/0x2b0 [i915] [ 2613.401731] Modules linked in: snd_hda_intel i915 vgem snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_realtek coretemp snd_hda_codec_generic crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm r8169 mii mei_me lpc_ich mei prime_numbers [last unloaded: i915] [ 2613.401823] CPU: 5 PID: 7739 Comm: drv_missed_irq Tainted: G U 4.12.0-rc2-CI-CI_DRM_421+ #1 [ 2613.401825] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS V1.12 02/15/2016 [ 2613.401840] task: ffff880409e3a740 task.stack: ffffc900084dc000 [ 2613.401861] RIP: 0010:gen6_read32+0x21f/0x2b0 [i915] [ 2613.401863] RSP: 0018:ffffc900084dfce8 EFLAGS: 00010292 [ 2613.401869] RAX: 000000000000002a RBX: ffff8804016a8000 RCX: 0000000000000006 [ 2613.401871] RDX: 0000000000000006 RSI: ffffffff81cbf2d9 RDI: ffffffff81c9e3a7 [ 2613.401874] RBP: ffffc900084dfd18 R08: ffff880409e3afc8 R09: 0000000000000000 [ 2613.401877] R10: 000000008a1c483f R11: 0000000000000000 R12: 000000000000209c [ 2613.401879] R13: 0000000000000001 R14: ffff8804016a8000 R15: ffff8804016ac150 [ 2613.401882] FS: 
00007f39ef3dd8c0(0000) GS:ffff88041fb40000(0000) knlGS:0000000000000000 [ 2613.401885] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2613.401887] CR2: 00000000023717c8 CR3: 00000002e7b34000 CR4: 00000000001406e0 [ 2613.401889] Call Trace: [ 2613.401912] intel_engine_is_idle+0x76/0x90 [i915] [ 2613.401931] i915_gem_wait_for_idle+0xe6/0x1e0 [i915] [ 2613.401951] fault_irq_set+0x40/0x90 [i915] [ 2613.401970] i915_ring_test_irq_set+0x42/0x50 [i915] [ 2613.401976] simple_attr_write+0xc7/0xe0 [ 2613.401981] full_proxy_write+0x4f/0x70 [ 2613.401987] __vfs_write+0x23/0x120 [ 2613.401992] ? rcu_read_lock_sched_held+0x75/0x80 [ 2613.401996] ? rcu_sync_lockdep_assert+0x2a/0x50 [ 2613.401999] ? __sb_start_write+0xfa/0x1f0 [ 2613.402004] vfs_write+0xc5/0x1d0 [ 2613.402008] ? trace_hardirqs_on_caller+0xe7/0x1c0 [ 2613.402013] SyS_write+0x44/0xb0 [ 2613.402020] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 2613.402022] RIP: 0033:0x7f39eded6670 [ 2613.402025] RSP: 002b:00007fffdcdcb1a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 2613.402030] RAX: ffffffffffffffda RBX: ffffffff81470203 RCX: 00007f39eded6670 [ 2613.402033] RDX: 0000000000000001 RSI: 000000000041bc33 RDI: 0000000000000006 [ 2613.402036] RBP: ffffc900084dff88 R08: 00007f39ef3dd8c0 R09: 0000000000000001 [ 2613.402038] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000041bc33 [ 2613.402041] R13: 0000000000000006 R14: 0000000000000000 R15: 0000000000000000 [ 2613.402046] ? 
__this_cpu_preempt_check+0x13/0x20 [ 2613.402052] Code: 01 9b fa e0 0f ff e9 28 fe ff ff 80 3d 6a dd 0e 00 00 0f 85 29 fe ff ff 48 c7 c7 48 19 29 a0 c6 05 56 dd 0e 00 01 e8 da 9a fa e0 <0f> ff e9 0f fe ff ff b9 01 00 00 00 ba 01 00 00 00 44 89 e6 48 [ 2613.402199] ---[ end trace 31f0cfa93ab632bf ]--- Fixes: 5400367a864d ("drm/i915: Ensure the engine is idle before manually changing HWS") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170530121334.17364-2-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_engine_cs.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 413bfd8d4bf4..699f2d3861c7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1205,6 +1205,22 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) return 0; } +static bool ring_is_idle(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + bool idle = true; + + intel_runtime_pm_get(dev_priv); + + /* No bit for gen2, so assume the CS parser is idle */ + if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) + idle = false; + + intel_runtime_pm_put(dev_priv); + + return idle; +} + /** * intel_engine_is_idle() - Report if the engine has finished process all work * @engine: the intel_engine_cs @@ -1237,7 +1253,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return false; /* Ring stopped? 
*/ - if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) + if (!ring_is_idle(engine)) return false; return true; From bef8c056fba09aa4629fe5a2d3efe64068d049db Mon Sep 17 00:00:00 2001 From: "Nagaraju, Vathsala" Date: Thu, 25 May 2017 22:13:29 +0530 Subject: [PATCH 026/341] drm/i915/psr: disable psr2 for resolution greater than 32X20 psr1 is also disabled for panel resolution greater than 32X20. Added psr2 check to disable only for psr2 panels having resolution greater than 32X20. issue was introduced by commit-id : "acf45d11050abd751dcec986ab121cb2367dcbba" commit message: "PSR2 is restricted to work with panel resolutions upto 3200x2000, move the check to intel_psr_match_conditions and fully block psr." v2: (Rodrigo) Add previous commit details which introduced the issue Fixes: acf45d11050a ("drm/i915/psr: disable psr2 for resolution greater than 32X20") Cc: Rodrigo Vivi Cc: Jim Bride Cc: Yaroslav Shabalin Reported-by: Yaroslav Shabalin Reviewed-by: Rodrigo Vivi Signed-off-by: vathsala nagaraju Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/49935bdff896ee3140bed471012b9f9110a863a4.1495729964.git.vathsala.nagaraju@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index c3780d0d2baf..559f1ab42bfc 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -435,8 +435,9 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp) } /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (intel_crtc->config->pipe_src_w > 3200 || - intel_crtc->config->pipe_src_h > 2000) { + if (dev_priv->psr.psr2_support && + (intel_crtc->config->pipe_src_w > 3200 || + intel_crtc->config->pipe_src_h > 2000)) { dev_priv->psr.psr2_support = false; return false; } From 7f1ea2ac30171d9a6aad2916a73e41fa68c8f137 Mon Sep 17 00:00:00 2001 From: Michel 
Thierry Date: Tue, 30 May 2017 17:05:46 -0700 Subject: [PATCH 027/341] drm/i915/guc: Fix doorbell id selection We are passing parameters in the wrong order to find next zero bit, and when it doesn't find anything it returns size (offset in the code), which is always zero. For reference the function is defined as: find_next_bit( *addr, size, offset ) The incorrect parameter order was added by commit abddffdf3620e ("drm/i915/guc: Sanitize GuC client initialization"). Luckily, currently we only use a single guc client and a single doorbell, which happens to be zero; therefore it isn't necessary to backport this fix (which would be for v4.12). Cc: Daniele Ceraolo Spurio Cc: Joonas Lahtinen Cc: Oscar Mateo Signed-off-by: Michel Thierry Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170531000546.30762-1-michel.thierry@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index e6e0c6ef1084..48a1e9349a2c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -105,7 +105,7 @@ static int __reserve_doorbell(struct i915_guc_client *client) end += offset; } - id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end); + id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset); if (id == end) return -ENOSPC; From adfdf85d795f4d4f487b61ee0b169d64c6e19081 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 2 May 2017 15:04:09 +0300 Subject: [PATCH 028/341] drm/i915: Prevent the system suspend complete optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit bac2a909a096c9110525c18cbb8ce73c660d5f71 Author: Rafael J. 
Wysocki Date: Wed Jan 21 02:17:42 2015 +0100 PCI / PM: Avoid resuming PCI devices during system suspend PCI devices will default to allowing the system suspend complete optimization where devices are not woken up during system suspend if they were already runtime suspended. This however breaks the i915/HDA drivers for two reasons: - The i915 driver has system suspend specific steps that it needs to run, that bring the device to a different state than its runtime suspended state. - The HDA driver's suspend handler requires power that it will request from the i915 driver's power domain handler. This in turn requires the i915 driver to runtime resume itself, but this won't be possible if the suspend complete optimization is in effect: in this case the i915 runtime PM is disabled and trying to get an RPM reference returns -EACCES. Solve this by requiring the PCI/PM core to resume the device during system suspend which in effect disables the suspend complete optimization. Regardless of the above commit the optimization stayed disabled for DRM devices until commit d14d2a8453d650bea32a1c5271af1458cd283a0f Author: Lukas Wunner Date: Wed Jun 8 12:49:29 2016 +0200 drm: Remove dev_pm_ops from drm_class so this patch is in practice a fix for this commit. Another reason for the bug staying hidden for so long is that the optimization for a device is disabled if it's disabled for any of its children devices. i915 may have a backlight device as its child which doesn't support runtime PM and so doesn't allow the optimization either. So if this backlight device got registered the bug stayed hidden. Credits to Marta, Tomi and David who enabled pstore logging, that caught one instance of this issue across a suspend/resume-to-ram and Ville who remembered that the optimization was enabled for some devices at one point.
The first WARN triggered by the problem: [ 6250.746445] WARNING: CPU: 2 PID: 17384 at drivers/gpu/drm/i915/intel_runtime_pm.c:2846 intel_runtime_pm_get+0x6b/0xd0 [i915] [ 6250.746448] pm_runtime_get_sync() failed: -13 [ 6250.746451] Modules linked in: snd_hda_intel i915 vgem snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul snd_hda_codec_realtek snd_hda_codec_generic ghash_clmulni_intel e1000e snd_hda_codec snd_hwdep snd_hda_core ptp mei_me pps_core snd_pcm lpc_ich mei prime_ numbers i2c_hid i2c_designware_platform i2c_designware_core [last unloaded: i915] [ 6250.746512] CPU: 2 PID: 17384 Comm: kworker/u8:0 Tainted: G U W 4.11.0-rc5-CI-CI_DRM_334+ #1 [ 6250.746515] Hardware name: /NUC5i5RYB, BIOS RYBDWi35.86A.0362.2017.0118.0940 01/18/2017 [ 6250.746521] Workqueue: events_unbound async_run_entry_fn [ 6250.746525] Call Trace: [ 6250.746530] dump_stack+0x67/0x92 [ 6250.746536] __warn+0xc6/0xe0 [ 6250.746542] ? pci_restore_standard_config+0x40/0x40 [ 6250.746546] warn_slowpath_fmt+0x46/0x50 [ 6250.746553] ? __pm_runtime_resume+0x56/0x80 [ 6250.746584] intel_runtime_pm_get+0x6b/0xd0 [i915] [ 6250.746610] intel_display_power_get+0x1b/0x40 [i915] [ 6250.746646] i915_audio_component_get_power+0x15/0x20 [i915] [ 6250.746654] snd_hdac_display_power+0xc8/0x110 [snd_hda_core] [ 6250.746661] azx_runtime_resume+0x218/0x280 [snd_hda_intel] [ 6250.746667] pci_pm_runtime_resume+0x76/0xa0 [ 6250.746672] __rpm_callback+0xb4/0x1f0 [ 6250.746677] ? pci_restore_standard_config+0x40/0x40 [ 6250.746682] rpm_callback+0x1f/0x80 [ 6250.746686] ? pci_restore_standard_config+0x40/0x40 [ 6250.746690] rpm_resume+0x4ba/0x740 [ 6250.746698] __pm_runtime_resume+0x49/0x80 [ 6250.746703] pci_pm_suspend+0x57/0x140 [ 6250.746709] dpm_run_callback+0x6f/0x330 [ 6250.746713] ? pci_pm_freeze+0xe0/0xe0 [ 6250.746718] __device_suspend+0xf9/0x370 [ 6250.746724] ? 
dpm_watchdog_set+0x60/0x60 [ 6250.746730] async_suspend+0x1a/0x90 [ 6250.746735] async_run_entry_fn+0x34/0x160 [ 6250.746741] process_one_work+0x1f2/0x6d0 [ 6250.746749] worker_thread+0x49/0x4a0 [ 6250.746755] kthread+0x107/0x140 [ 6250.746759] ? process_one_work+0x6d0/0x6d0 [ 6250.746763] ? kthread_create_on_node+0x40/0x40 [ 6250.746768] ret_from_fork+0x2e/0x40 [ 6250.746778] ---[ end trace 102a62fd2160f5e6 ]--- v2: - Use the new pci_dev->needs_resume flag, to avoid any overhead during the ->pm_prepare hook. (Rafael) v3: - Update commit message to reference the actual regressing commit. (Lukas) v4: - Rebase on v4 of patch 1/2. Fixes: d14d2a8453d6 ("drm: Remove dev_pm_ops from drm_class") References: https://bugs.freedesktop.org/show_bug.cgi?id=100378 References: https://bugs.freedesktop.org/show_bug.cgi?id=100770 Cc: Rafael J. Wysocki Cc: Marta Lofstedt Cc: David Weinehall Cc: Tomi Sarvela Cc: Ville Syrjälä Cc: Mika Kuoppala Cc: Chris Wilson Cc: Takashi Iwai Cc: Bjorn Helgaas Cc: Lukas Wunner Cc: linux-pci@vger.kernel.org Cc: # v4.10.x: 4d071c3 - PCI/PM: Add needs_resume flag Cc: # v4.10.x Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Reported-and-tested-by: Marta Lofstedt Link: http://patchwork.freedesktop.org/patch/msgid/1493726649-32094-2-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3536f1a7f5da..2fdfaf135ea9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1242,6 +1242,15 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_fini; pci_set_drvdata(pdev, &dev_priv->drm); + /* + * Disable the system suspend direct complete optimization, which can + * leave the device suspended skipping the driver's suspend handlers + * if the device was already runtime suspended. 
This is needed due to + * the difference in our runtime and system suspend sequence and + * because the HDA driver may require us to enable the audio power + * domain during system suspend. + */ + pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; ret = i915_driver_init_early(dev_priv, ent); if (ret < 0) From a91afc974ee8441940241e3c39c75d7b8f38e911 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 May 2017 15:58:19 +0800 Subject: [PATCH 029/341] dt-bindings: clock: sunxi-ccu: Add pll-periph to PRCM's needed clocks The AR100 clock in the PRCM has parents, one of which is pll-periph from the main CCU. Add it to the list of required clocks for the PRCM CCU. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- Documentation/devicetree/bindings/clock/sunxi-ccu.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt index e9c5a1d9834a..f465647a4dd2 100644 --- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt +++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt @@ -22,7 +22,8 @@ Required properties : - #clock-cells : must contain 1 - #reset-cells : must contain 1 -For the PRCM CCUs on H3/A64, one more clock is needed: +For the PRCM CCUs on H3/A64, two more clocks are needed: +- "pll-periph": the SoC's peripheral PLL from the main CCU - "iosc": the SoC's internal frequency oscillator Example for generic CCU: @@ -39,8 +40,8 @@ Example for PRCM CCU: r_ccu: clock@01f01400 { compatible = "allwinner,sun50i-a64-r-ccu"; reg = <0x01f01400 0x100>; - clocks = <&osc24M>, <&osc32k>, <&iosc>; - clock-names = "hosc", "losc", "iosc"; + clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>; + clock-names = "hosc", "losc", "iosc", "pll-periph"; #clock-cells = <1>; #reset-cells = <1>; }; From c4be8c68e6900b1811bc64f74cb13d5032a389ce Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 May 2017 15:58:21 +0800 Subject: [PATCH
030/341] clk: sunxi-ng: h3: Export PLL_PERIPH0 clock for the PRCM The PRCM takes PLL_PERIPH0 as one of its parents for the AR100 clock. As such we need to be able to describe this relationship in the device tree. Export the PLL_PERIPH0 clock so we can reference it in the PRCM node. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun8i-h3.h | 4 +++- include/dt-bindings/clock/sun8i-h3-ccu.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h index 85973d1e8165..1b4baea37d81 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h @@ -29,7 +29,9 @@ #define CLK_PLL_VIDEO 6 #define CLK_PLL_VE 7 #define CLK_PLL_DDR 8 -#define CLK_PLL_PERIPH0 9 + +/* PLL_PERIPH0 exported for PRCM */ + #define CLK_PLL_PERIPH0_2X 10 #define CLK_PLL_GPU 11 #define CLK_PLL_PERIPH1 12 diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h index c2afc41d6964..e139fe5c62ec 100644 --- a/include/dt-bindings/clock/sun8i-h3-ccu.h +++ b/include/dt-bindings/clock/sun8i-h3-ccu.h @@ -43,6 +43,8 @@ #ifndef _DT_BINDINGS_CLK_SUN8I_H3_H_ #define _DT_BINDINGS_CLK_SUN8I_H3_H_ +#define CLK_PLL_PERIPH0 9 + #define CLK_CPUX 14 #define CLK_BUS_CE 20 From d85da227c3ae43d9ca513d60f244213cb4e55485 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 31 May 2017 15:58:23 +0800 Subject: [PATCH 031/341] clk: sunxi-ng: a64: Export PLL_PERIPH0 clock for the PRCM The PRCM takes PLL_PERIPH0 as one of its parents for the AR100 clock. As such we need to be able to describe this relationship in the device tree. Export the PLL_PERIPH0 clock so we can reference it in the PRCM node. 
Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun50i-a64.h | 4 +++- include/dt-bindings/clock/sun50i-a64-ccu.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.h b/drivers/clk/sunxi-ng/ccu-sun50i-a64.h index 9b3cd24b78d2..061b6fbb4f95 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.h +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.h @@ -31,7 +31,9 @@ #define CLK_PLL_VIDEO0_2X 8 #define CLK_PLL_VE 9 #define CLK_PLL_DDR0 10 -#define CLK_PLL_PERIPH0 11 + +/* PLL_PERIPH0 exported for PRCM */ + #define CLK_PLL_PERIPH0_2X 12 #define CLK_PLL_PERIPH1 13 #define CLK_PLL_PERIPH1_2X 14 diff --git a/include/dt-bindings/clock/sun50i-a64-ccu.h b/include/dt-bindings/clock/sun50i-a64-ccu.h index 370c0a0473fc..d66432c6e675 100644 --- a/include/dt-bindings/clock/sun50i-a64-ccu.h +++ b/include/dt-bindings/clock/sun50i-a64-ccu.h @@ -43,6 +43,8 @@ #ifndef _DT_BINDINGS_CLK_SUN50I_A64_H_ #define _DT_BINDINGS_CLK_SUN50I_A64_H_ +#define CLK_PLL_PERIPH0 11 + #define CLK_BUS_MIPI_DSI 28 #define CLK_BUS_CE 29 #define CLK_BUS_DMA 30 From 367d73d2806085bb507ab44c1f532640917fd5ca Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 31 May 2017 17:42:36 +0200 Subject: [PATCH 032/341] drm/i915: Always recompute watermarks when distrust_bios_wm is set, v2. On some systems there can be a race condition in which no crtc state is added to the first atomic commit. This results in all crtc's having a null DDB allocation, causing a FIFO underrun on any update until the first modeset. Changes since v1: - Do not take the connection_mutex, this is already done below. 
Reported-by: Maarten Lankhorst Inspired-by: Mahesh Kumar Signed-off-by: Maarten Lankhorst Fixes: 98d39494d375 ("drm/i915/gen9: Compute DDB allocation at atomic check time (v4)") Cc: # v4.8+ Cc: Mahesh Kumar Cc: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170531154236.27180-1-maarten.lankhorst@linux.intel.com Reviewed-by: Mahesh Kumar Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 936eef1634c7..fce4bc5ccc99 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4819,10 +4819,18 @@ skl_compute_wm(struct drm_atomic_state *state) struct drm_crtc_state *cstate; struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct skl_wm_values *results = &intel_state->wm_results; + struct drm_device *dev = state->dev; struct skl_pipe_wm *pipe_wm; bool changed = false; int ret, i; + /* + * When we distrust bios wm we always need to recompute to set the + * expected DDB allocations for each CRTC. + */ + if (to_i915(dev)->wm.distrust_bios_wm) + changed = true; + /* * If this transaction isn't actually touching any CRTC's, don't * bother with watermark calculation. Note that if we pass this @@ -4833,6 +4841,7 @@ skl_compute_wm(struct drm_atomic_state *state) */ for_each_new_crtc_in_state(state, crtc, cstate, i) changed = true; + if (!changed) return 0; From bb9d85f6e9de8fef5236c076530eab67a2f2431b Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Thu, 1 Jun 2017 11:29:18 +0530 Subject: [PATCH 033/341] drm/i915/skl: New ddb allocation algorithm This patch implements new DDB allocation algorithm as per HW team recommendation. This algo takes care of the scenario where we allocate less DDB for the planes with lower relative pixel rate, but they require more DDB to work. It also takes care of enabling same watermark level for each plane in crtc, for efficient power saving.
Changes since v1: - Rebase on top of Paulo's patch series Changes since v2: - Fix the for loop condition to enable WM Changes since v3: - Fix crash in cursor i-g-t reported by Maarten - Rebase after addressing Paulo's comments - Few other ULT fixes Changes since v4: - Rebase on drm-tip - Added separate function to enable WM levels Changes since v5: - Fix a crash identified in skl-6770HQ system Changes since v6: - Address review comments from Matt Changes since v7: - Fix failure return in skl_compute_plane_wm (Matt) - fix typo Changes since v8: - Always check cursor wm enable irrespective of total_data_rate Changes since v9: - fix typo Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Reviewed-by: Matt Roper Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170601055918.4601-1-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 257 ++++++++++++++++++++------------ 1 file changed, 159 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fce4bc5ccc99..1f9fee30cb4c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4039,13 +4039,41 @@ skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); } +static void +skl_enable_plane_wm_levels(const struct drm_i915_private *dev_priv, + uint16_t plane_ddb, + uint16_t max_level, + struct skl_plane_wm *wm) +{ + int level; + /* + * Now enable all levels in WM structure which can be enabled + * using current DDB allocation + */ + for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) { + struct skl_wm_level *level_wm = &wm->wm[level]; + + if (level > max_level || level_wm->plane_res_b == 0 + || level_wm->plane_res_l >= 31 + || level_wm->plane_res_b >= plane_ddb) { + level_wm->plane_en = false; + level_wm->plane_res_b = 0; + level_wm->plane_res_l = 0; + } else { + level_wm->plane_en = true; + } + } +} + 
static int skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, + struct skl_pipe_wm *pipe_wm, struct skl_ddb_allocation *ddb /* out */) { struct drm_atomic_state *state = cstate->base.state; struct drm_crtc *crtc = cstate->base.crtc; struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; @@ -4058,6 +4086,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, unsigned plane_data_rate[I915_MAX_PLANES] = {}; unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; uint16_t total_min_blocks = 0; + uint16_t total_level_ddb; + uint16_t plane_blocks = 0; + int max_level, level; /* Clear the partitioning for disabled planes. */ memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); @@ -4096,10 +4127,48 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return -EINVAL; } - alloc_size -= total_min_blocks; - ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; + alloc_size -= minimum[PLANE_CURSOR]; + ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - + minimum[PLANE_CURSOR]; ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; + for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) { + total_level_ddb = 0; + for_each_plane_id_on_crtc(intel_crtc, plane_id) { + /* + * TODO: We should calculate watermark values for Y/UV + * plane both in case of NV12 format and use both values + * for ddb calculation. NV12 is disabled as of now, So + * using only single/UV plane value here. 
+ */ + struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; + uint16_t plane_res_b = wm->wm[level].plane_res_b; + uint16_t min = minimum[plane_id] + y_minimum[plane_id]; + + if (plane_id == PLANE_CURSOR) + continue; + + total_level_ddb += max(plane_res_b, min); + } + + /* + * If This level can successfully be enabled with the + * pipe's current DDB allocation, then all lower levels are + * guaranteed to succeed as well. + */ + if (total_level_ddb <= alloc_size) + break; + } + + if ((level < 0) || (total_min_blocks > alloc_size)) { + DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); + DRM_DEBUG_KMS("minimum required %d/%d\n", (level < 0) ? + total_level_ddb : total_min_blocks, alloc_size); + return -EINVAL; + } + max_level = level; + alloc_size -= total_level_ddb; + /* * 2. Distribute the remaining space in proportion to the amount of * data each plane needs to fetch from memory. @@ -4109,13 +4178,24 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, total_data_rate = skl_get_total_relative_data_rate(cstate, plane_data_rate, plane_y_data_rate); + /* + * PLANE_CURSOR data rate is not included in total_data_rate. + * If only cursor plane is enabled we have to enable its WM levels + * explicitly before returning. Cursor has fixed ddb allocation, + * So it's ok to always check cursor WM enabling before return. 
+ */ + plane_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]); + skl_enable_plane_wm_levels(dev_priv, plane_blocks, max_level, + &pipe_wm->planes[PLANE_CURSOR]); if (total_data_rate == 0) return 0; start = alloc->start; for_each_plane_id_on_crtc(intel_crtc, plane_id) { unsigned int data_rate, y_data_rate; - uint16_t plane_blocks, y_plane_blocks = 0; + uint16_t plane_blocks = 0, y_plane_blocks = 0; + struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; + uint16_t plane_res_b = wm->wm[max_level].plane_res_b; if (plane_id == PLANE_CURSOR) continue; @@ -4127,33 +4207,36 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * promote the expression to 64 bits to avoid overflowing, the * result is < available as data_rate / total_data_rate < 1 */ - plane_blocks = minimum[plane_id]; - plane_blocks += div_u64((uint64_t)alloc_size * data_rate, - total_data_rate); /* Leave disabled planes at (0,0) */ if (data_rate) { + plane_blocks = max(minimum[plane_id], plane_res_b); + plane_blocks += div_u64((uint64_t)alloc_size * + data_rate, total_data_rate); ddb->plane[pipe][plane_id].start = start; ddb->plane[pipe][plane_id].end = start + plane_blocks; + start += plane_blocks; } - start += plane_blocks; - /* * allocation for y_plane part of planar format: + * TODO: Once we start calculating watermark values for Y/UV + * plane both consider it for initial allowed wm blocks. 
*/ y_data_rate = plane_y_data_rate[plane_id]; - y_plane_blocks = y_minimum[plane_id]; - y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, - total_data_rate); - if (y_data_rate) { + y_plane_blocks = y_minimum[plane_id]; + y_plane_blocks += div_u64((uint64_t)alloc_size * + y_data_rate, total_data_rate); ddb->y_plane[pipe][plane_id].start = start; ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks; + start += y_plane_blocks; } - - start += y_plane_blocks; + skl_enable_plane_wm_levels(dev_priv, + plane_blocks, + max_level, + wm); } return 0; @@ -4243,11 +4326,9 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, - uint16_t ddb_allocation, int level, uint16_t *out_blocks, /* out */ - uint8_t *out_lines, /* out */ - bool *enabled /* out */) + uint8_t *out_lines /* out */) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); const struct drm_plane_state *pstate = &intel_pstate->base; @@ -4270,10 +4351,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, bool y_tiled, x_tiled; if (latency == 0 || - !intel_wm_plane_visible(cstate, intel_pstate)) { - *enabled = false; + !intel_wm_plane_visible(cstate, intel_pstate)) return 0; - } y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || fb->modifier == I915_FORMAT_MOD_Yf_TILED; @@ -4359,9 +4438,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; - else if ((ddb_allocation && ddb_allocation / - fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) - selected_result = min_fixed_16_16(method1, method2); else if (latency >= linetime_us) selected_result = min_fixed_16_16(method1, method2); else @@ -4381,64 +4457,42 @@ static int 
skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } } - if (res_blocks >= ddb_allocation || res_lines > 31) { - *enabled = false; + if (res_lines >= 31 && level == 0) { + struct drm_plane *plane = pstate->plane; - /* - * If there are no valid level 0 watermarks, then we can't - * support this display configuration. - */ - if (level) { - return 0; - } else { - struct drm_plane *plane = pstate->plane; - - DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); - DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n", - plane->base.id, plane->name, - res_blocks, ddb_allocation, res_lines); - return -EINVAL; - } + DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); + DRM_DEBUG_KMS("[PLANE:%d:%s] lines required = %u/31\n", + plane->base.id, plane->name, res_lines); + return -EINVAL; } *out_blocks = res_blocks; *out_lines = res_lines; - *enabled = true; return 0; } static int skl_compute_wm_levels(const struct drm_i915_private *dev_priv, - struct skl_ddb_allocation *ddb, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, struct skl_plane_wm *wm) { - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); - struct drm_plane *plane = intel_pstate->base.plane; - struct intel_plane *intel_plane = to_intel_plane(plane); - uint16_t ddb_blocks; - enum pipe pipe = intel_crtc->pipe; int level, max_level = ilk_wm_max_level(dev_priv); int ret; if (WARN_ON(!intel_pstate->base.fb)) return -EINVAL; - ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); - for (level = 0; level <= max_level; level++) { struct skl_wm_level *result = &wm->wm[level]; ret = skl_compute_plane_wm(dev_priv, cstate, intel_pstate, - ddb_blocks, level, &result->plane_res_b, - &result->plane_res_l, - &result->plane_en); + &result->plane_res_l); if (ret) return ret; } @@ -4504,8 +4558,7 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, wm = 
&pipe_wm->planes[plane_id]; - ret = skl_compute_wm_levels(dev_priv, ddb, cstate, - intel_pstate, wm); + ret = skl_compute_wm_levels(dev_priv, cstate, intel_pstate, wm); if (ret) return ret; skl_compute_transition_wm(cstate, &wm->trans_wm); @@ -4618,6 +4671,45 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, return false; } +static int +skl_ddb_add_affected_planes(struct intel_crtc_state *cstate, + const struct skl_pipe_wm *old_pipe_wm, + const struct skl_pipe_wm *pipe_wm) +{ + struct drm_atomic_state *state = cstate->base.state; + struct drm_device *dev = state->dev; + struct drm_crtc *crtc = cstate->base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; + struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + struct drm_plane_state *plane_state; + struct drm_plane *plane; + enum pipe pipe = intel_crtc->pipe; + + WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); + + drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { + enum plane_id plane_id = to_intel_plane(plane)->id; + const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; + const struct skl_plane_wm *old_wm = &old_pipe_wm->planes[plane_id]; + + if ((skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], + &new_ddb->plane[pipe][plane_id]) && + skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], + &new_ddb->y_plane[pipe][plane_id])) && + !memcmp(wm, old_wm, sizeof(struct skl_plane_wm))) + continue; + + plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + } + + return 0; +} + static int skl_update_pipe_wm(struct drm_crtc_state *cstate, const struct skl_pipe_wm *old_pipe_wm, struct skl_pipe_wm *pipe_wm, /* out */ @@ -4631,6 +4723,17 @@ static int skl_update_pipe_wm(struct drm_crtc_state 
*cstate, if (ret) return ret; + ret = skl_allocate_pipe_ddb(intel_cstate, pipe_wm, ddb); + if (ret) + return ret; + /* + * TODO: Planes are included in state to arm WM registers. + * Scope to optimize further, by just rewriting plane surf register. + */ + ret = skl_ddb_add_affected_planes(intel_cstate, old_pipe_wm, pipe_wm); + if (ret) + return ret; + if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) *changed = false; else @@ -4653,41 +4756,7 @@ pipes_modified(struct drm_atomic_state *state) } static int -skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) -{ - struct drm_atomic_state *state = cstate->base.state; - struct drm_device *dev = state->dev; - struct drm_crtc *crtc = cstate->base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; - struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; - struct drm_plane_state *plane_state; - struct drm_plane *plane; - enum pipe pipe = intel_crtc->pipe; - - WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); - - drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { - enum plane_id plane_id = to_intel_plane(plane)->id; - - if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], - &new_ddb->plane[pipe][plane_id]) && - skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], - &new_ddb->y_plane[pipe][plane_id])) - continue; - - plane_state = drm_atomic_get_plane_state(state, plane); - if (IS_ERR(plane_state)) - return PTR_ERR(plane_state); - } - - return 0; -} - -static int -skl_compute_ddb(struct drm_atomic_state *state) +skl_include_affected_crtcs(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -4751,14 +4820,6 @@ skl_compute_ddb(struct drm_atomic_state *state) cstate = intel_atomic_get_crtc_state(state, 
intel_crtc); if (IS_ERR(cstate)) return PTR_ERR(cstate); - - ret = skl_allocate_pipe_ddb(cstate, ddb); - if (ret) - return ret; - - ret = skl_ddb_add_affected_planes(cstate); - if (ret) - return ret; } return 0; @@ -4848,7 +4909,7 @@ skl_compute_wm(struct drm_atomic_state *state) /* Clear all dirty flags */ results->dirty_pipes = 0; - ret = skl_compute_ddb(state); + ret = skl_include_affected_crtcs(state); if (ret) return ret; From 73b0ca8ec76d593d8ebb9d3a5e4021da852310e7 Mon Sep 17 00:00:00 2001 From: Mahesh Kumar Date: Fri, 26 May 2017 20:45:46 +0530 Subject: [PATCH 034/341] drm/i915/skl+: consider max supported plane pixel rate while scaling A display resolution is only supported if it meets all the restrictions below for Maximum Pipe Pixel Rate. The display resolution must fit within the maximum pixel rate output from the pipe. Make sure that the display pipe is able to feed pixels at a rate required to support the desired resolution. For each enabled plane on the pipe { If plane scaling enabled { Horizontal down scale amount = Maximum[1, plane horizontal size / scaler horizontal window size] Vertical down scale amount = Maximum[1, plane vertical size / scaler vertical window size] Plane down scale amount = Horizontal down scale amount * Vertical down scale amount Plane Ratio = 1 / Plane down scale amount } Else { Plane Ratio = 1 } If plane source pixel format is 64 bits per pixel { Plane Ratio = Plane Ratio * 8/9 } } Pipe Ratio = Minimum Plane Ratio of all enabled planes on the pipe If pipe scaling is enabled { Horizontal down scale amount = Maximum[1, pipe horizontal source size / scaler horizontal window size] Vertical down scale amount = Maximum[1, pipe vertical source size / scaler vertical window size] Note: The progressive fetch - interlace display mode is equivalent to a 2.0 vertical down scale Pipe down scale amount = Horizontal down scale amount * Vertical down scale amount Pipe Ratio = Pipe Ratio / Pipe down scale amount } Pipe maximum pixel rate = 
CDCLK frequency * Pipe Ratio In this patch our calculation is based on pipe downscale amount (plane max downscale amount * pipe downscale amount) instead of Pipe Ratio. So, max supported crtc clock with given scaling = CDCLK / pipe downscale. Flip will fail if the current crtc clock > max supported crtc clock with given scaling. Changes since V1: - separate out fixed_16_16 wrapper API definition Changes since V2: - Fix buggy crtc !active condition (Maarten) - use intel_wm_plane_visible wrapper as per Maarten's suggestion Changes since V3: - Change failure return from ERANGE to EINVAL Changes since V4: - Rebase based on previous patch changes Changes since V5: - return EINVAL instead of continue (Maarten) Changes since V6: - Improve commit message - Address review comment Changes since V7: - use !enable instead of !active - rename config variable for consistency (Maarten) Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170526151546.25025-4-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_display.c | 3 + drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_pm.c | 87 ++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a8ececfc759e..91b010134724 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11186,6 +11186,9 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, if (mode_changed) ret = skl_update_scaler_crtc(pipe_config); + if (!ret) + ret = skl_check_pipe_max_pixel_rate(intel_crtc, + pipe_config); if (!ret) ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, pipe_config); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f63e8aa76e9a..ac5cd41ab420 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@
-1886,6 +1886,8 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, int ignore); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); +int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, + struct intel_crtc_state *cstate); static inline int intel_enable_rc6(void) { return i915.enable_rc6; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1f9fee30cb4c..f9c5f613d085 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3863,6 +3863,93 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, return mul_fixed16(downscale_w, downscale_h); } +static uint_fixed_16_16_t +skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) +{ + uint_fixed_16_16_t pipe_downscale = u32_to_fixed_16_16(1); + + if (!crtc_state->base.enable) + return pipe_downscale; + + if (crtc_state->pch_pfit.enabled) { + uint32_t src_w, src_h, dst_w, dst_h; + uint32_t pfit_size = crtc_state->pch_pfit.size; + uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; + uint_fixed_16_16_t downscale_h, downscale_w; + + src_w = crtc_state->pipe_src_w; + src_h = crtc_state->pipe_src_h; + dst_w = pfit_size >> 16; + dst_h = pfit_size & 0xffff; + + if (!dst_w || !dst_h) + return pipe_downscale; + + fp_w_ratio = fixed_16_16_div(src_w, dst_w); + fp_h_ratio = fixed_16_16_div(src_h, dst_h); + downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); + downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1)); + + pipe_downscale = mul_fixed16(downscale_w, downscale_h); + } + + return pipe_downscale; +} + +int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, + struct intel_crtc_state *cstate) +{ + struct drm_crtc_state *crtc_state = &cstate->base; + struct drm_atomic_state *state = crtc_state->state; + struct drm_plane *plane; + const struct drm_plane_state *pstate; + struct intel_plane_state *intel_pstate; + int 
crtc_clock, cdclk; + uint32_t pipe_max_pixel_rate; + uint_fixed_16_16_t pipe_downscale; + uint_fixed_16_16_t max_downscale = u32_to_fixed_16_16(1); + + if (!cstate->base.enable) + return 0; + + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { + uint_fixed_16_16_t plane_downscale; + uint_fixed_16_16_t fp_9_div_8 = fixed_16_16_div(9, 8); + int bpp; + + if (!intel_wm_plane_visible(cstate, + to_intel_plane_state(pstate))) + continue; + + if (WARN_ON(!pstate->fb)) + return -EINVAL; + + intel_pstate = to_intel_plane_state(pstate); + plane_downscale = skl_plane_downscale_amount(cstate, + intel_pstate); + bpp = pstate->fb->format->cpp[0] * 8; + if (bpp == 64) + plane_downscale = mul_fixed16(plane_downscale, + fp_9_div_8); + + max_downscale = max_fixed_16_16(plane_downscale, max_downscale); + } + pipe_downscale = skl_pipe_downscale_amount(cstate); + + pipe_downscale = mul_fixed16(pipe_downscale, max_downscale); + + crtc_clock = crtc_state->adjusted_mode.crtc_clock; + cdclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; + pipe_max_pixel_rate = div_round_up_u32_fixed16(cdclk, pipe_downscale); + + if (pipe_max_pixel_rate < crtc_clock) { + DRM_ERROR("Max supported pixel clock with scaling exceeded\n"); + return -EINVAL; + } + + return 0; +} + static unsigned int skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate, From cb60606d835ca8b2f744835116bcabe64ce88849 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 31 May 2017 20:05:14 +0100 Subject: [PATCH 035/341] drm/i915: Guard against i915_ggtt_disable_guc() being invoked unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7c3f86b6dc51 ("drm/i915: Invalidate the guc ggtt TLB upon insertion") added the restoration of the invalidation routine after the GuC was disabled, but missed that the GuC was unconditionally disabled when not used. 
This then overwrites the invalidate routine for the older chipsets, causing havoc and breaking resume as the most obvious victim. We place the guard inside i915_ggtt_disable_guc() to be backport friendly (the bug was introduced into v4.11) but it would be preferred to be in more control over when this was guard (i.e. do not try and teardown the data structures before we have enabled them). That should be true with the reorganisation of the guc loaders. Reported-by: Ville Syrjälä Signed-off-by: Chris Wilson Fixes: 7c3f86b6dc51 ("drm/i915: Invalidate the guc ggtt TLB upon insertion") Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Oscar Mateo Cc: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: # v4.11+ Link: http://patchwork.freedesktop.org/patch/msgid/20170531190514.3691-1-chris@chris-wilson.co.uk Reviewed-by: Michel Thierry --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0c1008a2bbda..1489c3af7145 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3100,7 +3100,8 @@ void i915_ggtt_enable_guc(struct drm_i915_private *i915) void i915_ggtt_disable_guc(struct drm_i915_private *i915) { - i915->ggtt.invalidate = gen6_ggtt_invalidate; + if (i915->ggtt.invalidate == guc_ggtt_invalidate) + i915->ggtt.invalidate = gen6_ggtt_invalidate; } void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) From 789f35d70c33b5c3809907d586f080c62ada7365 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 1 Jun 2017 12:34:13 +0200 Subject: [PATCH 036/341] drm/i915/glk: Fix dotclock calculation in skl_check_pipe_max_pixel_rate Seems that GLK has a dotclock that's twice the display clock. skl_max_scale checks for IS_GEMINILAKE, so perform the same check here. While at it, change the DRM_ERROR to DEBUG_KMS. 
Fixes: 73b0ca8ec76d ("drm/i915/skl+: consider max supported plane pixel rate while scaling") Cc: Mahesh Kumar Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170601103413.7037-1-maarten.lankhorst@linux.intel.com Reviewed-by: Mahesh Kumar --- drivers/gpu/drm/i915/intel_pm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f9c5f613d085..ae36df02948a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3904,7 +3904,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct drm_plane *plane; const struct drm_plane_state *pstate; struct intel_plane_state *intel_pstate; - int crtc_clock, cdclk; + int crtc_clock, dotclk; uint32_t pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; uint_fixed_16_16_t max_downscale = u32_to_fixed_16_16(1); @@ -3939,11 +3939,15 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, pipe_downscale = mul_fixed16(pipe_downscale, max_downscale); crtc_clock = crtc_state->adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; - pipe_max_pixel_rate = div_round_up_u32_fixed16(cdclk, pipe_downscale); + dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; + + if (IS_GEMINILAKE(to_i915(intel_crtc->base.dev))) + dotclk *= 2; + + pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale); if (pipe_max_pixel_rate < crtc_clock) { - DRM_ERROR("Max supported pixel clock with scaling exceeded\n"); + DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n"); return -EINVAL; } From 7618138d8b40adfea7660ccabbb6b131d72e444b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 31 May 2017 20:05:35 +0300 Subject: [PATCH 037/341] drm/i915/ddi: Avoid long delays during system suspend / eDP disabling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm disabling 
either DP or eDP outputs can generate a spurious short pulse interrupt. The reason is that after disabling the port the source will stop sending valid stream data, while the sink expects either a valid stream or the idle pattern. Since neither of these is sent, the sink assumes (after an arbitrary delay) that the link is lost and requests link retraining with a short pulse. The spurious pulse is a real problem at least for eDP panels with long power-off / power-cycle delays: as part of disabling the output we disable the panel power. The subsequent spurious short pulse handling will have to turn the power back on, which means the driver has to do a redundant wait for the power-off and power-cycle delays. During system suspend this leads to an unnecessary delay of up to ~1s on systems with such panels, as reported by Rui. To fix this, put the sink into the DPMS D3 state before turning off the port. According to the DP spec, in this state the sink should not request retraining. This is also what we already do on pre-ddi platforms. As an alternative I also tried configuring the port to send the idle pattern - which is against BSpec - and leaving the port in normal mode before turning off the port. Neither of these resolved the problem.
Cc: Zhang Rui Cc: David Weinehall Cc: Ville Syrjälä Reported-and-tested-by: Zhang Rui Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1496250335-7627-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 0914ad96a71b..8bac62805cd1 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1732,12 +1732,18 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + struct intel_dp *intel_dp = NULL; int type = intel_encoder->type; uint32_t val; bool wait = false; /* old_crtc_state and old_conn_state are NULL when called from DP_MST */ + if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { + intel_dp = enc_to_intel_dp(encoder); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + } + val = I915_READ(DDI_BUF_CTL(port)); if (val & DDI_BUF_CTL_ENABLE) { val &= ~DDI_BUF_CTL_ENABLE; @@ -1753,9 +1759,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, if (wait) intel_wait_ddi_buf_idle(dev_priv, port); - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + if (intel_dp) { intel_edp_panel_vdd_on(intel_dp); intel_edp_panel_off(intel_dp); } From 3d06bf421a9b376bb658b1531de8826f1bf811ff Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 31 May 2017 13:16:31 +0300 Subject: [PATCH 038/341] drm/i915/dvo: fix debug logging on unknown DID Print DID not VID on the DID error path. Looks like a copy-paste error from the VID error path. 
Clarify and clean up error logging, making them distinguishable from each other, while at it. Reported-by: Petru Mihancea Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101243 Reviewed-by: Clinton Taylor Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170531101631.26953-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/dvo_ch7xxx.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c index 44b3159f2fe8..7aeeffd2428b 100644 --- a/drivers/gpu/drm/i915/dvo_ch7xxx.c +++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c @@ -217,9 +217,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo, name = ch7xxx_get_id(vendor); if (!name) { - DRM_DEBUG_KMS("ch7xxx not detected; got 0x%02x from %s " - "slave %d.\n", - vendor, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ch7xxx not detected; got VID 0x%02x from %s slave %d.\n", + vendor, adapter->name, dvo->slave_addr); goto out; } @@ -229,9 +228,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo, devid = ch7xxx_get_did(device); if (!devid) { - DRM_DEBUG_KMS("ch7xxx not detected; got 0x%02x from %s " - "slave %d.\n", - vendor, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ch7xxx not detected; got DID 0x%02x from %s slave %d.\n", + device, adapter->name, dvo->slave_addr); goto out; } From 04f7b24eccdfae680a36e9825fe0d61dcd5ed528 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Jun 2017 10:04:46 +0100 Subject: [PATCH 039/341] drm/i915/guc: Assert that we switch between known ggtt->invalidate functions When we enable the GuC, we enable an alternative mechanism for doing post-GGTT update invalidation. Likewise, when we disable the GuC, we restore the previous method. Assert that we change between known endpoints, so that we can catch if we accidentally clobber some other gen and if we change the invalidate routine without updating guc. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Oscar Mateo Cc: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Michel Thierry Link: http://patchwork.freedesktop.org/patch/msgid/20170601090446.1334-1-chris@chris-wilson.co.uk Reviewed-by: Michal Wajdeczko Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1489c3af7145..4ff854e6413c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3095,13 +3095,17 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) void i915_ggtt_enable_guc(struct drm_i915_private *i915) { + GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate); + i915->ggtt.invalidate = guc_ggtt_invalidate; } void i915_ggtt_disable_guc(struct drm_i915_private *i915) { - if (i915->ggtt.invalidate == guc_ggtt_invalidate) - i915->ggtt.invalidate = gen6_ggtt_invalidate; + /* We should only be called after i915_ggtt_enable_guc() */ + GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate); + + i915->ggtt.invalidate = gen6_ggtt_invalidate; } void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) From aed2fc102ffaa9bd41e4cff140d3ec8482d13ff5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 30 May 2017 13:13:34 +0100 Subject: [PATCH 040/341] drm/i915: Check the ring is empty when declaring the engines are idle As another precaution when testing whether the CS engine is actually idle, also inspect the ring's HEAD/TAIL registers, which should be equal when there are no commands left to execute by the GPU. 
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170530121334.17364-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 699f2d3861c7..bc38bd128b76 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1212,6 +1212,11 @@ static bool ring_is_idle(struct intel_engine_cs *engine) intel_runtime_pm_get(dev_priv); + /* First check that no commands are left in the ring */ + if ((I915_READ_HEAD(engine) & HEAD_ADDR) != + (I915_READ_TAIL(engine) & TAIL_ADDR)) + idle = false; + /* No bit for gen2, so assume the CS parser is idle */ if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) idle = false; From 20bb377106af69d16269b1837e9a945b9f508a2e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 21 May 2017 13:40:14 +0100 Subject: [PATCH 041/341] drm/i915: Fix logical inversion for gen4 quirking The assertion that we want to make before disabling the pin of the pages for the unknown swizzling quirk is that the quirk is indeed active, and that the quirk is disabled before we do apply it to the pages. 
Fixes: 2c3a3f44dc13 ("drm/i915: Fix pages pin counting around swizzle quirk") Fixes: 957870f93412 ("drm/i915: Split out i915_gem_object_set_tiling()") Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170521124014.27678-1-chris@chris-wilson.co.uk Reviewed-bhy: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index a0d6d4317a49..fb5231f98c0d 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -278,7 +278,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, obj->mm.quirked = false; } if (!i915_gem_object_is_tiled(obj)) { - GEM_BUG_ON(!obj->mm.quirked); + GEM_BUG_ON(obj->mm.quirked); __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } From ff8f797557c73f1f191866a013b61a7286330021 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Wed, 31 May 2017 10:35:52 +0800 Subject: [PATCH 042/341] drm/i915: return the correct usable aperture size under gvt environment I915_GEM_GET_APERTURE ioctl is used to probe aperture size from userspace. In gvt environment, each vm only use the ballooned part of aperture, so we should return the correct available aperture size exclude the reserved part by balloon. v2: add 'reserved' in struct i915_address_space to record the reserved size in ggtt (Chris) v3: remain aper_size as total, adjust aper_available_size exclude reserved and pinned. UMD driver need to adjust the max allocation size according to the available aperture size but not total size. 
KMD return the correct usable aperture size any time (Chris, Joonas) v4: decrease reserved in deballoon (Joonas) v5: add onion teardown in balloon, add vgt_deballoon_space (Joonas) v6: change title name (Zhenyu) v7: code style refine (Joonas) Suggested-by: Chris Wilson Suggested-by: Joonas Lahtinen Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Zhenyu Wang Signed-off-by: Weinan Li Link: http://patchwork.freedesktop.org/patch/msgid/1496198152-14175-1-git-send-email-weinan.z.li@intel.com Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 4 +-- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c | 44 ++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7b676fd1f075..aff449807399 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -143,9 +143,9 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; - size_t pinned; + u64 pinned; - pinned = 0; + pinned = ggtt->base.reserved; mutex_lock(&dev->struct_mutex); list_for_each_entry(vma, &ggtt->base.active_list, vm_link) if (i915_vma_is_pinned(vma)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb15684c1d83..da9aa9f706e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -255,6 +255,7 @@ struct i915_address_space { struct drm_i915_file_private *file; struct list_head global_link; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ + u64 reserved; /* size addr space reserved */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 4ab8a973b61f..3791e9c9392f 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -92,6 +92,18 @@ struct _balloon_info_ { static struct _balloon_info_ bl_info; +static void vgt_deballoon_space(struct i915_ggtt *ggtt, + struct drm_mm_node *node) +{ + DRM_DEBUG_DRIVER("deballoon space: range [0x%llx - 0x%llx] %llu KiB.\n", + node->start, + node->start + node->size, + node->size / 1024); + + ggtt->base.reserved -= node->size; + drm_mm_remove_node(node); +} + /** * intel_vgt_deballoon - deballoon reserved graphics address trunks * @dev_priv: i915 device private data @@ -108,12 +120,8 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv) DRM_DEBUG("VGT deballoon.\n"); - for (i = 0; i < 4; i++) { - if (bl_info.space[i].allocated) - drm_mm_remove_node(&bl_info.space[i]); - } - - memset(&bl_info, 0, sizeof(bl_info)); + for (i = 0; i < 4; i++) + vgt_deballoon_space(&dev_priv->ggtt, &bl_info.space[i]); } static int vgt_balloon_space(struct i915_ggtt *ggtt, @@ -121,15 +129,20 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, unsigned long start, unsigned long end) { unsigned long size = end - start; + int ret; if (start >= end) return -EINVAL; DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n", start, end, size / 1024); - return i915_gem_gtt_reserve(&ggtt->base, node, - size, start, I915_COLOR_UNEVICTABLE, - 0); + ret = i915_gem_gtt_reserve(&ggtt->base, node, + size, start, I915_COLOR_UNEVICTABLE, + 0); + if (!ret) + ggtt->base.reserved += size; + + return ret; } /** @@ -222,7 +235,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) ret = vgt_balloon_space(ggtt, &bl_info.space[3], unmappable_end, ggtt_end); if (ret) - goto err; + goto err_upon_mappable; } /* Mappable graphic memory ballooning */ @@ -231,7 +244,7 @@ int intel_vgt_balloon(struct 
drm_i915_private *dev_priv) 0, mappable_base); if (ret) - goto err; + goto err_upon_unmappable; } if (mappable_end < ggtt->mappable_end) { @@ -239,14 +252,19 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) mappable_end, ggtt->mappable_end); if (ret) - goto err; + goto err_below_mappable; } DRM_INFO("VGT balloon successfully\n"); return 0; +err_below_mappable: + vgt_deballoon_space(ggtt, &bl_info.space[0]); +err_upon_unmappable: + vgt_deballoon_space(ggtt, &bl_info.space[3]); +err_upon_mappable: + vgt_deballoon_space(ggtt, &bl_info.space[2]); err: DRM_ERROR("VGT balloon fail\n"); - intel_vgt_deballoon(dev_priv); return ret; } From 1d24ad457c54f0c8a835c33b7f687c454b643fcb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Jun 2017 14:33:29 +0100 Subject: [PATCH 043/341] drm/i915: Allow kswapd to pause the device whilst reaping In commit 5763ff04dc4e ("drm/i915: Avoid GPU stalls from kswapd") we stopped direct reclaim and kswapd from triggering GPU/client stalls whilst running (by restricting the objects they could reap to be idle). However with abusive GPU usage, it becomes quite easy to starve kswapd of memory and prevent it from making forward progress towards obtaining enough free memory (thus driving the system closer to swap exhaustion). Relax the previous restriction to allow kswapd (but not direct reclaim) to stall the device whilst reaping purgeable pages. v2: Also acquire the rpm wakelock to allow kswapd to unbind buffers. 
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170601133331.5973-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 0fd2b58ce475..58f27369183c 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -332,6 +332,15 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) sc->nr_to_scan - freed, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); + if (freed < sc->nr_to_scan && current_is_kswapd()) { + intel_runtime_pm_get(dev_priv); + freed += i915_gem_shrink(dev_priv, + sc->nr_to_scan - freed, + I915_SHRINK_ACTIVE | + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND); + intel_runtime_pm_put(dev_priv); + } shrinker_unlock(dev_priv, unlock); From 7b22b8c402c8ee26dd4dc1474887a2a91961e766 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 2 Jun 2017 13:06:39 -0700 Subject: [PATCH 044/341] drm/i915/cnp: Introduce Cannonpoint PCH. Most of south engine display that is in PCH is still the same as SPT and KBP, except for this key differences: - Backlight: Backlight programming changed in CNP PCH. - Panel Power: Sligh programming changed in CNP PCH. - GMBUS and GPIO: The pin mapping has changed in CNP PCH. All of these changes follow more the BXT style. v2: Update definition to use dev_priv isntead of dev (Tvrtko). 
Cc: Tvrtko Ursulin Signed-off-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: http://patchwork.freedesktop.org/patch/msgid/1496434004-29812-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2fdfaf135ea9..9e4c13ed795d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -216,6 +216,9 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Found KabyPoint PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); + } else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) { + dev_priv->pch_type = PCH_CNP; + DRM_DEBUG_KMS("Found CannonPoint PCH\n"); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a3b2674a4b7d..47b8b3d5e230 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1150,6 +1150,7 @@ enum intel_pch { PCH_LPT, /* Lynxpoint PCH */ PCH_SPT, /* Sunrisepoint PCH */ PCH_KBP, /* Kabypoint PCH */ + PCH_CNP, /* Cannonpoint PCH */ PCH_NOP, }; @@ -2965,11 +2966,13 @@ intel_info(const struct drm_i915_private *dev_priv) #define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA200 +#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) +#define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) #define HAS_PCH_KBP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_KBP) #define 
HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) #define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7b7f55a28eec..4cd9ee1ba332 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2548,7 +2548,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) I915_WRITE(SDEIIR, iir); ret = IRQ_HANDLED; - if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv)) + if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv) || + HAS_PCH_CNP(dev_priv)) spt_irq_handler(dev_priv, iir); else cpt_irq_handler(dev_priv, iir); @@ -4289,7 +4290,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev->driver->disable_vblank = gen8_disable_vblank; if (IS_GEN9_LP(dev_priv)) dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup; - else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv)) + else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv) || + HAS_PCH_CNP(dev_priv)) dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup; else dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup; From ec7e0bb35f8d339b51b440b5fc525618784f11f4 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 2 Jun 2017 13:06:40 -0700 Subject: [PATCH 045/341] drm/i915/cnp: Add PCI ID for Cannonpoint LP PCH The first two bytes of PCI ID for CNP_LP PCH are the same as that of SPT_LP. We should really be looking at the first 9 bits instead of the first 8 to identify platforms, although this seems to have not caused any problems on earlier platforms. Introduce a 9 bit extended mask for SPT and CNP while not touching the code for any of the other platforms. v2: (Rodrigo) Make platform agnostic and fix commit message. 
Signed-off-by: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: http://patchwork.freedesktop.org/patch/msgid/1496434004-29812-2-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 8 +++++++- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9e4c13ed795d..90b646c51759 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -170,6 +170,9 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { if (pch->vendor == PCI_VENDOR_ID_INTEL) { unsigned short id = pch->device & INTEL_PCH_DEVICE_ID_MASK; + unsigned short id_ext = pch->device & + INTEL_PCH_DEVICE_ID_MASK_EXT; + dev_priv->pch_id = id; if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) { @@ -206,7 +209,7 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Found SunrisePoint PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); - } else if (id == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) { + } else if (id_ext == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_SPT; DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && @@ -219,6 +222,9 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) } else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found CannonPoint PCH\n"); + } else if (id_ext == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) { + dev_priv->pch_type = PCH_CNP; + DRM_DEBUG_KMS("Found CannonPoint LP PCH\n"); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 47b8b3d5e230..22588d499e19 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h 
@@ -2958,6 +2958,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_POOLED_EU(dev_priv) ((dev_priv)->info.has_pooled_eu) #define INTEL_PCH_DEVICE_ID_MASK 0xff00 +#define INTEL_PCH_DEVICE_ID_MASK_EXT 0xff80 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 #define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 #define INTEL_PCH_PPT_DEVICE_ID_TYPE 0x1e00 @@ -2967,12 +2968,15 @@ intel_info(const struct drm_i915_private *dev_priv) #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA200 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 +#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) #define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) +#define HAS_PCH_CNP_LP(dev_priv) \ + ((dev_priv)->pch_id == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) #define HAS_PCH_KBP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_KBP) #define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) #define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT) From 9d81a99713bc29b2f96403b8f7c1720e1b277b35 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 2 Jun 2017 13:06:41 -0700 Subject: [PATCH 046/341] drm/i915/cnp: Get/set proper Raw clock frequency on CNP. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RAWCLK_FREQ register has changed for platforms with CNP+. [29:26] This field provides the denominator for the fractional part of the microsecond counter divider. The numerator is fixed at 1. Program this field to the denominator of the fractional portion of reference frequency minus one. If the fraction is 0, program to 0. 0100b = Fraction .2 MHz = Fraction 1/5. 0000b = Fraction .0 MHz. [25:16] This field provides the integer part of the microsecond counter divider. 
Program this field to the integer portion of the reference frequency minus one. Also this register tells us that proper raw clock should be read from SFUSE_STRAP and programmed to this register. Up to this point on other platforms we are reading instead of programming it so probably relying on whatever BIOS had configured here. From now on let's follow the spec and also program this register fetching the right value from SFUSE_STRAP as Spec tells us to do. v2: Read from SFUSE_STRAP and Program RAWCLK_FREQ instead of reading the value and relying on someone else to program that for us. v3: Add missing else. (Jani) v4: Addressing all Ville's catches: Use macro for shift bits instead of defining shift. Remove shift from the cleaning bits with mask that already has it. Add missing I915_WRITE to actually write the reg. Stop using useless DIV_ROUND_* on divider that is exact division and use DIV_ROUND_CLOSEST for the fraction part. v5: Remove useless Read-Modify-Write on rawclk_freq reg. (Ville). v6: Change is per PCH instead of per platform.
Cc: Ville Syrjälä Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1496434004-29812-3-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_cdclk.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 231ee86625cd..cb83fb7a8e00 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6838,6 +6838,10 @@ enum { #define FDL_TP2_TIMER_SHIFT 10 #define FDL_TP2_TIMER_MASK (3<<10) #define RAWCLK_FREQ_MASK 0x3ff +#define CNP_RAWCLK_DIV_MASK (0x3ff << 16) +#define CNP_RAWCLK_DIV(div) ((div) << 16) +#define CNP_RAWCLK_FRAC_MASK (0xf << 26) +#define CNP_RAWCLK_FRAC(frac) ((frac) << 26) #define PCH_DPLL_TMR_CFG _MMIO(0xc6208) @@ -8141,6 +8145,7 @@ enum { /* SFUSE_STRAP */ #define SFUSE_STRAP _MMIO(0xc2014) #define SFUSE_STRAP_FUSE_LOCK (1<<13) +#define SFUSE_STRAP_RAW_FREQUENCY (1<<8) #define SFUSE_STRAP_DISPLAY_DISABLED (1<<7) #define SFUSE_STRAP_CRT_DISABLED (1<<6) #define SFUSE_STRAP_DDIB_DETECTED (1<<2) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 29792972d55d..634c89fe6377 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1780,6 +1780,30 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv) DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); } +static int cnp_rawclk(struct drm_i915_private *dev_priv) +{ + u32 rawclk; + int divider, fraction; + + if (I915_READ(SFUSE_STRAP) & SFUSE_STRAP_RAW_FREQUENCY) { + /* 24 MHz */ + divider = 24000; + fraction = 0; + } else { + /* 19.2 MHz */ + divider = 19000; + fraction = 200; + } + + rawclk = CNP_RAWCLK_DIV((divider / 1000) - 1); + if (fraction) + rawclk |= CNP_RAWCLK_FRAC(DIV_ROUND_CLOSEST(1000, + fraction) - 1); + + I915_WRITE(PCH_RAWCLK_FREQ, rawclk); + return divider 
+ fraction; +} + static int pch_rawclk(struct drm_i915_private *dev_priv) { return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; @@ -1827,7 +1851,10 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv) */ void intel_update_rawclk(struct drm_i915_private *dev_priv) { - if (HAS_PCH_SPLIT(dev_priv)) + + if (HAS_PCH_CNP(dev_priv)) + dev_priv->rawclk_freq = cnp_rawclk(dev_priv); + else if (HAS_PCH_SPLIT(dev_priv)) dev_priv->rawclk_freq = pch_rawclk(dev_priv); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) dev_priv->rawclk_freq = vlv_hrawclk(dev_priv); From 4c9f7086ac6d069d5b79ba37ef4f1ed4fa3dc3f7 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 2 Jun 2017 13:06:42 -0700 Subject: [PATCH 047/341] drm/i915/cnp: Backlight support for CNP. Split out BXT and CNP's setup_backlight(),enable_backlight(), disable_backlight() and hz_to_pwm() into two separate functions instead of reusing BXT function. Reuse set_backlight() and get_backlight() since they have no reference to the utility pin. v2: Reuse BXT functions with controller 0 instead of redefining it. (Jani). Use dev_priv->rawclk_freq instead of getting the value from SFUSE_STRAP. v3: Avoid setup backligh controller along with hooks and fully reuse hooks setup as suggested by Jani. v4: Clean up commit message. v5: Implement per PCH instead per platform. v6: Introduce a new function for CNP.(Jani and Ville) v7: Squash the all CNP Backlight support patches into a single patch. (Jani) v8: Correct indentation, remove unneeded blank lines and correct mail address (Jani). v9: Remove unused enum pipe. (by CI) v10: Remove comment mentioning SFUSE_STRAP in a part of the code that we don't use it. (Jani) Make controller = 0 since current CNP has only one controller and put a comment mentioning why we reuse the BXT definitions and are keeping the controller = 0. (DK) v11: Remove spurious line. 
(DK) Reviewed-by: Dhinakaran Pandiyan Reviewed-by: Jani Nikula Suggested-by: Jani Nikula Suggested-by: Ville Syrjala Cc: Ville Syrjala Cc: Jani Nikula Signed-off-by: Anusha Srivatsa Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1496434004-29812-4-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_panel.c | 96 ++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index c8103f8d4dfa..4114cb3f14e7 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -796,6 +796,19 @@ static void bxt_disable_backlight(struct intel_connector *connector) } } +static void cnp_disable_backlight(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u32 tmp; + + intel_panel_actually_set_backlight(connector, 0); + + tmp = I915_READ(BXT_BLC_PWM_CTL(panel->backlight.controller)); + I915_WRITE(BXT_BLC_PWM_CTL(panel->backlight.controller), + tmp & ~BXT_BLC_PWM_ENABLE); +} + static void pwm_disable_backlight(struct intel_connector *connector) { struct intel_panel *panel = &connector->panel; @@ -1086,6 +1099,35 @@ static void bxt_enable_backlight(struct intel_connector *connector) pwm_ctl | BXT_BLC_PWM_ENABLE); } +static void cnp_enable_backlight(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u32 pwm_ctl; + + pwm_ctl = I915_READ(BXT_BLC_PWM_CTL(panel->backlight.controller)); + if (pwm_ctl & BXT_BLC_PWM_ENABLE) { + DRM_DEBUG_KMS("backlight already enabled\n"); + pwm_ctl &= ~BXT_BLC_PWM_ENABLE; + I915_WRITE(BXT_BLC_PWM_CTL(panel->backlight.controller), + pwm_ctl); + } + + I915_WRITE(BXT_BLC_PWM_FREQ(panel->backlight.controller), + panel->backlight.max); + + intel_panel_actually_set_backlight(connector, 
panel->backlight.level); + + pwm_ctl = 0; + if (panel->backlight.active_low_pwm) + pwm_ctl |= BXT_BLC_PWM_POLARITY; + + I915_WRITE(BXT_BLC_PWM_CTL(panel->backlight.controller), pwm_ctl); + POSTING_READ(BXT_BLC_PWM_CTL(panel->backlight.controller)); + I915_WRITE(BXT_BLC_PWM_CTL(panel->backlight.controller), + pwm_ctl | BXT_BLC_PWM_ENABLE); +} + static void pwm_enable_backlight(struct intel_connector *connector) { struct intel_panel *panel = &connector->panel; @@ -1249,6 +1291,17 @@ void intel_backlight_device_unregister(struct intel_connector *connector) } #endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */ +/* + * CNP: PWM clock frequency is 19.2 MHz or 24 MHz. + * PWM increment = 1 + */ +static u32 cnp_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + + return DIV_ROUND_CLOSEST(KHz(dev_priv->rawclk_freq), pwm_freq_hz); +} + /* * BXT: PWM clock frequency = 19.2 MHz. */ @@ -1644,6 +1697,42 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) return 0; } +static int +cnp_setup_backlight(struct intel_connector *connector, enum pipe unused) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u32 pwm_ctl, val; + + /* + * CNP has the BXT implementation of backlight, but with only + * one controller. Future platforms could have multiple controllers + * so let's make this extensible and prepared for the future. 
+ */ + panel->backlight.controller = 0; + + pwm_ctl = I915_READ(BXT_BLC_PWM_CTL(panel->backlight.controller)); + + panel->backlight.active_low_pwm = pwm_ctl & BXT_BLC_PWM_POLARITY; + panel->backlight.max = + I915_READ(BXT_BLC_PWM_FREQ(panel->backlight.controller)); + + if (!panel->backlight.max) + panel->backlight.max = get_backlight_max_vbt(connector); + + if (!panel->backlight.max) + return -ENODEV; + + val = bxt_get_backlight(connector); + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); + + panel->backlight.enabled = pwm_ctl & BXT_BLC_PWM_ENABLE; + + return 0; +} + static int pwm_setup_backlight(struct intel_connector *connector, enum pipe pipe) { @@ -1760,6 +1849,13 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) panel->backlight.set = bxt_set_backlight; panel->backlight.get = bxt_get_backlight; panel->backlight.hz_to_pwm = bxt_hz_to_pwm; + } else if (HAS_PCH_CNP(dev_priv)) { + panel->backlight.setup = cnp_setup_backlight; + panel->backlight.enable = cnp_enable_backlight; + panel->backlight.disable = cnp_disable_backlight; + panel->backlight.set = bxt_set_backlight; + panel->backlight.get = bxt_get_backlight; + panel->backlight.hz_to_pwm = cnp_hz_to_pwm; } else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv)) { panel->backlight.setup = lpt_setup_backlight; From 3d02352cd9e8b43805bf68e50e395fda2e218791 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 2 Jun 2017 13:06:43 -0700 Subject: [PATCH 048/341] drm/i915/cnp: add CNP gmbus support On CNP PCH based platforms the gmbus is on the south display that is on PCH. The existing implementation for previous platforms already covers the need for CNP except for the pin pair configuration that follows similar definitions that we had on BXT. v2: Don't drop "_BXT" as the indicator of the first platform supporting these pin numbers. Suggested by Daniel.
v3: Add missing else and fix register table since CNP GPIO_CTL starts on 0xC5014. v4: Fix pin number and map according to the current available VBT. Re-add pin 4 for port D. Lost during some rebase. v5: Use table as spec. If VBT is wrong it should be ignored. Cc: Daniel Vetter Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: http://patchwork.freedesktop.org/patch/msgid/1496434004-29812-5-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 3 ++- drivers/gpu/drm/i915/intel_hdmi.c | 8 +++++--- drivers/gpu/drm/i915/intel_i2c.c | 15 +++++++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cb83fb7a8e00..1329420f4a1e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2626,9 +2626,10 @@ enum skl_disp_power_wells { #define GMBUS_PIN_DPB 5 /* SDVO, HDMIB */ #define GMBUS_PIN_DPD 6 /* HDMID */ #define GMBUS_PIN_RESERVED 7 /* 7 reserved */ -#define GMBUS_PIN_1_BXT 1 +#define GMBUS_PIN_1_BXT 1 /* BXT+ (atom) and CNP+ (big core) */ #define GMBUS_PIN_2_BXT 2 #define GMBUS_PIN_3_BXT 3 +#define GMBUS_PIN_4_CNP 4 #define GMBUS_NUM_PINS 7 /* including 0 */ #define GMBUS1 _MMIO(dev_priv->gpio_mmio_base + 0x5104) /* command/status */ #define GMBUS_SW_CLR_INT (1<<31) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 41267ffb3624..ec0779a52d53 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1802,19 +1802,21 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, switch (port) { case PORT_B: - if (IS_GEN9_LP(dev_priv)) + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) ddc_pin = GMBUS_PIN_1_BXT; else ddc_pin = GMBUS_PIN_DPB; break; case PORT_C: - if (IS_GEN9_LP(dev_priv)) + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) ddc_pin = GMBUS_PIN_2_BXT; else ddc_pin = GMBUS_PIN_DPC; break; case PORT_D: - if 
(IS_CHERRYVIEW(dev_priv)) + if (HAS_PCH_CNP(dev_priv)) + ddc_pin = GMBUS_PIN_4_CNP; + else if (IS_CHERRYVIEW(dev_priv)) ddc_pin = GMBUS_PIN_DPD_CHV; else ddc_pin = GMBUS_PIN_DPD; diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index b6401e8f1bd6..3c9e00d4ba5a 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -68,11 +68,20 @@ static const struct gmbus_pin gmbus_pins_bxt[] = { [GMBUS_PIN_3_BXT] = { "misc", GPIOD }, }; +static const struct gmbus_pin gmbus_pins_cnp[] = { + [GMBUS_PIN_1_BXT] = { "dpb", GPIOB }, + [GMBUS_PIN_2_BXT] = { "dpc", GPIOC }, + [GMBUS_PIN_3_BXT] = { "misc", GPIOD }, + [GMBUS_PIN_4_CNP] = { "dpd", GPIOE }, +}; + /* pin is expected to be valid */ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, unsigned int pin) { - if (IS_GEN9_LP(dev_priv)) + if (HAS_PCH_CNP(dev_priv)) + return &gmbus_pins_cnp[pin]; + else if (IS_GEN9_LP(dev_priv)) return &gmbus_pins_bxt[pin]; else if (IS_GEN9_BC(dev_priv)) return &gmbus_pins_skl[pin]; @@ -87,7 +96,9 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, { unsigned int size; - if (IS_GEN9_LP(dev_priv)) + if (HAS_PCH_CNP(dev_priv)) + size = ARRAY_SIZE(gmbus_pins_cnp); + else if (IS_GEN9_LP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); else if (IS_GEN9_BC(dev_priv)) size = ARRAY_SIZE(gmbus_pins_skl); From 938361e7a50619b76a1415c86438eaee41397220 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 2 Jun 2017 13:06:44 -0700 Subject: [PATCH 049/341] drm/i915/cnp: Panel Power sequence changes for CNP PCH. Panel Power sequences for CNP is similar to Broxton, but with only one sequencer. Main difference from SPT is that PP_DIVISOR was removed and power cycle delay has been moved to PP_CONTROL. v2: Add missed pp_div write, that is now part of PP_CONTROL[8:4] as on Broxton. (Found by DK) v3: Improve commit message. 
(By DK) Cc: Dhinakaran Pandiyan Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: Clinton Taylor Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1496434004-29812-6-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 49a1db3787c5..d2fd8b67bb8a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -798,7 +798,7 @@ static void intel_pps_get_registers(struct drm_i915_private *dev_priv, regs->pp_stat = PP_STATUS(pps_idx); regs->pp_on = PP_ON_DELAYS(pps_idx); regs->pp_off = PP_OFF_DELAYS(pps_idx); - if (!IS_GEN9_LP(dev_priv)) + if (!IS_GEN9_LP(dev_priv) && !HAS_PCH_CNP(dev_priv)) regs->pp_div = PP_DIVISOR(pps_idx); } @@ -5124,7 +5124,7 @@ intel_pps_readout_hw_state(struct drm_i915_private *dev_priv, pp_on = I915_READ(regs.pp_on); pp_off = I915_READ(regs.pp_off); - if (!IS_GEN9_LP(dev_priv)) { + if (!IS_GEN9_LP(dev_priv) && !HAS_PCH_CNP(dev_priv)) { I915_WRITE(regs.pp_ctrl, pp_ctl); pp_div = I915_READ(regs.pp_div); } @@ -5142,7 +5142,7 @@ intel_pps_readout_hw_state(struct drm_i915_private *dev_priv, seq->t10 = (pp_off & PANEL_POWER_DOWN_DELAY_MASK) >> PANEL_POWER_DOWN_DELAY_SHIFT; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) { u16 tmp = (pp_ctl & BXT_POWER_CYCLE_DELAY_MASK) >> BXT_POWER_CYCLE_DELAY_SHIFT; if (tmp > 0) @@ -5299,7 +5299,7 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, (seq->t10 << PANEL_POWER_DOWN_DELAY_SHIFT); /* Compute the divisor for the pp clock, simply match the Bspec * formula. 
*/ - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) { pp_div = I915_READ(regs.pp_ctrl); pp_div &= ~BXT_POWER_CYCLE_DELAY_MASK; pp_div |= (DIV_ROUND_UP((seq->t11_t12 + 1), 1000) @@ -5325,7 +5325,7 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, I915_WRITE(regs.pp_on, pp_on); I915_WRITE(regs.pp_off, pp_off); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) I915_WRITE(regs.pp_ctrl, pp_div); else I915_WRITE(regs.pp_div, pp_div); @@ -5333,7 +5333,7 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, DRM_DEBUG_KMS("panel power sequencer register settings: PP_ON %#x, PP_OFF %#x, PP_DIV %#x\n", I915_READ(regs.pp_on), I915_READ(regs.pp_off), - IS_GEN9_LP(dev_priv) ? + (IS_GEN9_LP(dev_priv) || HAS_PCH_CNP(dev_priv)) ? (I915_READ(regs.pp_ctrl) & BXT_POWER_CYCLE_DELAY_MASK) : I915_READ(regs.pp_div)); } From 3def03441e53e29eed3afd9009974a5a42bf124a Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sat, 20 May 2017 13:27:00 +0200 Subject: [PATCH 050/341] genksyms: add printf format attribute to error_with_pos() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling with -Wsuggest-attribute=format in HOSTCFLAGS, gcc complains that error_with_pos() may be declared with a printf format attribute: scripts/genksyms/genksyms.c:726:3: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] vfprintf(stderr, fmt, args); ^~~~~~~~ This would allow catching printf-format errors at compile time in callers to error_with_pos(). Add this attribute. 
Signed-off-by: Nicolas Iooss Signed-off-by: Masahiro Yamada --- scripts/genksyms/genksyms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h index 3bffdcaaa274..b724a0290c75 100644 --- a/scripts/genksyms/genksyms.h +++ b/scripts/genksyms/genksyms.h @@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start, int yylex(void); int yyparse(void); -void error_with_pos(const char *, ...); +void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2))); /*----------------------------------------------------------------------*/ #define xmalloc(size) ({ void *__ptr = malloc(size); \ From cbf52a3e6a8a92beec6e0c70abf4111cd8f8faf7 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 5 Jun 2017 13:59:15 +0200 Subject: [PATCH 051/341] tags: honor COMPILED_SOURCE with apart output directory When the kernel is compiled with an "O=" argument, the object files are not in the source tree, but in the build tree. This patch fixes O= build by looking for object files in the build tree. Fixes: 923e02ecf3f8 ("scripts/tags.sh: Support compiled source") Signed-off-by: Robert Jarzmik Signed-off-by: Masahiro Yamada --- scripts/tags.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/tags.sh b/scripts/tags.sh index d661f2f3ef61..d23dcbf17457 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -106,6 +106,7 @@ all_compiled_sources() case "$i" in *.[cS]) j=${i/\.[cS]/\.o} + j="${j#$tree}" if [ -e $j ]; then echo $i fi From 27fef9f8ecb0495d302deba210606a32e54db37a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 6 Jun 2017 09:46:33 +0100 Subject: [PATCH 052/341] mfd: arizona: Fix typo using hard-coded register A hardcoded register is accidentally used instead of the register address passed into the function. Correct this and use the appropriate variable. This would cause minor issues on wm5102, but all other devices using this driver would have been unaffected. 
Fixes: commit ef84f885e037 ("mfd: arizona: Refactor arizona_poll_reg") Reported-by: Andrzej Hajda Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/arizona-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index 75488e65cd96..8d46e3ad9529 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona, int ret; ret = regmap_read_poll_timeout(arizona->regmap, - ARIZONA_INTERRUPT_RAW_STATUS_5, val, - ((val & mask) == target), + reg, val, ((val & mask) == target), ARIZONA_REG_POLL_DELAY_US, timeout_ms * 1000); if (ret) From 4ba459a224fd8e85b241d15481d63aaa3b306d4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 21:30:43 +0300 Subject: [PATCH 053/341] drm/i915: Remove dead code from runtime resume handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the SNB PCH refclock init call from the runtime resume handler. I don't think it was actually needed even when we had SNB runtime PM, and it definitely isn't needed ever since SNB runtime PM was nuked in commit d4c5636e7447 ("drm/i915: Remove runtime PM for SNB").
Cc: Rodrigo Vivi Cc: Paulo Zanoni Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601183043.28543-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 90b646c51759..6ca99dea7cf0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2470,9 +2470,6 @@ static int intel_runtime_resume(struct device *kdev) intel_guc_resume(dev_priv); - if (IS_GEN6(dev_priv)) - intel_init_pch_refclk(dev_priv); - if (IS_GEN9_LP(dev_priv)) { bxt_disable_dc9(dev_priv); bxt_display_core_init(dev_priv, true); From 3db28271f0feae129262d30e41384a7c4c767987 Mon Sep 17 00:00:00 2001 From: Sebastian Parschauer Date: Tue, 6 Jun 2017 13:53:13 +0200 Subject: [PATCH 054/341] HID: Add quirk for Dell PIXART OEM mouse This mouse is also known under other IDs. It needs the quirk ALWAYS_POLL or will disconnect in runlevel 1 or 3. 
Signed-off-by: Sebastian Parschauer CC: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 8ca1e8ce0af2..4f9a3938189a 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -319,6 +319,9 @@ #define USB_VENDOR_ID_DELCOM 0x0fc5 #define USB_DEVICE_ID_DELCOM_VISUAL_IND 0xb080 +#define USB_VENDOR_ID_DELL 0x413c +#define USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE 0x301a + #define USB_VENDOR_ID_DELORME 0x1163 #define USB_DEVICE_ID_DELORME_EARTHMATE 0x0100 #define USB_DEVICE_ID_DELORME_EM_LT20 0x0200 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 6316498b7812..a88e7c7bea0a 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -85,6 +85,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT }, From 9ba26a7283f56100eb08a2df48f17da600f60d52 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Tue, 6 Jun 2017 17:07:53 +0800 Subject: [PATCH 055/341] Kbuild: tiny correction on `make help` The help info of `make C=1` is a little confusing; make it clear.
Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 63e10bd4f14a..1e2ae78f8ba5 100644 --- a/Makefile +++ b/Makefile @@ -1437,7 +1437,7 @@ help: @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build' @echo ' make V=2 [targets] 2 => give reason for rebuild of target' @echo ' make O=dir [targets] Locate all output files in "dir", including .config' - @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' + @echo ' make C=1 [targets] Check re-compiled c source with $$CHECK (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' @echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where' From ec1b4ee2834e66884e5b0d3d465f347ff212e372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 15 Dec 2016 19:47:34 +0200 Subject: [PATCH 056/341] drm/i915: Workaround VLV/CHV DSI scanline counter hardware fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scanline counter is bonkers on VLV/CHV DSI. The scanline counter increment is not lined up with the start of vblank like it is on every other platform and output type. This causes problems for both the vblank timestamping and atomic update vblank evasion. On my FFRD8 machine at least, the scanline counter increment happens about 1/3 of a scanline ahead of the start of vblank (which is where all register latching happens still). That means we can't trust the scanline counter to tell us whether we're in vblank or not while we're on that particular line. In order to keep vblank timestamping in working condition when called from the vblank irq, we'll leave scanline_offset at one, which means that the entire line containing the start of vblank is considered to be inside the vblank. 
For the vblank evasion we'll need to consider that entire line to be bad, since we can't tell whether the registers already got latched or not. And we can't actually use the start of vblank interrupt to get us past that line as the interrupt would fire too soon, and then we'd end up waiting for the next start of vblank instead. One way around that would be using the frame start interrupt instead since that wouldn't fire until the next scanline, but that would require some bigger changes in the interrupt code. So for simplicity we'll just poll until we get past the bad line. v2: Adjust the comments a bit Cc: stable@vger.kernel.org Cc: Jonas Aaberg Tested-by: Jonas Aaberg Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99086 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161215174734.28779-1-ville.syrjala@linux.intel.com Tested-by: Mika Kahola Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/intel_display.c | 9 +++++++++ drivers/gpu/drm/i915/intel_sprite.c | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 91b010134724..b0a9e156bfd1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12394,6 +12394,15 @@ static void update_scanline_offset(struct intel_crtc *crtc) * type. For DP ports it behaves like most other platforms, but on HDMI * there's an extra 1 line difference. So we need to add two instead of * one to the value. + * + * On VLV/CHV DSI the scanline counter would appear to increment + * approx. 1/3 of a scanline before start of vblank. Unfortunately + * that means we can't tell whether we're in vblank or not while + * we're on that particular line. We must still set scanline_offset + * to 1 so that the vblank timestamps come out correct when we query + * the scanline counter from within the vblank interrupt handler.
+ * However if queried just before the start of vblank we'll get an + * answer that's slightly in the future. */ if (IS_GEN2(dev_priv)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index c4bf19364e49..0c650c2cbca8 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -83,10 +83,13 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, */ void intel_pipe_update_start(struct intel_crtc *crtc) { + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; long timeout = msecs_to_jiffies_timeout(1); int scanline, min, max, vblank_start; wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base); + bool need_vlv_dsi_wa = (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI); DEFINE_WAIT(wait); vblank_start = adjusted_mode->crtc_vblank_start; @@ -139,6 +142,24 @@ void intel_pipe_update_start(struct intel_crtc *crtc) drm_crtc_vblank_put(&crtc->base); + /* + * On VLV/CHV DSI the scanline counter would appear to + * increment approx. 1/3 of a scanline before start of vblank. + * The registers still get latched at start of vblank however. + * This means we must not write any registers on the first + * line of vblank (since not the whole line is actually in + * vblank). And unfortunately we can't use the interrupt to + * wait here since it will fire too soon. We could use the + * frame start interrupt instead since it will fire after the + * critical scanline, but that would require more changes + * in the interrupt code. So for now we'll just do the nasty + * thing and poll for the bad scanline to pass us by. 
+ * + * FIXME figure out if BXT+ DSI suffers from this as well + */ + while (need_vlv_dsi_wa && scanline == vblank_start) + scanline = intel_get_crtc_scanline(crtc); + crtc->debug.scanline_start = scanline; crtc->debug.start_vbl_time = ktime_get(); crtc->debug.start_vbl_count = intel_crtc_get_vblank_counter(crtc); From 3fd5d1ecae2d91bf3d3ce73ce0ef97d84d93a770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 6 Jun 2017 15:43:18 +0300 Subject: [PATCH 057/341] drm/i915: Implement fbc_status "Compressing" info for all platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of compressed segments has been available ever since FBC2 was introduced in g4x, it just moved from the STATUS register into STATUS2 on IVB. For FBC1 if we really wanted the number of compressed segments we'd have to trawl through the tags, but in this case since the code just uses the number of compressed segments as an indicator whether compression has occurred we can just check the state of the COMPRESSING and COMPRESSED bits. IIRC the hardware will try to periodically recompress all uncompressed lines even if they haven't changed and the COMPRESSED bit will be cleared while the compressor is running, so just checking the COMPRESSED bit might not give us the right answer. Hence it seems better to check for both COMPRESSED and COMPRESSING as that should tell us that the compressor is at least trying to do something. While at it move the IVB+ register define to the right place, unify the naming convention of the compressed segment count masks, and fix up the mask for g4x. 
v2: s/ILK_DPFC_STATUS2/IVB_FBC_STATUS2/ (Paulo) Cc: Paulo Zanoni Cc: Gabriel Krisman Bertazi Signed-off-by: Ville Syrjälä Tested-by: Gabriel Krisman Bertazi # SNB Reviewed-by: Paulo Zanoni # ilk+ Acked-by: Paulo Zanoni # pre-ilk Link: http://patchwork.freedesktop.org/patch/msgid/20170606124318.31755-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 22 ++++++++++++++++------ drivers/gpu/drm/i915/i915_reg.h | 10 +++++----- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3b088685a553..8fdb911344b3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1670,12 +1670,22 @@ static int i915_fbc_status(struct seq_file *m, void *unused) seq_printf(m, "FBC disabled: %s\n", dev_priv->fbc.no_fbc_reason); - if (intel_fbc_is_active(dev_priv) && INTEL_GEN(dev_priv) >= 7) { - uint32_t mask = INTEL_GEN(dev_priv) >= 8 ? - BDW_FBC_COMPRESSION_MASK : - IVB_FBC_COMPRESSION_MASK; - seq_printf(m, "Compressing: %s\n", - yesno(I915_READ(FBC_STATUS2) & mask)); + if (intel_fbc_is_active(dev_priv)) { + u32 mask; + + if (INTEL_GEN(dev_priv) >= 8) + mask = I915_READ(IVB_FBC_STATUS2) & BDW_FBC_COMP_SEG_MASK; + else if (INTEL_GEN(dev_priv) >= 7) + mask = I915_READ(IVB_FBC_STATUS2) & IVB_FBC_COMP_SEG_MASK; + else if (INTEL_GEN(dev_priv) >= 5) + mask = I915_READ(ILK_DPFC_STATUS) & ILK_DPFC_COMP_SEG_MASK; + else if (IS_G4X(dev_priv)) + mask = I915_READ(DPFC_STATUS) & DPFC_COMP_SEG_MASK; + else + mask = I915_READ(FBC_STATUS) & (FBC_STAT_COMPRESSING | + FBC_STAT_COMPRESSED); + + seq_printf(m, "Compressing: %s\n", yesno(mask)); } mutex_unlock(&dev_priv->fbc.lock); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1329420f4a1e..ac0bf2364efa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2507,10 +2507,6 @@ enum skl_disp_power_wells { #define FBC_FENCE_OFF _MMIO(0x3218) /* 
BSpec typo has 321Bh */ #define FBC_TAG(i) _MMIO(0x3300 + (i) * 4) -#define FBC_STATUS2 _MMIO(0x43214) -#define IVB_FBC_COMPRESSION_MASK 0x7ff -#define BDW_FBC_COMPRESSION_MASK 0xfff - #define FBC_LL_SIZE (1536) #define FBC_LLC_READ_CTRL _MMIO(0x9044) @@ -2539,7 +2535,7 @@ enum skl_disp_power_wells { #define DPFC_INVAL_SEG_SHIFT (16) #define DPFC_INVAL_SEG_MASK (0x07ff0000) #define DPFC_COMP_SEG_SHIFT (0) -#define DPFC_COMP_SEG_MASK (0x000003ff) +#define DPFC_COMP_SEG_MASK (0x000007ff) #define DPFC_STATUS2 _MMIO(0x3214) #define DPFC_FENCE_YOFF _MMIO(0x3218) #define DPFC_CHICKEN _MMIO(0x3224) @@ -2553,6 +2549,10 @@ enum skl_disp_power_wells { #define DPFC_RESERVED (0x1FFFFF00) #define ILK_DPFC_RECOMP_CTL _MMIO(0x4320c) #define ILK_DPFC_STATUS _MMIO(0x43210) +#define ILK_DPFC_COMP_SEG_MASK 0x7ff +#define IVB_FBC_STATUS2 _MMIO(0x43214) +#define IVB_FBC_COMP_SEG_MASK 0x7ff +#define BDW_FBC_COMP_SEG_MASK 0xfff #define ILK_DPFC_FENCE_YOFF _MMIO(0x43218) #define ILK_DPFC_CHICKEN _MMIO(0x43224) #define ILK_DPFC_DISABLE_DUMMY0 (1<<8) From 4127dc43e2fb274e349e22a537fa7dca1e70cfc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 6 Jun 2017 15:44:12 +0300 Subject: [PATCH 058/341] drm/i915: s/fbc_fc/fbc_false_color/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're not that short on characters that we can't spell out "false_color". Saves me from figuring out what "fc" means the next time I look at the code.
Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170606124412.5335-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_debugfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8fdb911344b3..a6ba2100bb88 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1694,7 +1694,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) return 0; } -static int i915_fbc_fc_get(void *data, u64 *val) +static int i915_fbc_false_color_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; @@ -1706,7 +1706,7 @@ static int i915_fbc_fc_get(void *data, u64 *val) return 0; } -static int i915_fbc_fc_set(void *data, u64 val) +static int i915_fbc_false_color_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; u32 reg; @@ -1727,8 +1727,8 @@ static int i915_fbc_fc_set(void *data, u64 val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops, - i915_fbc_fc_get, i915_fbc_fc_set, +DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops, + i915_fbc_false_color_get, i915_fbc_false_color_set, "%llu\n"); static int i915_ips_status(struct seq_file *m, void *unused) @@ -4869,7 +4869,7 @@ static const struct i915_debugfs_files { {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, {"i915_spr_wm_latency", &i915_spr_wm_latency_fops}, {"i915_cur_wm_latency", &i915_cur_wm_latency_fops}, - {"i915_fbc_false_color", &i915_fbc_fc_fops}, + {"i915_fbc_false_color", &i915_fbc_false_color_fops}, {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, From c2d1a0ced2603c4a17fa9c53c37e415905cf5a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 6 Jun 2017 16:32:29 +0300 Subject: [PATCH 059/341] 
drm/i915: Restore has_fbc=1 for ILK-M MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore the lost has_fbc flag for mobile ILK. Cc: Carlos Santa Cc: Rodrigo Vivi Fixes: a13233804686 ("drm/i915: Introduce GEN5_FEATURES for device info") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170606133229.12439-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index cf43dc1d539f..f940e486a62a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -208,7 +208,7 @@ static const struct intel_device_info intel_ironlake_d_info = { static const struct intel_device_info intel_ironlake_m_info = { GEN5_FEATURES, .platform = INTEL_IRONLAKE, - .is_mobile = 1, + .is_mobile = 1, .has_fbc = 1, }; #define GEN6_FEATURES \ From d96a7d2adb040a67e163a82dad6316f9f572498a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 31 Mar 2017 21:00:54 +0300 Subject: [PATCH 060/341] drm/i915: Fix scaling check for 90/270 degree plane rotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from commit b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") we've already rotated the src coordinates by 270 degrees by the time we check if a scaler is needed or not, so we must not account for the rotation a second time. Previously we did these steps in the opposite order and hence the scaler check had to deal with rotation itself. The double rotation handling causes us to enable a scaler pretty much every time 90/270 degree plane rotation is requested, leading to fuzzier fonts and whatnot. 
v2: s/unsigned/unsigned int/ to appease checkpatch v3: s/DRM_ROTATE_0/DRM_MODE_ROTATE_0/ Cc: stable@vger.kernel.org Cc: Tvrtko Ursulin Reported-by: Tvrtko Ursulin Tested-by: Tvrtko Ursulin Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170331180056.14086-2-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b0a9e156bfd1..2b75faf61a83 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4610,7 +4610,7 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe) static int skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, - unsigned scaler_user, int *scaler_id, unsigned int rotation, + unsigned int scaler_user, int *scaler_id, int src_w, int src_h, int dst_w, int dst_h) { struct intel_crtc_scaler_state *scaler_state = @@ -4619,9 +4619,12 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, to_intel_crtc(crtc_state->base.crtc); int need_scaling; - need_scaling = drm_rotation_90_or_270(rotation) ? - (src_h != dst_w || src_w != dst_h): - (src_w != dst_w || src_h != dst_h); + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. 
+ */ + need_scaling = src_w != dst_w || src_h != dst_h; /* * if plane is being disabled or scaler is no more required or force detach @@ -4683,7 +4686,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode; return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, DRM_MODE_ROTATE_0, + &state->scaler_state.scaler_id, state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); } @@ -4712,7 +4715,6 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, ret = skl_update_scaler(crtc_state, force_detach, drm_plane_index(&intel_plane->base), &plane_state->scaler_id, - plane_state->base.rotation, drm_rect_width(&plane_state->base.src) >> 16, drm_rect_height(&plane_state->base.src) >> 16, drm_rect_width(&plane_state->base.dst), From fce5adf568abb1e8264d677156e2e0deb529194d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 31 Mar 2017 21:00:55 +0300 Subject: [PATCH 061/341] drm/i915: Fix SKL+ watermarks for 90/270 rotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skl_check_plane_surface() already rotates the clipped plane source coordinates to match the scanout direction because that's the way the GTT mapping is set up. Thus we no longer need to rotate the coordinates in the watermark code. For cursors we use the non-clipped coordinates which are not rotated appropriately, but that doesn't actually matter since cursors don't even support 90/270 degree rotation. 
v2: Resolve conflicts from SKL+ wm rework Cc: stable@vger.kernel.org Cc: Tvrtko Ursulin Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170331180056.14086-3-ville.syrjala@linux.intel.com Tested-by: Tvrtko Ursulin Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 36 ++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ae36df02948a..aa9d8cef7ce0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3841,20 +3841,26 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, /* n.b., src is 16.16 fixed point, dst is whole integer */ if (plane->id == PLANE_CURSOR) { + /* + * Cursors only support 0/180 degree rotation, + * hence no need to account for rotation here. + */ src_w = pstate->base.src_w >> 16; src_h = pstate->base.src_h >> 16; dst_w = pstate->base.crtc_w; dst_h = pstate->base.crtc_h; } else { + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. 
+ */ src_w = drm_rect_width(&pstate->base.src) >> 16; src_h = drm_rect_height(&pstate->base.src) >> 16; dst_w = drm_rect_width(&pstate->base.dst); dst_h = drm_rect_height(&pstate->base.dst); } - if (drm_rotation_90_or_270(pstate->base.rotation)) - swap(dst_w, dst_h); - fp_w_ratio = fixed_16_16_div(src_w, dst_w); fp_h_ratio = fixed_16_16_div(src_h, dst_h); downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); @@ -3978,12 +3984,14 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, if (y && format != DRM_FORMAT_NV12) return 0; + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + */ width = drm_rect_width(&intel_pstate->base.src) >> 16; height = drm_rect_height(&intel_pstate->base.src) >> 16; - if (drm_rotation_90_or_270(pstate->rotation)) - swap(width, height); - /* for planar format */ if (format == DRM_FORMAT_NV12) { if (y) /* y-plane data rate */ @@ -4066,12 +4074,14 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, fb->modifier != I915_FORMAT_MOD_Yf_TILED) return 8; + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + */ src_w = drm_rect_width(&intel_pstate->base.src) >> 16; src_h = drm_rect_height(&intel_pstate->base.src) >> 16; - if (drm_rotation_90_or_270(pstate->rotation)) - swap(src_w, src_h); - /* Halve UV plane width and height for NV12 */ if (fb->format->format == DRM_FORMAT_NV12 && !y) { src_w /= 2; @@ -4460,13 +4470,15 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, width = intel_pstate->base.crtc_w; height = intel_pstate->base.crtc_h; } else { + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. 
+ */ width = drm_rect_width(&intel_pstate->base.src) >> 16; height = drm_rect_height(&intel_pstate->base.src) >> 16; } - if (drm_rotation_90_or_270(pstate->rotation)) - swap(width, height); - cpp = fb->format->cpp[0]; plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); From 73714c05df97d7527e7eaaa771472ef2ede46fa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 31 Mar 2017 21:00:56 +0300 Subject: [PATCH 062/341] drm/i915: Fix 90/270 rotated coordinates for FBC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clipped src coordinates have already been rotated by 270 degrees for when the plane rotation is 90/270 degrees, hence the FBC code should no longer swap the width and height. Cc: stable@vger.kernel.org Cc: Tvrtko Ursulin Cc: Paulo Zanoni Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170331180056.14086-4-ville.syrjala@linux.intel.com Reviewed-by: Paulo Zanoni Tested-by: Tvrtko Ursulin Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_fbc.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index ff2fc5bc4af4..860b8c26d29b 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -82,20 +82,10 @@ static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc) static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache, int *width, int *height) { - int w, h; - - if (drm_rotation_90_or_270(cache->plane.rotation)) { - w = cache->plane.src_h; - h = cache->plane.src_w; - } else { - w = cache->plane.src_w; - h = cache->plane.src_h; - } - if (width) - *width = w; + *width = cache->plane.src_w; if (height) - *height = h; + *height = cache->plane.src_h; } static int 
intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv, @@ -746,6 +736,11 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; cache->plane.rotation = plane_state->base.rotation; + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + */ cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; cache->plane.visible = plane_state->base.visible; From 413f3c19f8ecefd29067897db9c414a29d86685f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 13:30:30 -0700 Subject: [PATCH 063/341] drm/i915/cnl: Introduce Cannonlake platform definition. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cannonlake is an Intel® Processor containing Intel® HD Graphics following Kabylake. It is Gen10. Let's start by adding the platform definition based on previous platforms but yet as alpha_support. On following patches we will start adding PCI IDs and the platform specific changes. CNL has an increased DDB size as Damien had previously noticed and provided a separated patch that got squashed here. v2: Squash DDB size here per Ander request. 
Credits-to: Damien Lespiau Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_pci.c | 8 ++++++++ drivers/gpu/drm/i915/intel_device_info.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 22588d499e19..0ab25ebd4462 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -825,6 +825,7 @@ enum intel_platform { INTEL_BROXTON, INTEL_KABYLAKE, INTEL_GEMINILAKE, + INTEL_CANNONLAKE, INTEL_MAX_PLATFORMS }; @@ -2767,6 +2768,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_BROXTON(dev_priv) ((dev_priv)->info.platform == INTEL_BROXTON) #define IS_KABYLAKE(dev_priv) ((dev_priv)->info.platform == INTEL_KABYLAKE) #define IS_GEMINILAKE(dev_priv) ((dev_priv)->info.platform == INTEL_GEMINILAKE) +#define IS_CANNONLAKE(dev_priv) ((dev_priv)->info.platform == INTEL_CANNONLAKE) #define IS_MOBILE(dev_priv) ((dev_priv)->info.is_mobile) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) @@ -2858,6 +2860,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN7(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(6))) #define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7))) #define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8))) +#define IS_GEN10(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(9))) #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f940e486a62a..feb3425b8174 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -425,6 +425,14 @@ static const struct intel_device_info intel_kabylake_gt3_info = { 
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; +static const struct intel_device_info intel_cannonlake_info = { + BDW_FEATURES, + .is_alpha_support = 1, + .platform = INTEL_CANNONLAKE, + .gen = 10, + .ddb_size = 1024, +}; + /* * Make sure any device matches here are from most specific to most * general. For example, since the Quanta match is based on the subsystem diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 3718341662c2..e5eb1a7f9b6d 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -51,6 +51,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(BROXTON), PLATFORM_NAME(KABYLAKE), PLATFORM_NAME(GEMINILAKE), + PLATFORM_NAME(CANNONLAKE), }; #undef PLATFORM_NAME From acf1dba661e908e923320b4226bad4d8fc23c6f5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 13:30:31 -0700 Subject: [PATCH 064/341] drm/i915/cnl: Cannonlake uses CNP PCH. Avoid warning when CNP is detected with CNL. Also let's force it on the virtual detection. 
Signed-off-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-2-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6ca99dea7cf0..7a925c51477f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -139,6 +139,8 @@ static enum intel_pch intel_virt_detect_pch(struct drm_i915_private *dev_priv) } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { ret = PCH_SPT; DRM_DEBUG_KMS("Assuming SunrisePoint PCH\n"); + } else if (IS_CANNONLAKE(dev_priv)) { + ret = PCH_CNP; } return ret; @@ -222,9 +224,11 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) } else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found CannonPoint PCH\n"); + WARN_ON(!IS_CANNONLAKE(dev_priv)); } else if (id_ext == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found CannonPoint LP PCH\n"); + WARN_ON(!IS_CANNONLAKE(dev_priv)); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && From e918d79a5d0a1b431e2cac0e6e6ac9452fd9ab32 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 13:30:32 -0700 Subject: [PATCH 065/341] drm/i915/cnl: Add Cannonlake PCI IDs for U-skus. Platform enabling and its power-on are organized in different skus (U x Y x S x H, etc). So instead of organizing it in GT1 x GT2 x GT3 let's also use the platform sku. This is also the new Spec style which makes the review much easier and straightforward. v2: Really include the PCI IDs to the pciidlist[]; v3: Remove PCI IDs not present in spec. v4: Rebase. 
Signed-off-by: Anusha Srivatsa Signed-off-by: Rodrigo Vivi Reviewed-by: Clinton Taylor Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-3-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + include/drm/i915_pciids.h | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index feb3425b8174..e9d80df85cf3 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -477,6 +477,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_KBL_GT2_IDS(&intel_kabylake_info), INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info), INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), + INTEL_CNL_IDS(&intel_cannonlake_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 27e0dbaa6c0e..7f1bb3b0ce5b 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -334,4 +334,14 @@ INTEL_KBL_GT3_IDS(info), \ INTEL_KBL_GT4_IDS(info) +/* CNL U 2+2 */ +#define INTEL_CNL_U_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x5A52, info), \ + INTEL_VGA_DEVICE(0x5A5A, info), \ + INTEL_VGA_DEVICE(0x5A42, info), \ + INTEL_VGA_DEVICE(0x5A4A, info) + +#define INTEL_CNL_IDS(info) \ + INTEL_CNL_U_GT2_IDS(info) + #endif /* _I915_PCIIDS_H */ From 95578277cbdb60e3c68cb92c843cafc1f77c4f55 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 13:30:33 -0700 Subject: [PATCH 066/341] drm/i915/cnl: Add Cannonlake PCI IDs for Y-skus. By the Spec all CNL Y skus are 2+2, i.e. GT2. 
v2: Really include the PCI IDs to the pciidlist[]; Reviewed-by: Anusha Srivatsa Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-4-git-send-email-rodrigo.vivi@intel.com --- include/drm/i915_pciids.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 7f1bb3b0ce5b..7d2696a6588e 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -341,7 +341,17 @@ INTEL_VGA_DEVICE(0x5A42, info), \ INTEL_VGA_DEVICE(0x5A4A, info) +/* CNL Y 2+2 */ +#define INTEL_CNL_Y_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x5A51, info), \ + INTEL_VGA_DEVICE(0x5A59, info), \ + INTEL_VGA_DEVICE(0x5A41, info), \ + INTEL_VGA_DEVICE(0x5A49, info), \ + INTEL_VGA_DEVICE(0x5A71, info), \ + INTEL_VGA_DEVICE(0x5A79, info) + #define INTEL_CNL_IDS(info) \ - INTEL_CNL_U_GT2_IDS(info) + INTEL_CNL_U_GT2_IDS(info), \ + INTEL_CNL_Y_GT2_IDS(info) #endif /* _I915_PCIIDS_H */ From 3c2e0fd92c194f495aaa8a8a1c86ea1b8c4bd304 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 6 Jun 2017 13:30:34 -0700 Subject: [PATCH 067/341] drm/i915/cnl: add IS_CNL_REVID macro We're going to use it in the next commits. 
Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Jim Bride Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-5-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0ab25ebd4462..f9632ea186c0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2846,6 +2846,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GLK_REVID(dev_priv, since, until) \ (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +#define CNL_REVID_A0 0x0 +#define CNL_REVID_B0 0x1 + +#define IS_CNL_REVID(p, since, until) \ + (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks From 8366be98f6792419ac2e19648391988edec7a7fe Mon Sep 17 00:00:00 2001 From: James Irwin Date: Tue, 6 Jun 2017 13:30:35 -0700 Subject: [PATCH 068/341] drm/i915/cnl: Cannonlake has 4 planes (3 sprites) per pipe Issue: VIZ-4525 Reviewed-by: Damien Lespiau Signed-off-by: James Irwin Signed-off-by: Damien Lespiau Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-6-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index e5eb1a7f9b6d..bb89faf45468 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -328,7 +328,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) * we don't expose the topmost plane at all to prevent ABI breakage * down the line. 
*/ - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEN10(dev_priv) || IS_GEMINILAKE(dev_priv)) for_each_pipe(dev_priv, pipe) info->num_sprites[pipe] = 3; else if (IS_BROXTON(dev_priv)) { From c7ae7e9ab2078ed987903bc6c308abe57d575a59 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 13:30:36 -0700 Subject: [PATCH 069/341] drm/i915/cnl: Configure EU slice power gating. Cannonlake also supports slice power gating on devices with more than one slice as SKL. Let's assume that this is the same for SKL+ and exclude BXT only. v2: Also remove KBL. Signed-off-by: Rodrigo Vivi Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-7-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index bb89faf45468..91e68fd31c07 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -184,16 +184,15 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) DIV_ROUND_UP(sseu->eu_total, sseu_subslice_total(sseu)) : 0; /* - * SKL supports slice power gating on devices with more than + * SKL+ supports slice power gating on devices with more than * one slice, and supports EU power gating on devices with - * more than one EU pair per subslice. BXT supports subslice + * more than one EU pair per subslice. BXT+ supports subslice * power gating on devices with more than one subslice, and * supports EU power gating on devices with more than one EU * pair per subslice. 
*/ sseu->has_slice_pg = - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && - hweight8(sseu->slice_mask) > 1; + !IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > 1; sseu->has_subslice_pg = IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1; sseu->has_eu_pg = sseu->eu_per_subslice > 2; From 1dc0766c33473d61fd85caa5031daf34f719cd3f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 13:30:37 -0700 Subject: [PATCH 070/341] drm/i915/cnl: Cannonlake has same MOCS table than Skylake. All registers and default configuration are the same for Skylake and Cannonlake. v2: Don't apply Wa for platforms without MOCS. (Paulo) v3: Removed WaDisableSkipCaching that Joonas noticed that according to spec it is not applicable to CNL. Cc: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-8-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 92e461c68385..f4c46b0b8f0a 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; From 7bd0a2c6e1ece06624c1547307f21b0550382ce2 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Tue, 6 Jun 2017 13:30:38 -0700 Subject: [PATCH 071/341] drm/i915/gen10: Set value of Indirect Context Offset for gen10 Indirect Context Offset Pointer has changed for Cannonlake. INDIRECT_CTX_OFFSET[15:6] valid value for CNL is 19h per Spec. v2: rebased to intel_lr_indirect_ctx_offset v3: Commit message added per Tvrtko request. 
Signed-off-by: Michel Thierry Signed-off-by: Rodrigo Vivi Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-9-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 014b30ace8a0..d49dbaa931b5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -204,6 +204,7 @@ #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 +#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ @@ -1861,6 +1862,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 10: + indirect_ctx_offset = + GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 9: indirect_ctx_offset = GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; From 8bcd3dd417660dce8cf38a731a888f09e8028190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 6 Jun 2017 13:30:39 -0700 Subject: [PATCH 072/341] drm/i915/cnl: Add power wells for CNL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CNL power wells are very similar to SKL, with the exception that the misc IO well has been split into separate AUX IO wells. Not sure if DMC is supposed to manage the AUX wells for us or not. Let's assume so for now. v2: DDI A power well wants DDI A domains, not DDI B domains v3: s/BIT/BIT_ULL and add proper Aux IO domains. (Rodrigo) v4: Remove PW_DDI_E. Not supported on Current CNL SKUs. (Rodrigo). v5: Removed DDI_E_IO_DOMAINS and moved PORT_DDI_E_IO to DDI_A_IO for the same reasons as v4 when we found out that current CNL SKUs don't have the full port E split. 
Signed-off-by: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-10-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 5 + drivers/gpu/drm/i915/intel_runtime_pm.c | 136 +++++++++++++++++++++++- 2 files changed, 137 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ac0bf2364efa..45e887742339 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1065,6 +1065,7 @@ enum skl_disp_power_wells { SKL_DISP_PW_MISC_IO, SKL_DISP_PW_DDI_A_E, GLK_DISP_PW_DDI_A = SKL_DISP_PW_DDI_A_E, + CNL_DISP_PW_DDI_A = SKL_DISP_PW_DDI_A_E, SKL_DISP_PW_DDI_B, SKL_DISP_PW_DDI_C, SKL_DISP_PW_DDI_D, @@ -1072,6 +1073,10 @@ enum skl_disp_power_wells { GLK_DISP_PW_AUX_A = 8, GLK_DISP_PW_AUX_B, GLK_DISP_PW_AUX_C, + CNL_DISP_PW_AUX_A = GLK_DISP_PW_AUX_A, + CNL_DISP_PW_AUX_B = GLK_DISP_PW_AUX_B, + CNL_DISP_PW_AUX_C = GLK_DISP_PW_AUX_C, + CNL_DISP_PW_AUX_D, SKL_DISP_PW_1 = 14, SKL_DISP_PW_2, diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index f8a375f8dde6..0b3cacd29bac 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -494,6 +494,55 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ 
+ BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_AUX_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_AUX_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_AUX_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_AUX_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_INIT)) +#define CNL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ + CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + static void assert_can_enable_dc9(struct drm_i915_private *dev_priv) { WARN_ONCE((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC9), @@ -762,13 +811,14 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, } break; case SKL_DISP_PW_MISC_IO: - case SKL_DISP_PW_DDI_A_E: /* GLK_DISP_PW_DDI_A */ + case SKL_DISP_PW_DDI_A_E: /* GLK_DISP_PW_DDI_A, CNL_DISP_PW_DDI_A */ case SKL_DISP_PW_DDI_B: case SKL_DISP_PW_DDI_C: case SKL_DISP_PW_DDI_D: - case GLK_DISP_PW_AUX_A: - case GLK_DISP_PW_AUX_B: - case GLK_DISP_PW_AUX_C: + case GLK_DISP_PW_AUX_A: /* CNL_DISP_PW_AUX_A */ + case GLK_DISP_PW_AUX_B: /* CNL_DISP_PW_AUX_B */ + case GLK_DISP_PW_AUX_C: /* CNL_DISP_PW_AUX_C */ + case 
CNL_DISP_PW_AUX_D: break; default: WARN(1, "Unknown power well %lu\n", power_well->id); @@ -2275,6 +2325,82 @@ static struct i915_power_well glk_power_wells[] = { }, }; +static struct i915_power_well cnl_power_wells[] = { + { + .name = "always-on", + .always_on = 1, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .domains = 0, + .ops = &skl_power_well_ops, + .id = SKL_DISP_PW_1, + }, + { + .name = "AUX A", + .domains = CNL_DISPLAY_AUX_A_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = CNL_DISP_PW_AUX_A, + }, + { + .name = "AUX B", + .domains = CNL_DISPLAY_AUX_B_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = CNL_DISP_PW_AUX_B, + }, + { + .name = "AUX C", + .domains = CNL_DISPLAY_AUX_C_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = CNL_DISP_PW_AUX_C, + }, + { + .name = "AUX D", + .domains = CNL_DISPLAY_AUX_D_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = CNL_DISP_PW_AUX_D, + }, + { + .name = "DC off", + .domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_PW_DC_OFF, + }, + { + .name = "power well 2", + .domains = CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = SKL_DISP_PW_2, + }, + { + .name = "DDI A IO power well", + .domains = CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = CNL_DISP_PW_DDI_A, + }, + { + .name = "DDI B IO power well", + .domains = CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = SKL_DISP_PW_DDI_B, + }, + { + .name = "DDI C IO power well", + .domains = CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = SKL_DISP_PW_DDI_C, + }, + { + .name = "DDI D IO power well", + .domains = CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS, + .ops = &skl_power_well_ops, + .id = SKL_DISP_PW_DDI_D, + }, +}; + static int sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, int disable_power_well) 
@@ -2369,6 +2495,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, bdw_power_wells); } else if (IS_GEN9_BC(dev_priv)) { set_power_wells(power_domains, skl_power_wells); + } else if (IS_CANNONLAKE(dev_priv)) { + set_power_wells(power_domains, cnl_power_wells); } else if (IS_BROXTON(dev_priv)) { set_power_wells(power_domains, bxt_power_wells); } else if (IS_GEMINILAKE(dev_priv)) { From bf9a496a1fa434670285bd592c75d009cbb99720 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 13:30:40 -0700 Subject: [PATCH 073/341] drm/i915/cnl: Also need power well sanitize. The workaround added in commit c6782b76d31a ("drm/i915/gen9: Reset secondary power well equests left on by DMC/KVMR") needs to be applied on Cannonlake as well. So let's assume any platform using this power well setup will also need and let's just go ahead and remove if condition. Cc: Imre Deak Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-11-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0b3cacd29bac..8a6f287d225b 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -853,8 +853,7 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("Disabling %s\n", power_well->name); } - if (IS_GEN9(dev_priv)) - gen9_sanitize_power_well_requests(dev_priv, power_well); + gen9_sanitize_power_well_requests(dev_priv, power_well); } if (wait_for(!!(I915_READ(HSW_PWR_WELL_DRIVER) & state_mask) == enable, From a1986f4174a4c19b5634f9873943126bbe85a285 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 5 Jun 2017 15:12:02 -0700 Subject: [PATCH 074/341] drm/i915: Remove unnecessary PORT3 definition. 
Let's be picky and just use PICK directly. So we can extend this later without creating a new PORT_X for every new number of ports we have to handle. Cc: Manasi Navare Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: http://patchwork.freedesktop.org/patch/msgid/1496700722-13755-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 45e887742339..b6d69e289974 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -58,10 +58,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b)) #define _PORT(port, a, b) ((a) + (port)*((b)-(a))) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) -#define _PIPE3(pipe, ...) _PICK(pipe, __VA_ARGS__) -#define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c)) -#define _PORT3(port, ...) _PICK(port, __VA_ARGS__) -#define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c)) +#define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) +#define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) From 94829de433db0253a5c238ef74a70b7043b93e57 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 6 Jun 2017 09:06:06 -0700 Subject: [PATCH 075/341] drm/i915: Unify GT* and GT3 definitions This patch cleans up the platform definition block a bit, in a way that avoids duplication and makes it clear that GT3 for the current platforms only has the extra Media engine (BSD2). v2: Kabylake IS_KABYLAKE as Anusha noticed. v3: Avoid EXTRA_ENGINE_MASK and list rings out on GT3 to make it more clear.
Cc: Chris Wilson Cc: Anusha Srivatsa Signed-off-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: http://patchwork.freedesktop.org/patch/msgid/1496765166-7068-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 57 ++++++++++++++++----------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index e9d80df85cf3..224f5f96ff65 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -312,16 +312,17 @@ static const struct intel_device_info intel_haswell_info = { .has_full_48bit_ppgtt = 1, \ .has_64bit_reloc = 1 +#define BDW_PLATFORM \ + BDW_FEATURES, \ + .gen = 8, \ + .platform = INTEL_BROADWELL + static const struct intel_device_info intel_broadwell_info = { - BDW_FEATURES, - .gen = 8, - .platform = INTEL_BROADWELL, + BDW_PLATFORM, }; static const struct intel_device_info intel_broadwell_gt3_info = { - BDW_FEATURES, - .gen = 8, - .platform = INTEL_BROADWELL, + BDW_PLATFORM, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; @@ -347,22 +348,20 @@ static const struct intel_device_info intel_cherryview_info = { CHV_COLORS, }; +#define SKL_PLATFORM \ + BDW_FEATURES, \ + .gen = 9, \ + .platform = INTEL_SKYLAKE, \ + .has_csr = 1, \ + .has_guc = 1, \ + .ddb_size = 896 + static const struct intel_device_info intel_skylake_info = { - BDW_FEATURES, - .platform = INTEL_SKYLAKE, - .gen = 9, - .has_csr = 1, - .has_guc = 1, - .ddb_size = 896, + SKL_PLATFORM, }; static const struct intel_device_info intel_skylake_gt3_info = { - BDW_FEATURES, - .platform = INTEL_SKYLAKE, - .gen = 9, - .has_csr = 1, - .has_guc = 1, - .ddb_size = 896, + SKL_PLATFORM, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; @@ -406,22 +405,20 @@ static const struct intel_device_info intel_geminilake_info = { .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; +#define KBL_PLATFORM \ + BDW_FEATURES, \ + .gen = 9, 
\ + .platform = INTEL_KABYLAKE, \ + .has_csr = 1, \ + .has_guc = 1, \ + .ddb_size = 896 + static const struct intel_device_info intel_kabylake_info = { - BDW_FEATURES, - .platform = INTEL_KABYLAKE, - .gen = 9, - .has_csr = 1, - .has_guc = 1, - .ddb_size = 896, + KBL_PLATFORM, }; static const struct intel_device_info intel_kabylake_gt3_info = { - BDW_FEATURES, - .platform = INTEL_KABYLAKE, - .gen = 9, - .has_csr = 1, - .has_guc = 1, - .ddb_size = 896, + KBL_PLATFORM, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; From ffc197763e636b928963c5dd9a3eaea8146345e3 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 3 May 2017 09:20:10 +0800 Subject: [PATCH 076/341] drm/i915/gvt: rewrite the trace gvt:gvt_command using trace style approach The gvt:gvt_command trace involve unnecessary overhead even this trace is not enabled. We need improve it. The kernel trace infrastructure provide a full api to define a trace event. We should leverage them if possible. And one important thing is that a trace point should store raw data but not format string. This patch include two part work: 1) Refactor the gvt_command trace definition, including: o only store raw trace data. o use __dynamic_array() to declare a variable size buffer. o use __print_array() to format raw cmd data. o rename vm_id as vgpu_id. 2) Improve the trace invoking, including: o remove the cycles calculation for handler. We can get this data by any perf tool. o do not make a backup for raw cmd data which just doesn't make sense. With this patch, this trace has no overhead if it is not enabled. And we are trace style now. 
The final output example: gvt workload 0-211 [000] ...1 120.555964: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e880, raw cmd {0x4000000} gvt workload 0-211 [000] ...1 120.556014: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e884, raw cmd {0x7a000004,0x1004000,0xe1511018,0x0,0x7d,0x0} gvt workload 0-211 [000] ...1 120.556062: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e89c, raw cmd {0x7a000004,0x140000,0x0,0x0,0x0,0x0} gvt workload 0-211 [000] ...1 120.556110: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e8b4, raw cmd {0x10400002,0xe1511018,0x0,0x7d} Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 50 ++----------------- drivers/gpu/drm/i915/gvt/trace.h | 72 ++++++++++----------------- 2 files changed, 29 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 41b2c3aaa04a..5634eb1fa24b 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e) hash_add(gvt->cmd_table, &e->hlist, e->info->opcode); } -#define GVT_MAX_CMD_LENGTH 20 /* In Dword */ - -static void trace_cs_command(struct parser_exec_state *s, - cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler) -{ - /* This buffer is used by ftrace to store all commands copied from - * guest gma space. Sometimes commands can cross pages, this should - * not be handled in ftrace logic. So this is just used as a - * 'bounce buffer' - */ - u32 cmd_trace_buf[GVT_MAX_CMD_LENGTH]; - int i; - u32 cmd_len = cmd_length(s); - /* The chosen value of GVT_MAX_CMD_LENGTH are just based on - * following two considerations: - * 1) From observation, most common ring commands is not that long. - * But there are execeptions. So it indeed makes sence to observe - * longer commands. 
- * 2) From the performance and debugging point of view, dumping all - * contents of very commands is not necessary. - * We mgith shrink GVT_MAX_CMD_LENGTH or remove this trace event in - * future for performance considerations. - */ - if (unlikely(cmd_len > GVT_MAX_CMD_LENGTH)) { - gvt_dbg_cmd("cmd length exceed tracing limitation!\n"); - cmd_len = GVT_MAX_CMD_LENGTH; - } - - for (i = 0; i < cmd_len; i++) - cmd_trace_buf[i] = cmd_val(s, i); - - trace_gvt_command(s->vgpu->id, s->ring_id, s->ip_gma, cmd_trace_buf, - cmd_len, s->buf_type == RING_BUFFER_INSTRUCTION, - cost_pre_cmd_handler, cost_cmd_handler); -} - /* call the cmd handler, and advance ip */ static int cmd_parser_exec(struct parser_exec_state *s) { + struct intel_vgpu *vgpu = s->vgpu; struct cmd_info *info; u32 cmd; int ret = 0; - cycles_t t0, t1, t2; - struct parser_exec_state s_before_advance_custom; - struct intel_vgpu *vgpu = s->vgpu; - - t0 = get_cycles(); cmd = cmd_val(s, 0); @@ -2475,9 +2435,8 @@ static int cmd_parser_exec(struct parser_exec_state *s) s->info = info; - t1 = get_cycles(); - - s_before_advance_custom = *s; + trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, + cmd_length(s), s->buf_type); if (info->handler) { ret = info->handler(s); @@ -2486,9 +2445,6 @@ static int cmd_parser_exec(struct parser_exec_state *s) return ret; } } - t2 = get_cycles(); - - trace_cs_command(&s_before_advance_custom, t1 - t0, t2 - t1); if (!(info->flag & F_IP_ADVANCE_CUSTOM)) { ret = cmd_advance_default(s); diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 53a2d10cf3f1..9171291e36c6 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -224,57 +224,37 @@ TRACE_EVENT(oos_sync, TP_printk("%s", __entry->buf) ); -#define MAX_CMD_STR_LEN 256 TRACE_EVENT(gvt_command, - TP_PROTO(u8 vm_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, bool ring_buffer_cmd, cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler), + TP_PROTO(u8 
vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, + u32 buf_type), - TP_ARGS(vm_id, ring_id, ip_gma, cmd_va, cmd_len, ring_buffer_cmd, cost_pre_cmd_handler, cost_cmd_handler), + TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type), - TP_STRUCT__entry( - __field(u8, vm_id) - __field(u8, ring_id) - __field(int, i) - __array(char, tmp_buf, MAX_CMD_STR_LEN) - __array(char, cmd_str, MAX_CMD_STR_LEN) - ), + TP_STRUCT__entry( + __field(u8, vgpu_id) + __field(u8, ring_id) + __field(u32, ip_gma) + __field(u32, buf_type) + __field(u32, cmd_len) + __dynamic_array(u32, raw_cmd, cmd_len) + ), - TP_fast_assign( - __entry->vm_id = vm_id; - __entry->ring_id = ring_id; - __entry->cmd_str[0] = '\0'; - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "VM(%d) Ring(%d): %s ip(%08x) pre handler cost (%llu), handler cost (%llu) ", vm_id, ring_id, ring_buffer_cmd ? "RB":"BB", ip_gma, cost_pre_cmd_handler, cost_cmd_handler); - strcat(__entry->cmd_str, __entry->tmp_buf); - entry->i = 0; - while (cmd_len > 0) { - if (cmd_len >= 8) { - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x %08x %08x %08x %08x ", - cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3], - cmd_va[__entry->i+4], cmd_va[__entry->i+5], cmd_va[__entry->i+6], cmd_va[__entry->i+7]); - __entry->i += 8; - cmd_len -= 8; - strcat(__entry->cmd_str, __entry->tmp_buf); - } else if (cmd_len >= 4) { - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x ", - cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3]); - __entry->i += 4; - cmd_len -= 4; - strcat(__entry->cmd_str, __entry->tmp_buf); - } else if (cmd_len >= 2) { - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x ", cmd_va[__entry->i], cmd_va[__entry->i+1]); - __entry->i += 2; - cmd_len -= 2; - strcat(__entry->cmd_str, __entry->tmp_buf); - } else if (cmd_len == 1) { - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x ", cmd_va[__entry->i]); - __entry->i += 1; - cmd_len 
-= 1; - strcat(__entry->cmd_str, __entry->tmp_buf); - } - } - strcat(__entry->cmd_str, "\n"); - ), + TP_fast_assign( + __entry->vgpu_id = vgpu_id; + __entry->ring_id = ring_id; + __entry->ip_gma = ip_gma; + __entry->buf_type = buf_type; + __entry->cmd_len = cmd_len; + memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va)); + ), - TP_printk("%s", __entry->cmd_str) + + TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s", + __entry->vgpu_id, + __entry->ring_id, + __entry->buf_type, + __entry->ip_gma, + __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4)) ); #endif /* _GVT_TRACE_H_ */ From 5d0f5de16ef3d127469aa09dcdf07bec5174937f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 4 May 2017 18:36:54 +0800 Subject: [PATCH 077/341] drm/i915/gvt: refactor function intel_vgpu_submit_execlist The function intel_vgpu_submit_execlist could be more simpler. It actually does: 1) validate the submission. The first context must be valid, and all two must be privilege_access. 2) submit valid contexts. The first one need emulate schedule_in. We do not need a bitmap, valid desc copy valid_desc. Local variable emulate_schedule_in also can be optimized out. 
v2: dump desc content in err msg (Zhi Wang) Signed-off-by: Changbin Du Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 56 ++++++++++++----------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index dca989eb2d42..8bba38fa19b8 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -708,53 +708,43 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) { struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; - struct execlist_ctx_descriptor_format *desc[2], valid_desc[2]; - unsigned long valid_desc_bitmap = 0; - bool emulate_schedule_in = true; - int ret; - int i; + struct execlist_ctx_descriptor_format desc[2]; + int i, ret; - memset(valid_desc, 0, sizeof(valid_desc)); + desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1); + desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0); - desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1); - desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0); + if (!desc[0].valid) { + gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n"); + goto inv_desc; + } - for (i = 0; i < 2; i++) { - if (!desc[i]->valid) + for (i = 0; i < ARRAY_SIZE(desc); i++) { + if (!desc[i].valid) continue; - - if (!desc[i]->privilege_access) { + if (!desc[i].privilege_access) { gvt_vgpu_err("unexpected GGTT elsp submission\n"); - return -EINVAL; + goto inv_desc; } - - /* TODO: add another guest context checks here. 
*/ - set_bit(i, &valid_desc_bitmap); - valid_desc[i] = *desc[i]; - } - - if (!valid_desc_bitmap) { - gvt_vgpu_err("no valid desc in a elsp submission\n"); - return -EINVAL; - } - - if (!test_bit(0, (void *)&valid_desc_bitmap) && - test_bit(1, (void *)&valid_desc_bitmap)) { - gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n"); - return -EINVAL; } /* submit workload */ - for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) { - ret = submit_context(vgpu, ring_id, &valid_desc[i], - emulate_schedule_in); + for (i = 0; i < ARRAY_SIZE(desc); i++) { + if (!desc[i].valid) + continue; + ret = submit_context(vgpu, ring_id, &desc[i], i == 0); if (ret) { - gvt_vgpu_err("fail to schedule workload\n"); + gvt_vgpu_err("failed to submit desc %d\n", i); return ret; } - emulate_schedule_in = false; } + return 0; + +inv_desc: + gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n", + desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw); + return -EINVAL; } static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) From 0e86cc9ccc3bf557348befaddf5cb613cf3c4458 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 4 May 2017 10:52:38 +0800 Subject: [PATCH 078/341] drm/i915/gvt: implement per-vm mmio switching optimization Commit ab9da627906a ("drm/i915: make context status notifier head be per engine") gives us a chance to inspect every single request. Then we can eliminate unnecessary mmio switching for same vGPU. We only need mmio switching for different VMs (including host). This patch introduced a new general API intel_gvt_switch_mmio() to replace the old intel_gvt_load/restore_render_mmio(). This function can be further optimized for vGPU to vGPU switching. To support individual ring switch, we track the owner who occupy each ring. When another VM or host request a ring we do the mmio context switching. Otherwise no need to switch the ring. This optimization is very useful if only one guest has plenty of workloads and the host is mostly idle. 
The best case is no mmio switching will happen. v2: o fix missing ring switch issue. (chuanxiao) o support individual ring switch. Signed-off-by: Changbin Du Reviewed-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 2 +- drivers/gpu/drm/i915/gvt/render.c | 35 +++++++++++++++++++++++-- drivers/gpu/drm/i915/gvt/render.h | 4 +-- drivers/gpu/drm/i915/gvt/sched_policy.c | 12 +++++++++ drivers/gpu/drm/i915/gvt/scheduler.c | 35 ++++++++++++++++++++----- drivers/gpu/drm/i915/gvt/scheduler.h | 4 +++ 6 files changed, 80 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 7dea5e5d5567..20329171e4ab 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -244,7 +244,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) gvt_dbg_core("init gvt device\n"); idr_init(&gvt->vgpu_idr); - + spin_lock_init(&gvt->scheduler.mmio_context_lock); mutex_init(&gvt->lock); gvt->dev_priv = dev_priv; diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index c6e7972ac21d..19d98c903672 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -260,7 +260,8 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) #define CTX_CONTEXT_CONTROL_VAL 0x03 -void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id) +/* Switch ring mmio values (context) from host to a vgpu. */ +static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct render_mmio *mmio; @@ -312,7 +313,8 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id) handle_tlb_pending_event(vgpu, ring_id); } -void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id) +/* Switch ring mmio values (context) from vgpu to host. 
*/ +static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct render_mmio *mmio; @@ -348,3 +350,32 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id) mmio->value, v); } } + +/** + * intel_gvt_switch_render_mmio - switch mmio context of specific engine + * @pre: the last vGPU that own the engine + * @next: the vGPU to switch to + * @ring_id: specify the engine + * + * If pre is null indicates that host own the engine. If next is null + * indicates that we are switching to host workload. + */ +void intel_gvt_switch_mmio(struct intel_vgpu *pre, + struct intel_vgpu *next, int ring_id) +{ + if (WARN_ON(!pre && !next)) + return; + + gvt_dbg_render("switch ring %d from %s to %s\n", ring_id, + pre ? "vGPU" : "host", next ? "vGPU" : "HOST"); + + /** + * TODO: Optimize for vGPU to vGPU switch by merging + * switch_mmio_to_host() and switch_mmio_to_vgpu(). + */ + if (pre) + switch_mmio_to_host(pre, ring_id); + + if (next) + switch_mmio_to_vgpu(next, ring_id); +} diff --git a/drivers/gpu/drm/i915/gvt/render.h b/drivers/gpu/drm/i915/gvt/render.h index dac1a3cc458b..91db1d39d28f 100644 --- a/drivers/gpu/drm/i915/gvt/render.h +++ b/drivers/gpu/drm/i915/gvt/render.h @@ -36,8 +36,8 @@ #ifndef __GVT_RENDER_H__ #define __GVT_RENDER_H__ -void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id); +void intel_gvt_switch_mmio(struct intel_vgpu *pre, + struct intel_vgpu *next, int ring_id); -void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id); #endif diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 79ba4b3440aa..f642a3f0cfa0 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -299,8 +299,20 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) { + struct intel_gvt_workload_scheduler *scheduler 
= &vgpu->gvt->scheduler; + int ring_id; + kfree(vgpu->sched_data); vgpu->sched_data = NULL; + + spin_lock_bh(&scheduler->mmio_context_lock); + for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { + if (scheduler->engine_owner[ring_id] == vgpu) { + intel_gvt_switch_mmio(vgpu, NULL, ring_id); + scheduler->engine_owner[ring_id] = NULL; + } + } + spin_unlock_bh(&scheduler->mmio_context_lock); } static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6ae286cb5804..aa7e06df88b6 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -138,21 +138,42 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_gvt *gvt = container_of(nb, struct intel_gvt, shadow_ctx_notifier_block[req->engine->id]); struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct intel_vgpu_workload *workload = - scheduler->current_workload[req->engine->id]; + enum intel_engine_id ring_id = req->engine->id; + struct intel_vgpu_workload *workload; - if (!is_gvt_request(req) || unlikely(!workload)) + if (!is_gvt_request(req)) { + spin_lock_bh(&scheduler->mmio_context_lock); + if (action == INTEL_CONTEXT_SCHEDULE_IN && + scheduler->engine_owner[ring_id]) { + /* Switch ring from vGPU to host. */ + intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], + NULL, ring_id); + scheduler->engine_owner[ring_id] = NULL; + } + spin_unlock_bh(&scheduler->mmio_context_lock); + + return NOTIFY_OK; + } + + workload = scheduler->current_workload[ring_id]; + if (unlikely(!workload)) return NOTIFY_OK; switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: - intel_gvt_load_render_mmio(workload->vgpu, - workload->ring_id); + spin_lock_bh(&scheduler->mmio_context_lock); + if (workload->vgpu != scheduler->engine_owner[ring_id]) { + /* Switch ring from host to vGPU or vGPU to vGPU. 
*/ + intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], + workload->vgpu, ring_id); + scheduler->engine_owner[ring_id] = workload->vgpu; + } else + gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n", + ring_id, workload->vgpu->id); + spin_unlock_bh(&scheduler->mmio_context_lock); atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: - intel_gvt_restore_render_mmio(workload->vgpu, - workload->ring_id); /* If the status is -EINPROGRESS means this workload * doesn't meet any issue during dispatching so when * get the SCHEDULE_OUT set the status to be zero for diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2cd725c0573e..9b6bf51e9b9b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -42,6 +42,10 @@ struct intel_gvt_workload_scheduler { struct intel_vgpu_workload *current_workload[I915_NUM_ENGINES]; bool need_reschedule; + spinlock_t mmio_context_lock; + /* can be null when owner is host */ + struct intel_vgpu *engine_owner[I915_NUM_ENGINES]; + wait_queue_head_t workload_complete_wq; struct task_struct *thread[I915_NUM_ENGINES]; wait_queue_head_t waitq[I915_NUM_ENGINES]; From 23ce0592ac991447e1d1c1096bef29b5653936c4 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Fri, 19 May 2017 23:48:34 +0800 Subject: [PATCH 079/341] drm/i915/gvt: add RING_INSTDONE and SC_INSTDONE mmio handler in GVT-g kernel hangcheck needs to check RING_INSTDONE and SC_INSTDONE registers' state to know if hardware is still running. In GVT-g environment, we need to emulate these registers changing for all the guests although they are not render owner. Here we return the physical state for all the guests, then if INSTDONE is changing guest can know hardware is still running although its workload is pending. 
Read INSTDONE isn't one correct way to know if guest trigger gfx reset, especially with Linux guest, it will read ACTH first, then check INSTDONE and SUBSLICE registers to check if hardware is still running, at last trigger gfx reset when it finds all the registers is frozen. In Windows guest, read INSTDONE usually happens when OS detect TDR. With the difference between Windows and Linux guest, "disable_warn_untrack" may let debug log run into wrong state(Linux guest trigger hangcheck with no ACTHD changed, then check INSTDONE), but actually there is no TDR happened. The new policy is always WARN with untrack MMIO r/w. Bad effect is many noisy untrack mmio warning logs exist when real TDR happen. Even so you can control the log output or not by setting the debug mask bit. v2: remove log in instdone_mmio_read Suggested-by: Zhenyu Wang Cc: Zhenyu Wang Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.c | 7 ------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 0ad1a508e2af..3edff42d8f0c 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1409,6 +1409,15 @@ static int ring_timestamp_mmio_read(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } +static int instdone_mmio_read(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); + return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); +} + static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1593,6 +1602,12 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG 
+#define RING_REG(base) (base + 0x6c) + MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); + MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_ALL, instdone_mmio_read, NULL); +#undef RING_REG + MMIO_DH(GEN7_SC_INSTDONE, D_HSW_PLUS, instdone_mmio_read, NULL); + MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 1ba3bdb09341..35f6c4713cb6 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -202,13 +202,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, if (!vgpu->mmio.disable_warn_untrack) { gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n", offset, bytes, *(u32 *)p_data); - - if (offset == 0x206c) { - gvt_vgpu_err("------------------------------------------\n"); - gvt_vgpu_err("likely triggers a gfx reset\n"); - gvt_vgpu_err("------------------------------------------\n"); - vgpu->mmio.disable_warn_untrack = true; - } } } From 7b8d57587025dc294094b73f08b389a498fb107f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 22 May 2017 17:46:47 +0800 Subject: [PATCH 080/341] drm/i915/gvt: clean up the unused last_ctx_submit_time of struct intel_vgpu Clean up it as it is not used now. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 - drivers/gpu/drm/i915/gvt/handlers.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 930732e5c780..0b2a4a11ae9b 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -165,7 +165,6 @@ struct intel_vgpu { struct list_head workload_q_head[I915_NUM_ENGINES]; struct kmem_cache *workloads; atomic_t running_workload_num; - ktime_t last_ctx_submit_time; DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 3edff42d8f0c..45e5907158b7 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1433,7 +1433,6 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data; if (execlist->elsp_dwords.index == 3) { - vgpu->last_ctx_submit_time = ktime_get(); ret = intel_vgpu_submit_execlist(vgpu, ring_id); if(ret) gvt_vgpu_err("fail submit workload on ring %d\n", From 7fb6a7d65292a524256ed6e2d0e94071b0c53936 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 23 May 2017 05:38:08 +0800 Subject: [PATCH 081/341] drm/i915/gvt: Change flood gvt dmesg into trace Currently gvt dmesg is so heavy at drm.debug=0x2 that guest and host almost couldn't run on xengt. This patch transfers these repeated messages into trace events, so dmesg stays light at drm.debug=0x2, and the user can still get the target messages through trace events and trace filters.
Suggested-by: Zhi Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/interrupt.c | 20 +++--- drivers/gpu/drm/i915/gvt/mpt.h | 3 +- drivers/gpu/drm/i915/gvt/render.c | 13 ++-- drivers/gpu/drm/i915/gvt/trace.h | 100 +++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 0b2a4a11ae9b..d3b4d42063da 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -472,6 +472,7 @@ enum { GVT_FAILSAFE_INSUFFICIENT_RESOURCE, }; +#include "trace.h" #include "mpt.h" #endif diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 9d6812f0957f..7a041b368f68 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -31,6 +31,7 @@ #include "i915_drv.h" #include "gvt.h" +#include "trace.h" /* common offset among interrupt control registers */ #define regbase_to_isr(base) (base) @@ -178,8 +179,8 @@ int intel_vgpu_reg_imr_handler(struct intel_vgpu *vgpu, struct intel_gvt_irq_ops *ops = gvt->irq.ops; u32 imr = *(u32 *)p_data; - gvt_dbg_irq("write IMR %x, new %08x, old %08x, changed %08x\n", - reg, imr, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ imr); + trace_write_ir(vgpu->id, "IMR", reg, imr, vgpu_vreg(vgpu, reg), + (vgpu_vreg(vgpu, reg) ^ imr)); vgpu_vreg(vgpu, reg) = imr; @@ -209,8 +210,8 @@ int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu, u32 ier = *(u32 *)p_data; u32 virtual_ier = vgpu_vreg(vgpu, reg); - gvt_dbg_irq("write MASTER_IRQ %x, new %08x, old %08x, changed %08x\n", - reg, ier, virtual_ier, virtual_ier ^ ier); + trace_write_ir(vgpu->id, "MASTER_IRQ", reg, ier, virtual_ier, + (virtual_ier ^ ier)); /* * GEN8_MASTER_IRQ is a special irq register, @@ -248,8 +249,8 @@ int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu, struct intel_gvt_irq_info *info; u32 ier = *(u32 *)p_data; - 
gvt_dbg_irq("write IER %x, new %08x, old %08x, changed %08x\n", - reg, ier, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ ier); + trace_write_ir(vgpu->id, "IER", reg, ier, vgpu_vreg(vgpu, reg), + (vgpu_vreg(vgpu, reg) ^ ier)); vgpu_vreg(vgpu, reg) = ier; @@ -285,8 +286,8 @@ int intel_vgpu_reg_iir_handler(struct intel_vgpu *vgpu, unsigned int reg, iir_to_regbase(reg)); u32 iir = *(u32 *)p_data; - gvt_dbg_irq("write IIR %x, new %08x, old %08x, changed %08x\n", - reg, iir, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ iir); + trace_write_ir(vgpu->id, "IIR", reg, iir, vgpu_vreg(vgpu, reg), + (vgpu_vreg(vgpu, reg) ^ iir)); if (WARN_ON(!info)) return -EINVAL; @@ -411,8 +412,7 @@ static void propagate_event(struct intel_gvt_irq *irq, if (!test_bit(bit, (void *)&vgpu_vreg(vgpu, regbase_to_imr(reg_base)))) { - gvt_dbg_irq("set bit (%d) for (%s) for vgpu (%d)\n", - bit, irq_name[event], vgpu->id); + trace_propagate_event(vgpu->id, irq_name[event], bit); set_bit(bit, (void *)&vgpu_vreg(vgpu, regbase_to_iir(reg_base))); } diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 419353624c5a..f0e5487e6688 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -133,8 +133,7 @@ static inline int intel_gvt_hypervisor_inject_msi(struct intel_vgpu *vgpu) if (WARN(control & GENMASK(15, 1), "only support one MSI format\n")) return -EINVAL; - gvt_dbg_irq("vgpu%d: inject msi address %x data%x\n", vgpu->id, addr, - data); + trace_inject_msi(vgpu->id, addr, data); ret = intel_gvt_host.mpt->inject_msi(vgpu->handle, addr, data); if (ret) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 19d98c903672..28c91187c027 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -35,6 +35,7 @@ #include "i915_drv.h" #include "gvt.h" +#include "trace.h" struct render_mmio { int ring_id; @@ -306,9 +307,9 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) 
I915_WRITE(mmio->reg, v); POSTING_READ(mmio->reg); - gvt_dbg_render("load reg %x old %x new %x\n", - i915_mmio_reg_offset(mmio->reg), - mmio->value, v); + trace_render_mmio(vgpu->id, "load", + i915_mmio_reg_offset(mmio->reg), + mmio->value, v); } handle_tlb_pending_event(vgpu, ring_id); } @@ -345,9 +346,9 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) I915_WRITE(mmio->reg, v); POSTING_READ(mmio->reg); - gvt_dbg_render("restore reg %x old %x new %x\n", - i915_mmio_reg_offset(mmio->reg), - mmio->value, v); + trace_render_mmio(vgpu->id, "restore", + i915_mmio_reg_offset(mmio->reg), + mmio->value, v); } } diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 9171291e36c6..8c150381d9a4 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -256,6 +256,106 @@ TRACE_EVENT(gvt_command, __entry->ip_gma, __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4)) ); + +#define GVT_TEMP_STR_LEN 10 +TRACE_EVENT(write_ir, + TP_PROTO(int id, char *reg_name, unsigned int reg, unsigned int new_val, + unsigned int old_val, bool changed), + + TP_ARGS(id, reg_name, reg, new_val, old_val, changed), + + TP_STRUCT__entry( + __field(int, id) + __array(char, buf, GVT_TEMP_STR_LEN) + __field(unsigned int, reg) + __field(unsigned int, new_val) + __field(unsigned int, old_val) + __field(bool, changed) + ), + + TP_fast_assign( + __entry->id = id; + snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", reg_name); + __entry->reg = reg; + __entry->new_val = new_val; + __entry->old_val = old_val; + __entry->changed = changed; + ), + + TP_printk("VM%u write [%s] %x, new %08x, old %08x, changed %08x\n", + __entry->id, __entry->buf, __entry->reg, __entry->new_val, + __entry->old_val, __entry->changed) +); + +TRACE_EVENT(propagate_event, + TP_PROTO(int id, const char *irq_name, int bit), + + TP_ARGS(id, irq_name, bit), + + TP_STRUCT__entry( + __field(int, id) + __array(char, buf, GVT_TEMP_STR_LEN) + 
__field(int, bit) + ), + + TP_fast_assign( + __entry->id = id; + snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", irq_name); + __entry->bit = bit; + ), + + TP_printk("Set bit (%d) for (%s) for vgpu (%d)\n", + __entry->bit, __entry->buf, __entry->id) +); + +TRACE_EVENT(inject_msi, + TP_PROTO(int id, unsigned int address, unsigned int data), + + TP_ARGS(id, address, data), + + TP_STRUCT__entry( + __field(int, id) + __field(unsigned int, address) + __field(unsigned int, data) + ), + + TP_fast_assign( + __entry->id = id; + __entry->address = address; + __entry->data = data; + ), + + TP_printk("vgpu%d:inject msi address %x data %x\n", + __entry->id, __entry->address, __entry->data) +); + +TRACE_EVENT(render_mmio, + TP_PROTO(int id, char *action, unsigned int reg, + unsigned int old_val, unsigned int new_val), + + TP_ARGS(id, action, reg, new_val, old_val), + + TP_STRUCT__entry( + __field(int, id) + __array(char, buf, GVT_TEMP_STR_LEN) + __field(unsigned int, reg) + __field(unsigned int, old_val) + __field(unsigned int, new_val) + ), + + TP_fast_assign( + __entry->id = id; + snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action); + __entry->reg = reg; + __entry->old_val = old_val; + __entry->new_val = new_val; + ), + + TP_printk("VM%u %s reg %x, old %08x new %08x\n", + __entry->id, __entry->buf, __entry->reg, + __entry->old_val, __entry->new_val) +); + #endif /* _GVT_TRACE_H_ */ /* This part must be out of protection */ From 089f93c3f94c368157980578b1efc4f6014ebd97 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 23 May 2017 05:38:09 +0800 Subject: [PATCH 082/341] drm/i915/gvt: Delete gvt_dbg_cmd() in cmd_parser_exec() Since cmd message have been recorded in trace, gvt_dbg_cmd isn't necessary. This will reduce much of dmesg as gvt_dbg_cmd is repeated on each workload. 
Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 5634eb1fa24b..51241de5e7a7 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2431,8 +2431,6 @@ static int cmd_parser_exec(struct parser_exec_state *s) return -EINVAL; } - gvt_dbg_cmd("%s\n", info->name); - s->info = info; trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, @@ -2478,8 +2476,6 @@ static int command_scan(struct parser_exec_state *s, gma_tail = rb_start + rb_tail; gma_bottom = rb_start + rb_len; - gvt_dbg_cmd("scan_start: start=%lx end=%lx\n", gma_head, gma_tail); - while (s->ip_gma != gma_tail) { if (s->buf_type == RING_BUFFER_INSTRUCTION) { if (!(s->ip_gma >= rb_start) || @@ -2508,8 +2504,6 @@ static int command_scan(struct parser_exec_state *s, } } - gvt_dbg_cmd("scan_end\n"); - return ret; } From c713cb2f9b7e1e9ffa8a379cecb13bc6eacd49b6 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Wed, 24 May 2017 20:30:17 +0800 Subject: [PATCH 083/341] drm/i915/gvt: Support event based scheduling This patch decouples the time slice calculation from the scheduler, letting other events trigger scheduling without impacting the calculation for QoS. v2: add only one new enum definition. v3: fix typo.
Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 4 +++- drivers/gpu/drm/i915/gvt/gvt.h | 5 +++++ drivers/gpu/drm/i915/gvt/sched_policy.c | 15 ++++++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 20329171e4ab..c27c6838eaca 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -147,7 +147,9 @@ static int gvt_service_thread(void *data) mutex_unlock(&gvt->lock); } - if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, + if (test_bit(INTEL_GVT_REQUEST_SCHED, + (void *)&gvt->service_request) || + test_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request)) { intel_gvt_schedule(gvt); } diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index d3b4d42063da..8fd40f55caf1 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -256,7 +256,12 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915) enum { INTEL_GVT_REQUEST_EMULATE_VBLANK = 0, + + /* Scheduling trigger by timer */ INTEL_GVT_REQUEST_SCHED = 1, + + /* Scheduling trigger by event */ + INTEL_GVT_REQUEST_EVENT_SCHED = 2, }; static inline void intel_gvt_request_service(struct intel_gvt *gvt, diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index f642a3f0cfa0..6f2073d74de2 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -198,11 +198,6 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct vgpu_sched_data *vgpu_data; struct intel_vgpu *vgpu = NULL; - static uint64_t timer_check; - - if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) - gvt_balance_timeslice(sched_data); - /* no active vgpu or has already had a target */ if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu) goto out; @@ -227,9 
+222,19 @@ out: void intel_gvt_schedule(struct intel_gvt *gvt) { struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; + static uint64_t timer_check; mutex_lock(&gvt->lock); + + if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, + (void *)&gvt->service_request)) { + if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) + gvt_balance_timeslice(sched_data); + } + clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request); + tbs_sched_func(sched_data); + mutex_unlock(&gvt->lock); } From f100daec9c9a5bbf1a715323cb6102e99933fdb3 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Wed, 24 May 2017 09:14:11 +0800 Subject: [PATCH 084/341] drm/i915/gvt: Trigger scheduling after context complete The time based scheduler polls context busy status at every micro-second during vGPU switch, which makes the GPU idle for a while when the context is very small and completes before the next micro-second arrives. Triggering scheduling immediately after a context completes will eliminate this GPU idle time and improve performance. Create two vGPUs of the same type and run Heaven simultaneously: Before this patch: +---------+----------+----------+ | | vGPU1 | vGPU2 | +---------+----------+----------+ | Heaven | 357 | 354 | +-------------------------------+ After this patch: +---------+----------+----------+ | | vGPU1 | vGPU2 | +---------+----------+----------+ | Heaven | 397 | 398 | +-------------------------------+ v2: Protect need_reschedule with the gvt lock.
Signed-off-by: Ping Gao Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index aa7e06df88b6..488fdea348a9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -452,6 +452,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) atomic_dec(&vgpu->running_workload_num); wake_up(&scheduler->workload_complete_wq); + + if (gvt->scheduler.need_reschedule) + intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); + mutex_unlock(&gvt->lock); } From a1dcba905817f97a4086392276334dce0f6faea7 Mon Sep 17 00:00:00 2001 From: fred gao Date: Thu, 25 May 2017 15:32:27 +0800 Subject: [PATCH 085/341] drm/i915/gvt: Legacy HSW related MMIO handler clean up remove all the legacy pre-BDW mmio handlers and the corresponding usage/definition since pre-BDW platforms are not supported in GVT environment. v2: - clean up all the left dirty code before BDW, e.g all D_HSW usage and itself, D_IVB, D_PRE_BDW. (Zhenyu) v3: - change is based on gvt-staging. 
(Zhenyu) Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 22 ++++++++++------------ drivers/gpu/drm/i915/gvt/mmio.h | 18 +++++------------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 45e5907158b7..de394e3e9fab 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1605,7 +1605,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_ALL, instdone_mmio_read, NULL); #undef RING_REG - MMIO_DH(GEN7_SC_INSTDONE, D_HSW_PLUS, instdone_mmio_read, NULL); + MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, instdone_mmio_read, NULL); MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); @@ -2190,7 +2190,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write); - MMIO_DH(FORCEWAKE_ACK_HSW, D_HSW | D_BDW, NULL, NULL); + MMIO_DH(FORCEWAKE_ACK_HSW, D_BDW, NULL, NULL); MMIO_D(ECOBUS, D_ALL); MMIO_DH(GEN6_RC_CONTROL, D_ALL, NULL, NULL); MMIO_DH(GEN6_RC_STATE, D_ALL, NULL, NULL); @@ -2222,12 +2222,12 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN6_RC6p_THRESHOLD, D_ALL); MMIO_D(GEN6_RC6pp_THRESHOLD, D_ALL); MMIO_D(GEN6_PMINTRMSK, D_ALL); - MMIO_DH(HSW_PWR_WELL_BIOS, D_HSW | D_BDW, NULL, power_well_ctl_mmio_write); - MMIO_DH(HSW_PWR_WELL_DRIVER, D_HSW | D_BDW, NULL, power_well_ctl_mmio_write); - MMIO_DH(HSW_PWR_WELL_KVMR, D_HSW | D_BDW, NULL, power_well_ctl_mmio_write); - MMIO_DH(HSW_PWR_WELL_DEBUG, D_HSW | D_BDW, NULL, power_well_ctl_mmio_write); - MMIO_DH(HSW_PWR_WELL_CTL5, D_HSW | D_BDW, NULL, power_well_ctl_mmio_write); - MMIO_DH(HSW_PWR_WELL_CTL6, D_HSW | D_BDW, NULL, 
power_well_ctl_mmio_write); + MMIO_DH(HSW_PWR_WELL_BIOS, D_BDW, NULL, power_well_ctl_mmio_write); + MMIO_DH(HSW_PWR_WELL_DRIVER, D_BDW, NULL, power_well_ctl_mmio_write); + MMIO_DH(HSW_PWR_WELL_KVMR, D_BDW, NULL, power_well_ctl_mmio_write); + MMIO_DH(HSW_PWR_WELL_DEBUG, D_BDW, NULL, power_well_ctl_mmio_write); + MMIO_DH(HSW_PWR_WELL_CTL5, D_BDW, NULL, power_well_ctl_mmio_write); + MMIO_DH(HSW_PWR_WELL_CTL6, D_BDW, NULL, power_well_ctl_mmio_write); MMIO_D(RSTDBYCTL, D_ALL); @@ -2245,7 +2245,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW); MMIO_D(GEN6_PCODE_DATA, D_ALL); MMIO_D(0x13812c, D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); @@ -2324,14 +2323,13 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_DFH(0x215c, D_HSW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x215c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_HSW_PLUS, NULL, NULL); - MMIO_DFH(GEN7_OACONTROL, D_HSW, F_CMD_ACCESS, NULL, NULL); + MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); MMIO_D(0x2b00, D_BDW_PLUS); MMIO_D(0x2360, D_BDW_PLUS); MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 7edd66f38ef9..bd193f9bbcee 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -39,26 +39,18 @@ struct intel_gvt; struct intel_vgpu; -#define D_SNB (1 << 0) -#define D_IVB (1 << 1) -#define D_HSW (1 << 2) -#define D_BDW (1 << 3) -#define D_SKL (1 << 4) -#define D_KBL (1 << 5) +#define D_BDW (1 << 0) +#define D_SKL (1 << 1) +#define D_KBL (1 << 2) #define D_GEN9PLUS (D_SKL | 
D_KBL) #define D_GEN8PLUS (D_BDW | D_SKL | D_KBL) -#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL | D_KBL) -#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) #define D_SKL_PLUS (D_SKL | D_KBL) #define D_BDW_PLUS (D_BDW | D_SKL | D_KBL) -#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL | D_KBL) -#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) -#define D_PRE_BDW (D_SNB | D_IVB | D_HSW) -#define D_PRE_SKL (D_SNB | D_IVB | D_HSW | D_BDW) -#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) +#define D_PRE_SKL (D_BDW) +#define D_ALL (D_BDW | D_SKL | D_KBL) struct intel_gvt_mmio_info { u32 offset; From 89009b7746fa66634061a7e76f881b7ea344d26d Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sun, 21 May 2017 00:15:27 -0700 Subject: [PATCH 086/341] drm/i915/gvt: remove redundant -Wall This flag is already set in the top level Makefile of the kernel. Also, by having set CONFIG_DRM_I915_GVT, thereby appending -Wall to ccflags, you undo all the -Wno-* cflags previously set in the Make variable KBUILD_CFLAGS. For example: cc foo.c -Wall -Wno-format -Wall resets -Wformat. 
Signed-off-by: Nick Desaulniers Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index b123c20e2097..f5486cb94818 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -3,6 +3,6 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ execlist.o scheduler.o sched_policy.o render.o cmd_parser.o -ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) -Wall +ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o From 9b7bd65ecdf347b33c37d73b610fd85774b12e87 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Fri, 2 Jun 2017 15:34:23 +0800 Subject: [PATCH 087/341] drm/i915/gvt: Add runtime_pm get/put to protect MMIO accessing In some cases, GVT-g is accessing MMIO without holding runtime_pm and this patch can add the inline API for doing the runtime_pm get/put to make sure when accessing HW MMIO the i915 HW is really powered on.
Suggested-by: Zhenyu Wang Signed-off-by: Chuanxiao Dong Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 10 ++++++++++ drivers/gpu/drm/i915/gvt/handlers.c | 12 ++++++++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 8fd40f55caf1..1fca76bf7f73 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -477,6 +477,16 @@ enum { GVT_FAILSAFE_INSUFFICIENT_RESOURCE, }; +static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv) +{ + intel_runtime_pm_get(dev_priv); +} + +static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv) +{ + intel_runtime_pm_put(dev_priv); +} + #include "trace.h" #include "mpt.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index de394e3e9fab..bb7037c6c347 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -209,6 +209,7 @@ static int fence_mmio_read(struct intel_vgpu *vgpu, unsigned int off, static int fence_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; unsigned int fence_num = offset_to_fence_num(off); int ret; @@ -217,8 +218,10 @@ static int fence_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return ret; write_vreg(vgpu, off, p_data, bytes); + mmio_hw_access_pre(dev_priv); intel_vgpu_write_fence(vgpu, fence_num, vgpu_vreg64(vgpu, fence_num_to_offset(fence_num))); + mmio_hw_access_post(dev_priv); return 0; } @@ -1265,7 +1268,10 @@ static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset, } write_vreg(vgpu, offset, p_data, bytes); /* TRTTE is not per-context */ + + mmio_hw_access_pre(dev_priv); I915_WRITE(_MMIO(offset), vgpu_vreg(vgpu, offset)); + mmio_hw_access_post(dev_priv); return 0; } @@ -1278,7 +1284,9 @@ static int gen9_trtt_chicken_write(struct intel_vgpu *vgpu, unsigned int 
offset, if (val & 1) { /* unblock hw logic */ + mmio_hw_access_pre(dev_priv); I915_WRITE(_MMIO(offset), val); + mmio_hw_access_post(dev_priv); } write_vreg(vgpu, offset, p_data, bytes); return 0; @@ -1405,7 +1413,9 @@ static int ring_timestamp_mmio_read(struct intel_vgpu *vgpu, { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + mmio_hw_access_pre(dev_priv); vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); + mmio_hw_access_post(dev_priv); return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } @@ -1414,7 +1424,9 @@ static int instdone_mmio_read(struct intel_vgpu *vgpu, { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + mmio_hw_access_pre(dev_priv); vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); + mmio_hw_access_post(dev_priv); return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } From af2c6399aabeb7a7107657a469cb2f16b55dfbae Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Fri, 2 Jun 2017 15:34:24 +0800 Subject: [PATCH 088/341] drm/i915/gvt: add gtt_invalidate API to flush the GTT TLB add gtt_invalidate API to handle the GTT TLB flush instead of hiding in write_pte64 function. 
This can avoid overkill when using write_pte64 Suggested-by: Zhenyu Wang Signed-off-by: Chuanxiao Dong Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index c6f0077f590d..66374dba3b1a 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -244,15 +244,19 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) return readq(addr); } +static void gtt_invalidate(struct drm_i915_private *dev_priv) +{ + mmio_hw_access_pre(dev_priv); + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + mmio_hw_access_post(dev_priv); +} + static void write_pte64(struct drm_i915_private *dev_priv, unsigned long index, u64 pte) { void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; writeq(pte, addr); - - I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); - POSTING_READ(GFX_FLSH_CNTL_GEN6); } static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt, @@ -1849,6 +1853,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, } ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); + gtt_invalidate(gvt->dev_priv); ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); return 0; } @@ -2301,8 +2306,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) u32 num_entries; struct intel_gvt_gtt_entry e; - intel_runtime_pm_get(dev_priv); - memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); e.type = GTT_TYPE_GGTT_PTE; ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn); @@ -2318,7 +2321,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) for (offset = 0; offset < num_entries; offset++) ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); - intel_runtime_pm_put(dev_priv); + gtt_invalidate(dev_priv); } /** From 65f9f6febf12ed5bbcebd3599698eb78b03e5b69 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 6 Jun 2017 15:56:09 +0800 Subject: 
[PATCH 089/341] drm/i915/gvt: Optimize MMIO register handling for some large MMIO blocks Some of traced MMIO registers are a large continuous section. These stuffed the MMIO lookup hash table and so waste lots of memory and get much lower lookup performance. Here we picked out these sections by special handling. These sections include: o Display pipe registers, total 768. o The PVINFO page, total 1024. o MCHBAR_MIRROR, total 65536. o CSR_MMIO, total 3072. So we removed 70,400 items from the hash table, and speed up guest boot time by ~500ms. v2: o add a local function find_mmio_block(). o fix comments. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 162 +++++++++++++++++++++++----- drivers/gpu/drm/i915/gvt/mmio.c | 86 +-------------- drivers/gpu/drm/i915/gvt/mmio.h | 13 ++- 3 files changed, 147 insertions(+), 114 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bb7037c6c347..60c0db10ae15 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -92,11 +92,22 @@ static void write_vreg(struct intel_vgpu *vgpu, unsigned int offset, memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes); } +static struct intel_gvt_mmio_info *find_mmio_info(struct intel_gvt *gvt, + unsigned int offset) +{ + struct intel_gvt_mmio_info *e; + + hash_for_each_possible(gvt->mmio.mmio_info_table, e, node, offset) { + if (e->offset == offset) + return e; + } + return NULL; +} + static int new_mmio_info(struct intel_gvt *gvt, u32 offset, u32 flags, u32 size, u32 addr_mask, u32 ro_mask, u32 device, - int (*read)(struct intel_vgpu *, unsigned int, void *, unsigned int), - int (*write)(struct intel_vgpu *, unsigned int, void *, unsigned int)) + gvt_mmio_func read, gvt_mmio_func write) { struct intel_gvt_mmio_info *info, *p; u32 start, end, i; @@ -116,7 +127,7 @@ static int new_mmio_info(struct intel_gvt *gvt, return -ENOMEM; info->offset = i; - p = 
intel_gvt_find_mmio_info(gvt, info->offset); + p = find_mmio_info(gvt, info->offset); if (p) gvt_err("dup mmio definition offset %x\n", info->offset); @@ -1794,10 +1805,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(SPRSCALE(PIPE_C), D_ALL); MMIO_D(SPRSURFLIVE(PIPE_C), D_ALL); - MMIO_F(LGC_PALETTE(PIPE_A, 0), 4 * 256, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(LGC_PALETTE(PIPE_B, 0), 4 * 256, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(LGC_PALETTE(PIPE_C, 0), 4 * 256, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(HTOTAL(TRANSCODER_A), D_ALL); MMIO_D(HBLANK(TRANSCODER_A), D_ALL); MMIO_D(HSYNC(TRANSCODER_A), D_ALL); @@ -2245,11 +2252,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN6_GDRST, D_ALL, NULL, gdrst_mmio_write); MMIO_F(FENCE_REG_GEN6_LO(0), 0x80, 0, 0, 0, D_ALL, fence_mmio_read, fence_mmio_write); - MMIO_F(VGT_PVINFO_PAGE, VGT_PVINFO_SIZE, F_UNALIGN, 0, 0, D_ALL, pvinfo_mmio_read, pvinfo_mmio_write); MMIO_DH(CPU_VGACNTRL, D_ALL, NULL, vga_control_mmio_write); - MMIO_F(MCHBAR_MIRROR_BASE_SNB, 0x40000, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(TILECTL, D_ALL); MMIO_D(GEN6_UCGCTL1, D_ALL); @@ -2778,7 +2782,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x72380, D_SKL_PLUS); MMIO_D(0x7039c, D_SKL_PLUS); - MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL_PLUS, NULL, NULL); MMIO_D(0x8f074, D_SKL | D_KBL); MMIO_D(0x8f004, D_SKL | D_KBL); MMIO_D(0x8f034, D_SKL | D_KBL); @@ -2852,26 +2855,36 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) return 0; } -/** - * intel_gvt_find_mmio_info - find MMIO information entry by aligned offset - * @gvt: GVT device - * @offset: register offset - * - * This function is used to find the MMIO information entry from hash table - * - * Returns: - * pointer to MMIO information entry, NULL if not exists - */ -struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, - unsigned int offset) +/* Special MMIO blocks. 
*/ +static struct gvt_mmio_block { + unsigned int device; + i915_reg_t offset; + unsigned int size; + gvt_mmio_func read; + gvt_mmio_func write; +} gvt_mmio_blocks[] = { + {D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL}, + {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL}, + {D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE, + pvinfo_mmio_read, pvinfo_mmio_write}, + {D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL}, + {D_ALL, LGC_PALETTE(PIPE_B, 0), 1024, NULL, NULL}, + {D_ALL, LGC_PALETTE(PIPE_C, 0), 1024, NULL, NULL}, +}; + +static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt, + unsigned int offset) { - struct intel_gvt_mmio_info *e; + unsigned long device = intel_gvt_get_device_type(gvt); + struct gvt_mmio_block *block = gvt_mmio_blocks; + int i; - WARN_ON(!IS_ALIGNED(offset, 4)); - - hash_for_each_possible(gvt->mmio.mmio_info_table, e, node, offset) { - if (e->offset == offset) - return e; + for (i = 0; i < ARRAY_SIZE(gvt_mmio_blocks); i++, block++) { + if (!(device & block->device)) + continue; + if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) && + offset < INTEL_GVT_MMIO_OFFSET(block->offset) + block->size) + return block; } return NULL; } @@ -3056,3 +3069,94 @@ bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt, { return in_whitelist(offset); } + +/** + * intel_vgpu_mmio_reg_rw - emulate tracked mmio registers + * @vgpu: a vGPU + * @offset: register offset + * @pdata: data buffer + * @bytes: data length + * + * Returns: + * Zero on success, negative error code if failed. + */ +int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, + void *pdata, unsigned int bytes, bool is_read) +{ + struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt_mmio_info *mmio_info; + struct gvt_mmio_block *mmio_block; + gvt_mmio_func func; + int ret; + + if (WARN_ON(bytes > 4)) + return -EINVAL; + + /* + * Handle special MMIO blocks. 
+ */ + mmio_block = find_mmio_block(gvt, offset); + if (mmio_block) { + func = is_read ? mmio_block->read : mmio_block->write; + if (func) + return func(vgpu, offset, pdata, bytes); + goto default_rw; + } + + /* + * Normal tracked MMIOs. + */ + mmio_info = find_mmio_info(gvt, offset); + if (!mmio_info) { + if (!vgpu->mmio.disable_warn_untrack) + gvt_vgpu_err("untracked MMIO %08x len %d\n", + offset, bytes); + goto default_rw; + } + + if (WARN_ON(bytes > mmio_info->size)) + return -EINVAL; + + if (is_read) + return mmio_info->read(vgpu, offset, pdata, bytes); + else { + u64 ro_mask = mmio_info->ro_mask; + u32 old_vreg = 0, old_sreg = 0; + u64 data = 0; + + if (intel_gvt_mmio_has_mode_mask(gvt, mmio_info->offset)) { + old_vreg = vgpu_vreg(vgpu, offset); + old_sreg = vgpu_sreg(vgpu, offset); + } + + if (likely(!ro_mask)) + ret = mmio_info->write(vgpu, offset, pdata, bytes); + else if (!~ro_mask) { + gvt_vgpu_err("try to write RO reg %x\n", offset); + return 0; + } else { + /* keep the RO bits in the virtual register */ + memcpy(&data, pdata, bytes); + data &= ~ro_mask; + data |= vgpu_vreg(vgpu, offset) & ro_mask; + ret = mmio_info->write(vgpu, offset, &data, bytes); + } + + /* higher 16bits of mode ctl regs are mask bits for change */ + if (intel_gvt_mmio_has_mode_mask(gvt, mmio_info->offset)) { + u32 mask = vgpu_vreg(vgpu, offset) >> 16; + + vgpu_vreg(vgpu, offset) = (old_vreg & ~mask) + | (vgpu_vreg(vgpu, offset) & mask); + vgpu_sreg(vgpu, offset) = (old_sreg & ~mask) + | (vgpu_sreg(vgpu, offset) & mask); + } + } + + return ret; + +default_rw: + return is_read ? 
+ intel_vgpu_default_mmio_read(vgpu, offset, pdata, bytes) : + intel_vgpu_default_mmio_write(vgpu, offset, pdata, bytes); +} diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 35f6c4713cb6..322077fce2bb 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -123,7 +123,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; - struct intel_gvt_mmio_info *mmio; unsigned int offset = 0; int ret = -EINVAL; @@ -187,25 +186,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, goto err; } - mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); - if (mmio) { - if (!intel_gvt_mmio_is_unalign(gvt, mmio->offset)) { - if (WARN_ON(offset + bytes > mmio->offset + mmio->size)) - goto err; - if (WARN_ON(mmio->offset != offset)) - goto err; - } - ret = mmio->read(vgpu, offset, p_data, bytes); - } else { - ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); - - if (!vgpu->mmio.disable_warn_untrack) { - gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n", - offset, bytes, *(u32 *)p_data); - } - } - - if (ret) + ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, true); + if (ret < 0) goto err; intel_gvt_mmio_set_accessed(gvt, offset); @@ -232,9 +214,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; - struct intel_gvt_mmio_info *mmio; unsigned int offset = 0; - u32 old_vreg = 0, old_sreg = 0; int ret = -EINVAL; if (vgpu->failsafe) { @@ -289,66 +269,10 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, return ret; } - mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); - if (!mmio && !vgpu->mmio.disable_warn_untrack) - gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n", - vgpu->id, offset, bytes, *(u32 *)p_data); - - if 
(!intel_gvt_mmio_is_unalign(gvt, offset)) { - if (WARN_ON(!IS_ALIGNED(offset, bytes))) - goto err; - } - - if (mmio) { - u64 ro_mask = mmio->ro_mask; - - if (!intel_gvt_mmio_is_unalign(gvt, mmio->offset)) { - if (WARN_ON(offset + bytes > mmio->offset + mmio->size)) - goto err; - if (WARN_ON(mmio->offset != offset)) - goto err; - } - - if (intel_gvt_mmio_has_mode_mask(gvt, mmio->offset)) { - old_vreg = vgpu_vreg(vgpu, offset); - old_sreg = vgpu_sreg(vgpu, offset); - } - - if (!ro_mask) { - ret = mmio->write(vgpu, offset, p_data, bytes); - } else { - /* Protect RO bits like HW */ - u64 data = 0; - - /* all register bits are RO. */ - if (ro_mask == ~(u64)0) { - gvt_vgpu_err("try to write RO reg %x\n", - offset); - ret = 0; - goto out; - } - /* keep the RO bits in the virtual register */ - memcpy(&data, p_data, bytes); - data &= ~mmio->ro_mask; - data |= vgpu_vreg(vgpu, offset) & mmio->ro_mask; - ret = mmio->write(vgpu, offset, &data, bytes); - } - - /* higher 16bits of mode ctl regs are mask bits for change */ - if (intel_gvt_mmio_has_mode_mask(gvt, mmio->offset)) { - u32 mask = vgpu_vreg(vgpu, offset) >> 16; - - vgpu_vreg(vgpu, offset) = (old_vreg & ~mask) - | (vgpu_vreg(vgpu, offset) & mask); - vgpu_sreg(vgpu, offset) = (old_sreg & ~mask) - | (vgpu_sreg(vgpu, offset) & mask); - } - } else - ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, - bytes); - if (ret) + ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, false); + if (ret < 0) goto err; -out: + intel_gvt_mmio_set_accessed(gvt, offset); mutex_unlock(&gvt->lock); return 0; diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index bd193f9bbcee..4410a323eea3 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -52,6 +52,9 @@ struct intel_vgpu; #define D_PRE_SKL (D_BDW) #define D_ALL (D_BDW | D_SKL | D_KBL) +typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *, + unsigned int); + struct intel_gvt_mmio_info { u32 offset; 
u32 size; @@ -59,8 +62,8 @@ struct intel_gvt_mmio_info { u32 addr_mask; u64 ro_mask; u32 device; - int (*read)(struct intel_vgpu *, unsigned int, void *, unsigned int); - int (*write)(struct intel_vgpu *, unsigned int, void *, unsigned int); + gvt_mmio_func read; + gvt_mmio_func write; u32 addr_range; struct hlist_node node; }; @@ -71,8 +74,6 @@ bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device); int intel_gvt_setup_mmio_info(struct intel_gvt *gvt); void intel_gvt_clean_mmio_info(struct intel_gvt *gvt); -struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, - unsigned int offset); #define INTEL_GVT_MMIO_OFFSET(reg) ({ \ typeof(reg) __reg = reg; \ u32 *offset = (u32 *)&__reg; \ @@ -103,4 +104,8 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt, unsigned int offset); + +int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, + void *pdata, unsigned int bytes, bool is_read); + #endif From d8d94ba3fc4d28753d0d6ba08340d8467380e666 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 6 Jun 2017 15:56:10 +0800 Subject: [PATCH 090/341] drm/i915/gvt: Cleanup struct intel_gvt_mmio_info The size, length, addr_mask fields actually are not necessary. Every tracked mmio has DWORD size, and addr_mask is a legacy field. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/firmware.c | 9 ++------- drivers/gpu/drm/i915/gvt/handlers.c | 7 +------ drivers/gpu/drm/i915/gvt/mmio.h | 3 --- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index dce8d15f706f..5dad9298b2d5 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -102,13 +102,8 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) p = firmware + h->mmio_offset; - hash_for_each(gvt->mmio.mmio_info_table, i, e, node) { - int j; - - for (j = 0; j < e->length; j += 4) - *(u32 *)(p + e->offset + j) = - I915_READ_NOTRACE(_MMIO(e->offset + j)); - } + hash_for_each(gvt->mmio.mmio_info_table, i, e, node) + *(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset)); memcpy(gvt->firmware.mmio, p, info->mmio_size); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 60c0db10ae15..29de07f4d219 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -131,9 +131,7 @@ static int new_mmio_info(struct intel_gvt *gvt, if (p) gvt_err("dup mmio definition offset %x\n", info->offset); - info->size = size; - info->length = (i + 4) < end ? 4 : (end - i); - info->addr_mask = addr_mask; + info->ro_mask = ro_mask; info->device = device; info->read = read ? 
read : intel_vgpu_default_mmio_read; @@ -3114,9 +3112,6 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, goto default_rw; } - if (WARN_ON(bytes > mmio_info->size)) - return -EINVAL; - if (is_read) return mmio_info->read(vgpu, offset, pdata, bytes); else { diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 4410a323eea3..0c89e10dcce4 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -57,9 +57,6 @@ typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *, struct intel_gvt_mmio_info { u32 offset; - u32 size; - u32 length; - u32 addr_mask; u64 ro_mask; u32 device; gvt_mmio_func read; From 56a78de54964894de2f65c9fa8066d5e9843e1ce Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 6 Jun 2017 15:56:11 +0800 Subject: [PATCH 091/341] drm/i915/gvt: Make mmio_attribute as type u8 to save 1.5MB memory Type u8 is big enough to contain all MMIO attribute flags. Since the total MMIO size is 2MB and the attribute table needs only one entry per DWORD register, shrinking each entry from u32 to u8 saves 1.5MB of memory.
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/handlers.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 1fca76bf7f73..9ff371b81835 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -198,7 +198,7 @@ struct intel_gvt_fence { #define INTEL_GVT_MMIO_HASH_BITS 9 struct intel_gvt_mmio { - u32 *mmio_attribute; + u8 *mmio_attribute; DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS); }; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 29de07f4d219..6ec47598d758 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -105,7 +105,7 @@ static struct intel_gvt_mmio_info *find_mmio_info(struct intel_gvt *gvt, } static int new_mmio_info(struct intel_gvt *gvt, - u32 offset, u32 flags, u32 size, + u32 offset, u8 flags, u32 size, u32 addr_mask, u32 ro_mask, u32 device, gvt_mmio_func read, gvt_mmio_func write) { @@ -2922,9 +2922,10 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) { struct intel_gvt_device_info *info = &gvt->device_info; struct drm_i915_private *dev_priv = gvt->dev_priv; + int size = info->mmio_size / 4 * sizeof(*gvt->mmio.mmio_attribute); int ret; - gvt->mmio.mmio_attribute = vzalloc(info->mmio_size); + gvt->mmio.mmio_attribute = vzalloc(size); if (!gvt->mmio.mmio_attribute) return -ENOMEM; From 5c6d4c676d0ccba2dcd97e47e1f10321da423e7d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 6 Jun 2017 15:56:12 +0800 Subject: [PATCH 092/341] drm/i915/gvt: Make the MMIO attribute wrappers be inline Function calls are expensive. I have seen obvious overhead from calls to these wrappers in perf data, especially from the cmd parser side. So make these simple wrappers inline to eliminate the call overhead.
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 78 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/handlers.c | 80 ----------------------------- drivers/gpu/drm/i915/gvt/mmio.h | 8 +-- 3 files changed, 79 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 9ff371b81835..b9a277c726cb 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -199,6 +199,21 @@ struct intel_gvt_fence { struct intel_gvt_mmio { u8 *mmio_attribute; +/* Register contains RO bits */ +#define F_RO (1 << 0) +/* Register contains graphics address */ +#define F_GMADR (1 << 1) +/* Mode mask registers with high 16 bits as the mask bits */ +#define F_MODE_MASK (1 << 2) +/* This reg can be accessed by GPU commands */ +#define F_CMD_ACCESS (1 << 3) +/* This reg has been accessed by a VM */ +#define F_ACCESSED (1 << 4) +/* This reg has been accessed through GPU commands */ +#define F_CMD_ACCESSED (1 << 5) +/* This reg could be accessed by unaligned address */ +#define F_UNALIGN (1 << 6) + DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS); }; @@ -487,6 +502,69 @@ static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv) intel_runtime_pm_put(dev_priv); } +/** + * intel_gvt_mmio_set_accessed - mark a MMIO has been accessed + * @gvt: a GVT device + * @offset: register offset + * + */ +static inline void intel_gvt_mmio_set_accessed( + struct intel_gvt *gvt, unsigned int offset) +{ + gvt->mmio.mmio_attribute[offset >> 2] |= F_ACCESSED; +} + +/** + * intel_gvt_mmio_is_cmd_accessed - mark a MMIO could be accessed by command + * @gvt: a GVT device + * @offset: register offset + * + */ +static inline bool intel_gvt_mmio_is_cmd_access( + struct intel_gvt *gvt, unsigned int offset) +{ + return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_ACCESS; +} + +/** + * intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned + * @gvt: a GVT device + 
* @offset: register offset + * + */ +static inline bool intel_gvt_mmio_is_unalign( + struct intel_gvt *gvt, unsigned int offset) +{ + return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN; +} + +/** + * intel_gvt_mmio_set_cmd_accessed - mark a MMIO has been accessed by command + * @gvt: a GVT device + * @offset: register offset + * + */ +static inline void intel_gvt_mmio_set_cmd_accessed( + struct intel_gvt *gvt, unsigned int offset) +{ + gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESSED; +} + +/** + * intel_gvt_mmio_has_mode_mask - if a MMIO has a mode mask + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * True if a MMIO has a mode mask in its higher 16 bits, false if it isn't. + * + */ +static inline bool intel_gvt_mmio_has_mode_mask( + struct intel_gvt *gvt, unsigned int offset) +{ + return gvt->mmio.mmio_attribute[offset >> 2] & F_MODE_MASK; +} + #include "trace.h" #include "mpt.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 6ec47598d758..8ba7cf5fe185 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -47,21 +47,6 @@ #define PCH_PP_OFF_DELAYS _MMIO(0xc720c) #define PCH_PP_DIVISOR _MMIO(0xc7210) -/* Register contains RO bits */ -#define F_RO (1 << 0) -/* Register contains graphics address */ -#define F_GMADR (1 << 1) -/* Mode mask registers with high 16 bits as the mask bits */ -#define F_MODE_MASK (1 << 2) -/* This reg can be accessed by GPU commands */ -#define F_CMD_ACCESS (1 << 3) -/* This reg has been accessed by a VM */ -#define F_ACCESSED (1 << 4) -/* This reg has been accessed through GPU commands */ -#define F_CMD_ACCESSED (1 << 5) -/* This reg could be accessed by unaligned address */ -#define F_UNALIGN (1 << 6) - unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt) { if (IS_BROADWELL(gvt->dev_priv)) @@ -2952,71 +2937,6 @@ err: return ret; } -/** - * intel_gvt_mmio_set_accessed - mark a MMIO has been accessed - * @gvt: a 
GVT device - * @offset: register offset - * - */ -void intel_gvt_mmio_set_accessed(struct intel_gvt *gvt, unsigned int offset) -{ - gvt->mmio.mmio_attribute[offset >> 2] |= - F_ACCESSED; -} - -/** - * intel_gvt_mmio_is_cmd_accessed - mark a MMIO could be accessed by command - * @gvt: a GVT device - * @offset: register offset - * - */ -bool intel_gvt_mmio_is_cmd_access(struct intel_gvt *gvt, - unsigned int offset) -{ - return gvt->mmio.mmio_attribute[offset >> 2] & - F_CMD_ACCESS; -} - -/** - * intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned - * @gvt: a GVT device - * @offset: register offset - * - */ -bool intel_gvt_mmio_is_unalign(struct intel_gvt *gvt, - unsigned int offset) -{ - return gvt->mmio.mmio_attribute[offset >> 2] & - F_UNALIGN; -} - -/** - * intel_gvt_mmio_set_cmd_accessed - mark a MMIO has been accessed by command - * @gvt: a GVT device - * @offset: register offset - * - */ -void intel_gvt_mmio_set_cmd_accessed(struct intel_gvt *gvt, - unsigned int offset) -{ - gvt->mmio.mmio_attribute[offset >> 2] |= - F_CMD_ACCESSED; -} - -/** - * intel_gvt_mmio_has_mode_mask - if a MMIO has a mode mask - * @gvt: a GVT device - * @offset: register offset - * - * Returns: - * True if a MMIO has a mode mask in its higher 16 bits, false if it isn't. 
- * - */ -bool intel_gvt_mmio_has_mode_mask(struct intel_gvt *gvt, unsigned int offset) -{ - return gvt->mmio.mmio_attribute[offset >> 2] & - F_MODE_MASK; -} /** * intel_vgpu_default_mmio_read - default MMIO read handler diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 0c89e10dcce4..b55ccfa9a24d 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -87,13 +87,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes); int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes); -bool intel_gvt_mmio_is_cmd_access(struct intel_gvt *gvt, - unsigned int offset); -bool intel_gvt_mmio_is_unalign(struct intel_gvt *gvt, unsigned int offset); -void intel_gvt_mmio_set_accessed(struct intel_gvt *gvt, unsigned int offset); -void intel_gvt_mmio_set_cmd_accessed(struct intel_gvt *gvt, - unsigned int offset); -bool intel_gvt_mmio_has_mode_mask(struct intel_gvt *gvt, unsigned int offset); + int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, From fbfd76c3746a322a9f33f77b66f85d4f68cabe4a Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 6 Jun 2017 15:56:13 +0800 Subject: [PATCH 093/341] drm/i915/gvt: Add helper for tuning MMIO hash table We count all the tracked virtual MMIO registers, which can help us to tune the MMIO hash table. v2: Move num_tracked_mmio into gvt structure. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index b9a277c726cb..ffb9ebbbcf5a 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -215,6 +215,7 @@ struct intel_gvt_mmio { #define F_UNALIGN (1 << 6) DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS); + unsigned int num_tracked_mmio; }; struct intel_gvt_firmware { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8ba7cf5fe185..eb3dc1525404 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -124,6 +124,7 @@ static int new_mmio_info(struct intel_gvt *gvt, gvt->mmio.mmio_attribute[info->offset / 4] = flags; INIT_HLIST_NODE(&info->node); hash_add(gvt->mmio.mmio_info_table, &info->node, info->offset); + gvt->mmio.num_tracked_mmio++; } return 0; } @@ -2931,6 +2932,9 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) if (ret) goto err; } + + gvt_dbg_mmio("traced %u virtual mmio registers\n", + gvt->mmio.num_tracked_mmio); return 0; err: intel_gvt_clean_mmio_info(gvt); From 178cd160c6652f57571ba3dc0a9091a1f41d9bc8 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 6 Jun 2017 15:56:14 +0800 Subject: [PATCH 094/341] drm/i915/gvt: Tuning the size of MMIO hash lookup table to 2048 On the Skylake platform, up to 2039 virtual MMIO registers are tracked. So tune the hash table size to 2048 buckets to improve lookup performance.
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index ffb9ebbbcf5a..3a74e79eac2f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -195,7 +195,7 @@ struct intel_gvt_fence { unsigned long vgpu_allocated_fence_num; }; -#define INTEL_GVT_MMIO_HASH_BITS 9 +#define INTEL_GVT_MMIO_HASH_BITS 11 struct intel_gvt_mmio { u8 *mmio_attribute; From 0811fa663015c469510f30e2a0f2fe8fd383b224 Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 24 May 2017 12:02:24 +0800 Subject: [PATCH 095/341] drm/i915/gvt: Fix GDRST vreg state after reset Emulating the GDRST read behavior correctly to ack the guest reset request. v2: - split the original patch into two: GDRST read handler and virtual gpu reset. (Zhenyu) v3: - emulate the GDRST read right after write. (Zhenyu) Cc: Zhenyu Wang Cc: Zhang Yulei Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index eb3dc1525404..372421ba0259 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -298,6 +298,9 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask); + /* sw will wait for the device to ack the reset request */ + vgpu_vreg(vgpu, offset) = 0; + return 0; } From 615c16a9d8649b9894592d11bc393e684b11e2ea Mon Sep 17 00:00:00 2001 From: fred gao Date: Thu, 25 May 2017 15:33:52 +0800 Subject: [PATCH 096/341] drm/i915/gvt: Refine virtual reset function during the emulation of virtual reset: 1. only reset the engine related mmio ending with MMIO offset Master_IRQ, not include display stuff. 2. 
fences are not required to set default value as well to prevent screen flicking. this will fix the issue of Guest screen hang while running Force tdr in Linux guest. v2: - only reset the engine related mmio. (Zhenyu & Zhiyuan) v3: - IMR/Ring mode registers are not save/restored. (Changbin) v4: - redefine the MMIO reset offset for easy understanding. (Zhenyu) - pvinfo can be reset. (Zhenyu) v5: - add more comments for mmio reset. (Zhenyu) Cc: Changbin Du Cc: Zhenyu Wang Cc: Lv zhiyuan Cc: Zhang Yulei Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.c | 28 ++++++++++++++++++++-------- drivers/gpu/drm/i915/gvt/mmio.h | 2 +- drivers/gpu/drm/i915/gvt/vgpu.c | 9 +++++++-- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 322077fce2bb..980ec8906b1e 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -289,20 +289,32 @@ err: * @vgpu: a vGPU * */ -void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu) +void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) { struct intel_gvt *gvt = vgpu->gvt; const struct intel_gvt_device_info *info = &gvt->device_info; + void *mmio = gvt->firmware.mmio; - memcpy(vgpu->mmio.vreg, gvt->firmware.mmio, info->mmio_size); - memcpy(vgpu->mmio.sreg, gvt->firmware.mmio, info->mmio_size); + if (dmlr) { + memcpy(vgpu->mmio.vreg, mmio, info->mmio_size); + memcpy(vgpu->mmio.sreg, mmio, info->mmio_size); - vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; + vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; - /* set the bit 0:2(Core C-State ) to C0 */ - vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + /* set the bit 0:2(Core C-State ) to C0 */ + vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + + vgpu->mmio.disable_warn_untrack = false; + } else { +#define GVT_GEN8_MMIO_RESET_OFFSET (0x44200) + /* only reset the engine related, so starting with 0x44200 + * interrupt include DE,display mmio related will not be + 
* touched + */ + memcpy(vgpu->mmio.vreg, mmio, GVT_GEN8_MMIO_RESET_OFFSET); + memcpy(vgpu->mmio.sreg, mmio, GVT_GEN8_MMIO_RESET_OFFSET); + } - vgpu->mmio.disable_warn_untrack = false; } /** @@ -322,7 +334,7 @@ int intel_vgpu_init_mmio(struct intel_vgpu *vgpu) vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size; - intel_vgpu_reset_mmio(vgpu); + intel_vgpu_reset_mmio(vgpu, true); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index b55ccfa9a24d..32cd64ddad26 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -78,7 +78,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt); }) int intel_vgpu_init_mmio(struct intel_vgpu *vgpu); -void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu); +void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr); void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu); int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 6e3cbd8caec2..90c14e6e3ea0 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -501,9 +501,14 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { + intel_vgpu_reset_gtt(vgpu, dmlr); - intel_vgpu_reset_resource(vgpu); - intel_vgpu_reset_mmio(vgpu); + + /*fence will not be reset during virtual reset */ + if (dmlr) + intel_vgpu_reset_resource(vgpu); + + intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); intel_vgpu_reset_display(vgpu); From d5553c0990c8b9cde6a9abb1f15acf8a11c5eef3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 May 2017 12:55:08 +0100 Subject: [PATCH 097/341] drm/i915: Move the unclaimed mmio detection into the powerwell for KMS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the large comment about requiring the 
powerwell for intel_uncore_arm_unclaimed_mmio_detection() by moving the arming of the mmio error detection into the powerwell held for modesetting. Thereby also accomplishing the goal of only arming the mmio detection after a full modeset. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170504115508.13571-1-chris@chris-wilson.co.uk Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_display.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2b75faf61a83..97dfce53df5b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13121,8 +13121,16 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_commit_hw_done(state); - if (intel_state->modeset) + if (intel_state->modeset) { + /* As one of the primary mmio accessors, KMS has a high + * likelihood of triggering bugs in unclaimed access. After we + * finish modesetting, see if an error has been flagged, and if + * so enable debugging for the next modeset - and hope we catch + * the culprit. + */ + intel_uncore_arm_unclaimed_mmio_detection(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET); + } mutex_lock(&dev->struct_mutex); drm_atomic_helper_cleanup_planes(dev, state); @@ -13132,19 +13140,6 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_state_put(state); - /* As one of the primary mmio accessors, KMS has a high likelihood - * of triggering bugs in unclaimed access. After we finish - * modesetting, see if an error has been flagged, and if so - * enable debugging for the next modeset - and hope we catch - * the culprit. - * - * XXX note that we assume display power is on at this point. 
- * This might hold true now but we need to add pm helper to check - * unclaimed only when the hardware is on, as atomic commits - * can happen also when the device is completely off. - */ - intel_uncore_arm_unclaimed_mmio_detection(dev_priv); - intel_atomic_helper_free_state(dev_priv); } From c4a8952612f6c1c28bab200c45d773e08dfd24d5 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Mon, 5 Jun 2017 10:12:51 -0700 Subject: [PATCH 098/341] drm/i915/guc: Clear enable_guc_loading in case of init failure And prevent calling i915_ggtt_disable_guc twice (the first when GuC init failed, and the second time during driver unload / intel_uc_fini_hw), and hitting the GEM_BUG_ON. v2: Clear enable_guc_loading unconditionally (Michal) Make sure guc_free_load_err_log is still called (Daniele) Don't shoot the messenger (Chris) Fixes: 3950bf3dbff10 ("drm/i915/guc: Add onion teardown to the GuC setup") Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Joonas Lahtinen Signed-off-by: Michel Thierry Reviewed-by: Michal Wajdeczko Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170605171251.9905-1-michel.thierry@intel.com --- drivers/gpu/drm/i915/intel_uc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 7a7b07de28a3..27e072cc96eb 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -433,16 +433,19 @@ err_guc: DRM_NOTE("Falling back from GuC submission to execlist mode\n"); } + i915.enable_guc_loading = 0; + DRM_NOTE("GuC firmware loading disabled\n"); + return ret; } void intel_uc_fini_hw(struct drm_i915_private *dev_priv) { + guc_free_load_err_log(&dev_priv->guc); + if (!i915.enable_guc_loading) return; - guc_free_load_err_log(&dev_priv->guc); - if (i915.enable_guc_submission) i915_guc_submission_disable(dev_priv); From 9f90ff392877581e2c9b54a2d5ecfd1f50d1f170 Mon Sep 17 00:00:00 2001 From: Chris Wilson 
Date: Thu, 8 Jun 2017 12:14:02 +0100 Subject: [PATCH 099/341] drm/i915: Check signaled state after enabling signaling Setting up the irq to signal the request completion takes a finite amount of time, during which it is possible that the request already completed. Check afterwards, just in case, so that we can respond immediately. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170608111405.16466-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 0d1e0d8873ef..46d869e26b4d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -62,7 +62,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence) return false; intel_engine_enable_signaling(to_request(fence), true); - return true; + return !i915_fence_signaled(fence); } static signed long i915_fence_wait(struct dma_fence *fence, From bac2ef4b470b08e3748784eb8e84e00d3b121c20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Jun 2017 12:14:03 +0100 Subject: [PATCH 100/341] drm/i915: Report back whether the irq was armed when adding the waiter The important condition that we need to check after enabling the interrupt for signaling is whether the request completed in the process (and so we missed that interrupt). A large cost in enabling the signaling (rather than waiters) is in waking up the auxiliary signaling thread, but we only need to do so to catch that missed interrupt. If we know we didn't miss any interrupts (because we didn't arm the interrupt) then we can skip waking the auxiliary thread. 
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170608111405.16466-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 183afcb036aa..c90a72f87d82 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -234,7 +234,7 @@ static void enable_fake_irq(struct intel_breadcrumbs *b) mod_timer(&b->hangcheck, wait_timeout()); } -static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); @@ -242,7 +242,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return; + return false; /* The breadcrumb irq will be disarmed on the interrupt after the * waiters are signaled. 
This gives us a single interrupt window in @@ -260,7 +260,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) * implementation to call intel_engine_wakeup() * itself when it wants to simulate a user interrupt, */ - return; + return true; } /* Since we are waiting on a request, the GPU should be busy @@ -278,6 +278,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) } enable_fake_irq(b); + return true; } static inline struct intel_wait *to_wait(struct rb_node *node) @@ -329,7 +330,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node **p, *parent, *completed; - bool first; + bool first, armed; u32 seqno; /* Insert the request into the retirement ordered list @@ -344,6 +345,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * removing stale elements in the tree, we may be able to reduce the * ping-pong between the old bottom-half and ourselves as first-waiter. */ + armed = false; first = true; parent = NULL; completed = NULL; @@ -399,7 +401,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * in the unlocked read of b->irq_seqno_bh in the irq handler) * and so we miss the wake up. 
*/ - __intel_breadcrumbs_enable_irq(b); + armed = __intel_breadcrumbs_enable_irq(b); spin_unlock(&b->irq_lock); } @@ -426,20 +428,24 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!b->irq_armed); GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); - return first; + return armed; } bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool first; + bool armed; spin_lock_irq(&b->rb_lock); - first = __intel_engine_add_wait(engine, wait); + armed = __intel_engine_add_wait(engine, wait); spin_unlock_irq(&b->rb_lock); + if (armed) + return armed; - return first; + /* Make the caller recheck if its request has already started. */ + return i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno - 1); } static inline bool chain_wakeup(struct rb_node *rb, int priority) From 735e0eb669a6bb4f6f7b9574d55b451c838747bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Jun 2017 12:14:04 +0100 Subject: [PATCH 101/341] drm/i915: Skip adding the request to the signal tree is complete Enabling the interrupt for the signaler takes a finite amount of time (a few microseconds) during which it is possible for the request to complete. Check afterwards and skip adding the request to the signal rbtree if it has already completed.
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170608111405.16466-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 49 ++++++++++++++---------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index c90a72f87d82..4e00e5cb9fa1 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -678,8 +678,6 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *parent, **p; - bool first; u32 seqno; /* Note that we may be called from an interrupt handler on another @@ -714,27 +712,36 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, */ wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); - /* Now insert ourselves into the retirement ordered list of signals - * on this engine. We track the oldest seqno as that will be the - * first signal to complete. - */ - parent = NULL; - first = true; - p = &b->signals.rb_node; - while (*p) { - parent = *p; - if (i915_seqno_passed(seqno, - to_signaler(parent)->signaling.wait.seqno)) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; + if (!__i915_gem_request_completed(request, seqno)) { + struct rb_node *parent, **p; + bool first; + + /* Now insert ourselves into the retirement ordered list of + * signals on this engine. We track the oldest seqno as that + * will be the first signal to complete. 
+ */ + parent = NULL; + first = true; + p = &b->signals.rb_node; + while (*p) { + parent = *p; + if (i915_seqno_passed(seqno, + to_signaler(parent)->signaling.wait.seqno)) { + p = &parent->rb_right; + first = false; + } else { + p = &parent->rb_left; + } } + rb_link_node(&request->signaling.node, parent, p); + rb_insert_color(&request->signaling.node, &b->signals); + if (first) + rcu_assign_pointer(b->first_signal, request); + } else { + __intel_engine_remove_wait(engine, &request->signaling.wait); + i915_gem_request_put(request); + wakeup = false; } - rb_link_node(&request->signaling.node, parent, p); - rb_insert_color(&request->signaling.node, &b->signals); - if (first) - rcu_assign_pointer(b->first_signal, request); spin_unlock(&b->rb_lock); From 6b567085c1bdbacea217dd628f0dbf26dd67db3f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Jun 2017 12:14:05 +0100 Subject: [PATCH 102/341] drm/i915: Remove the spin-request during execbuf await_request Originally we would enable and disable the breadcrumb interrupt immediately on demand. This was slow enough to have a large impact (>30%) on tasks that hopped between engines. However, by using a shadow to keep the irq alive for an extra interrupt (see commit 67b807a89230 ("drm/i915: Delay disabling the user interrupt for breadcrumbs")) and by recently reducing the cost in adding ourselves to the signal tree, we no longer need to spin-request during await_request to avoid delays in throughput tests. Without the earlier patches to stop the wakeup when signaling if the irq was already active, we saw no improvement in execbuf overhead (and corresponding contention in other clients) despite the removal of the spinner in a simple test like glxgears. This means there will be scenarios where now we spend longer enabling the interrupt than we would have spent spinning, but these are not likely to have as noticeable an impact as the high frequency test cases (where there should not be any regression). 
Ulterior motive: generalising the engine->sync_to to handle different types of semaphores and non-semaphores. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Oscar Mateo Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170608111405.16466-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 46d869e26b4d..8c59c79cbd8b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -683,7 +683,6 @@ static int i915_gem_request_await_request(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { - u32 seqno; int ret; GEM_BUG_ON(to == from); @@ -707,19 +706,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } - seqno = i915_gem_request_global_seqno(from); - if (!seqno) - goto await_dma_fence; + if (to->engine->semaphore.sync_to) { + u32 seqno; - if (!to->engine->semaphore.sync_to) { - if (!__i915_gem_request_started(from, seqno)) - goto await_dma_fence; - - if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2)) - goto await_dma_fence; - } else { GEM_BUG_ON(!from->engine->semaphore.signal); + seqno = i915_gem_request_global_seqno(from); + if (!seqno) + goto await_dma_fence; + if (seqno <= to->timeline->global_sync[from->engine->id]) return 0; @@ -729,10 +724,9 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; to->timeline->global_sync[from->engine->id] = seqno; + return 0; } - return 0; - await_dma_fence: ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, From 71851fa82f4d644f947dd60cfcf81b47640c1b51 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 8 Jun 2017 08:49:58 -0700 Subject: [PATCH 103/341] drm/i915/cfl: Introduce Coffee Lake platform 
definition. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coffee Lake is a Intel® Processor containing Intel® HD Graphics following Kabylake. It is Gen9 graphics based platform on top of CNP PCH. Let's start by adding the platform definition based on previous platforms but yet as preliminary_hw_support. On following patches we will start adding PCI IDs and the platform specific changes. v2: Also add BS2 ring that is present on GT3. As on KBL, according spec: "GT3 also has additional media blocks with second instance of VEBox and VDBox each", i.e. BSD2 ring in our case. Noticed when reviewing PCI ID patches. v3: CFL_PLATFORM instead for CFL_FEATURES because it contains Platform information and no new features when compared to BDW_FEATURES definition. v4: Rebased on top of Cannonlake patches. Cc: Anusha Srivatsa Signed-off-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: http://patchwork.freedesktop.org/patch/msgid/1496937000-8450-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 1 + 3 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f9632ea186c0..dd22f3d0d9d6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -825,6 +825,7 @@ enum intel_platform { INTEL_BROXTON, INTEL_KABYLAKE, INTEL_GEMINILAKE, + INTEL_COFFEELAKE, INTEL_CANNONLAKE, INTEL_MAX_PLATFORMS }; @@ -2768,6 +2769,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_BROXTON(dev_priv) ((dev_priv)->info.platform == INTEL_BROXTON) #define IS_KABYLAKE(dev_priv) ((dev_priv)->info.platform == INTEL_KABYLAKE) #define IS_GEMINILAKE(dev_priv) ((dev_priv)->info.platform == INTEL_GEMINILAKE) +#define IS_COFFEELAKE(dev_priv) ((dev_priv)->info.platform == INTEL_COFFEELAKE) #define IS_CANNONLAKE(dev_priv) ((dev_priv)->info.platform 
== INTEL_CANNONLAKE) #define IS_MOBILE(dev_priv) ((dev_priv)->info.is_mobile) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 224f5f96ff65..03a495e13b5e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -422,6 +422,22 @@ static const struct intel_device_info intel_kabylake_gt3_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; +#define CFL_PLATFORM \ + .is_alpha_support = 1, \ + BDW_FEATURES, \ + .gen = 9, \ + .platform = INTEL_COFFEELAKE, \ + .ddb_size = 896 + +static const struct intel_device_info intel_coffeelake_info = { + CFL_PLATFORM, +}; + +static const struct intel_device_info intel_coffeelake_gt3_info = { + CFL_PLATFORM, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, +}; + static const struct intel_device_info intel_cannonlake_info = { BDW_FEATURES, .is_alpha_support = 1, diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 91e68fd31c07..77d3214e1a77 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -51,6 +51,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(BROXTON), PLATFORM_NAME(KABYLAKE), PLATFORM_NAME(GEMINILAKE), + PLATFORM_NAME(COFFEELAKE), PLATFORM_NAME(CANNONLAKE), }; #undef PLATFORM_NAME From 809378196bb449fe30d0ca15a990965fc553f9f5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 8 Jun 2017 08:49:59 -0700 Subject: [PATCH 104/341] drm/i915/cfl: Coffee Lake uses CNP PCH. So let's force it on the virtual detection. Also it is still the only silicon for now on this PCH, so WARN otherwise. v2: Rebased on top of Cannonlake and added the missed debug message as pointed by DK. 
Cc: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: http://patchwork.freedesktop.org/patch/msgid/1496937000-8450-2-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7a925c51477f..1f802de7b94b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -139,8 +139,9 @@ static enum intel_pch intel_virt_detect_pch(struct drm_i915_private *dev_priv) } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { ret = PCH_SPT; DRM_DEBUG_KMS("Assuming SunrisePoint PCH\n"); - } else if (IS_CANNONLAKE(dev_priv)) { + } else if (IS_COFFEELAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { ret = PCH_CNP; + DRM_DEBUG_KMS("Assuming CannonPoint PCH\n"); } return ret; @@ -224,11 +225,13 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) } else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found CannonPoint PCH\n"); - WARN_ON(!IS_CANNONLAKE(dev_priv)); + WARN_ON(!IS_CANNONLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv)); } else if (id_ext == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found CannonPoint LP PCH\n"); - WARN_ON(!IS_CANNONLAKE(dev_priv)); + WARN_ON(!IS_CANNONLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv)); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && From 82525c17dedca6316b07c20c62627c83800caa31 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 8 Jun 2017 08:50:00 -0700 Subject: [PATCH 105/341] drm/i915/cfl: Introduce Display workarounds for Coffee Lake. The whole Display engine for Coffee Lake is pretty much identical to the Kabylake. 
For this reason let's reuse all display related production workarounds here even though CFL is not explicitly listed on the Display workarounds page of the spec. v2: moved intel_pm.c chunk to this patch in order to address all display related w/a in a single place. Cc: Arthur Runyan Cc: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1496937000-8450-3-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index aa9d8cef7ce0..0aed13dcedf0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -58,24 +58,24 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { - /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */ + /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */ I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl,glk */ + /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); - /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ - /* WaFbcWakeMemOn:skl,bxt,kbl,glk */ + /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */ + /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */ I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS | DISP_FBC_MEMORY_WAKE); - /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */ + /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */ I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | ILK_DPFC_DISABLE_DUMMY0); } @@ -3549,7 +3549,7 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - if (IS_KABYLAKE(dev_priv)) + if 
(IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) return true; if (IS_SKYLAKE(dev_priv) && @@ -4459,8 +4459,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, fb->modifier == I915_FORMAT_MOD_Yf_TILED; x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; - /* Display WA #1141: kbl. */ - if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) + /* Display WA #1141: kbl,cfl */ + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && + dev_priv->ipc_enabled) latency += 4; if (apply_memory_bw_wa && x_tiled) @@ -8312,7 +8313,7 @@ static void kabylake_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); - /* WaFbcNukeOnHostModify:kbl */ + /* WaFbcNukeOnHostModify:kbl,cfl */ I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | ILK_DPFC_NUKE_ON_ANY_MODIFICATION); } @@ -8780,7 +8781,7 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { if (IS_SKYLAKE(dev_priv)) dev_priv->display.init_clock_gating = skylake_init_clock_gating; - else if (IS_KABYLAKE(dev_priv)) + else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) dev_priv->display.init_clock_gating = kabylake_init_clock_gating; else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; From 73d4e580ccc5c3e05cea002f18111f66c9c07034 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 2 Jun 2017 20:00:17 -0700 Subject: [PATCH 106/341] target: Fix kref->refcount underflow in transport_cmd_finish_abort This patch fixes a se_cmd->cmd_kref underflow during CMD_T_ABORTED when a fabric driver drops it's second reference from below the target_core_tmr.c based callers of transport_cmd_finish_abort(). Recently with the conversion of kref to refcount_t, this bug was manifesting itself as: [705519.601034] refcount_t: underflow; use-after-free. 
[705519.604034] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 20116.512 msecs [705539.719111] ------------[ cut here ]------------ [705539.719117] WARNING: CPU: 3 PID: 26510 at lib/refcount.c:184 refcount_sub_and_test+0x33/0x51 Since the original kref atomic_t based kref_put() didn't check for underflow and only invoked the final callback when zero was reached, this bug did not manifest in practice since all se_cmd memory is using preallocated tags. To address this, go ahead and propagate the existing return from transport_put_cmd() up via transport_cmd_finish_abort(), and change transport_cmd_finish_abort() + core_tmr_handle_tas_abort() callers to only do their local target_put_sess_cmd() if necessary. Reported-by: Bart Van Assche Tested-by: Bart Van Assche Cc: Mike Christie Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: stable@vger.kernel.org # 3.14+ Tested-by: Gary Guo Tested-by: Chu Yuan Lin Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_internal.h | 2 +- drivers/target/target_core_tmr.c | 16 ++++++++-------- drivers/target/target_core_transport.c | 9 ++++++--- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 9ab7090f7c83..0912de7c0cf8 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -136,7 +136,7 @@ int init_se_kmem_caches(void); void release_se_kmem_caches(void); u32 scsi_get_new_index(scsi_index_t); void transport_subsystem_check_init(void); -void transport_cmd_finish_abort(struct se_cmd *, int); +int transport_cmd_finish_abort(struct se_cmd *, int); unsigned char *transport_dump_cmd_direction(struct se_cmd *); void transport_dump_dev_state(struct se_device *, char *, int *); void transport_dump_dev_info(struct se_device *, struct se_lun *, diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 
dce1e1b47316..13f47bf4d16b 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr) kfree(tmr); } -static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) +static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) { unsigned long flags; bool remove = true, send_tas; @@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) transport_send_task_abort(cmd); } - transport_cmd_finish_abort(cmd, remove); + return transport_cmd_finish_abort(cmd, remove); } static int target_check_cdb_and_preempt(struct list_head *list, @@ -184,8 +184,8 @@ void core_tmr_abort_task( cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - transport_cmd_finish_abort(se_cmd, true); - target_put_sess_cmd(se_cmd); + if (!transport_cmd_finish_abort(se_cmd, true)) + target_put_sess_cmd(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); @@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - transport_cmd_finish_abort(cmd, 1); - target_put_sess_cmd(cmd); + if (!transport_cmd_finish_abort(cmd, 1)) + target_put_sess_cmd(cmd); } } @@ -380,8 +380,8 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(cmd, tas); - target_put_sess_cmd(cmd); + if (!core_tmr_handle_tas_abort(cmd, tas)) + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 6025935036c9..f1b3a46bdcaf 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) percpu_ref_put(&lun->lun_ref); } -void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) +int transport_cmd_finish_abort(struct se_cmd *cmd, int remove) { 
bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); + int ret = 0; if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) transport_lun_remove_cmd(cmd); @@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) cmd->se_tfo->aborted_task(cmd); if (transport_cmd_check_stop_to_fabric(cmd)) - return; + return 1; if (remove && ack_kref) - transport_put_cmd(cmd); + ret = transport_put_cmd(cmd); + + return ret; } static void target_complete_failure_work(struct work_struct *work) From 105fa2f44e504c830697b0c794822112d79808dc Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 3 Jun 2017 05:35:47 -0700 Subject: [PATCH 107/341] iscsi-target: Fix delayed logout processing greater than SECONDS_FOR_LOGOUT_COMP This patch fixes a BUG() in iscsit_close_session() that could be triggered when iscsit_logout_post_handler() execution from within tx thread context was not run for more than SECONDS_FOR_LOGOUT_COMP (15 seconds), and the TCP connection didn't already close before then forcing tx thread context to automatically exit. This would manifest itself during explicit logout as: [33206.974254] 1 connection(s) still exist for iSCSI session to iqn.1993-08.org.debian:01:3f5523242179 [33206.980184] INFO: NMI handler (kgdb_nmi_handler) took too long to run: 2100.772 msecs [33209.078643] ------------[ cut here ]------------ [33209.078646] kernel BUG at drivers/target/iscsi/iscsi_target.c:4346! Normally when explicit logout attempt fails, the tx thread context exits and iscsit_close_connection() from rx thread context does the extra cleanup once it detects conn->conn_logout_remove has not been cleared by the logout type specific post handlers. To address this special case, if the logout post handler in tx thread context detects conn->tx_thread_active has already been cleared, simply return and exit in order for existing iscsit_close_connection() logic from rx thread context do failed logout cleanup. 
Reported-by: Bart Van Assche Tested-by: Bart Van Assche Cc: Mike Christie Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: stable@vger.kernel.org # 3.14+ Tested-by: Gary Guo Tested-by: Chu Yuan Lin Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 0d8f81591bed..c0254516b380 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4423,8 +4423,11 @@ static void iscsit_logout_post_handler_closesession( * always sleep waiting for RX/TX thread shutdown to complete * within iscsit_close_connection(). */ - if (!conn->conn_transport->rdma_shutdown) + if (!conn->conn_transport->rdma_shutdown) { sleep = cmpxchg(&conn->tx_thread_active, true, false); + if (!sleep) + return; + } atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4440,8 +4443,11 @@ static void iscsit_logout_post_handler_samecid( { int sleep = 1; - if (!conn->conn_transport->rdma_shutdown) + if (!conn->conn_transport->rdma_shutdown) { sleep = cmpxchg(&conn->tx_thread_active, true, false); + if (!sleep) + return; + } atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); From abb85a9b512e8ca7ad04a5a8a6db9664fe644974 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 7 Jun 2017 20:29:50 -0700 Subject: [PATCH 108/341] iscsi-target: Reject immediate data underflow larger than SCSI transfer length When iscsi WRITE underflow occurs there are two different scenarios that can happen. Normally in practice, when an EDTL vs. SCSI CDB TRANSFER LENGTH underflow is detected, the iscsi immediate data payload is the smaller SCSI CDB TRANSFER LENGTH. That is, when a host fabric LLD is using a fixed size EDTL for a specific control CDB, the SCSI CDB TRANSFER LENGTH and actual SCSI payload ends up being smaller than EDTL. 
In iscsi, this means the received iscsi immediate data payload matches the smaller SCSI CDB TRANSFER LENGTH, because there is no more SCSI payload to accept beyond SCSI CDB TRANSFER LENGTH. However, it's possible for a malicious host to send a WRITE underflow where EDTL is larger than SCSI CDB TRANSFER LENGTH, but incoming iscsi immediate data actually matches EDTL. In the wild, we've never had an iscsi host environment actually try to do this. For this special case, it's wrong to truncate part of the control CDB payload and continue to process the command during underflow when immediate data payload received was larger than SCSI CDB TRANSFER LENGTH, so go ahead and reject and drop the bogus payload as a defensive action. Note this potential bug was originally relaxed by the following for allowing WRITE underflow in MSFT FCP host environments: commit c72c5250224d475614a00c1d7e54a67f77cd3410 Author: Roland Dreier Date: Wed Jul 22 15:08:18 2015 -0700 target: allow underflow/overflow for PR OUT etc. commands Cc: Roland Dreier Cc: Mike Christie Cc: Hannes Reinecke Cc: Martin K. Petersen Cc: # v4.3+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index c0254516b380..3fdca2cdd8da 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1279,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, */ if (dump_payload) goto after_immediate_data; + /* + * Check for underflow case where both EDTL and immediate data payload + * exceeds what is presented by CDB's TRANSFER LENGTH, and what has + * already been set in target_cmd_size_check() as se_cmd->data_length. + * + * For this special case, fail the command and dump the immediate data + * payload. 
+ */ + if (cmd->first_burst_len > cmd->se_cmd.data_length) { + cmd->sense_reason = TCM_INVALID_CDB_FIELD; + goto after_immediate_data; + } immed_ret = iscsit_handle_immediate_data(cmd, hdr, cmd->first_burst_len); From ba714a9c1dea85e0bf2899d02dfeb9c70040427c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 May 2017 23:23:32 +0200 Subject: [PATCH 109/341] pinctrl/amd: Use regular interrupt instead of chained The AMD pinctrl driver uses a chained interrupt to demultiplex the GPIO interrupts. Kevin Vandeventer reported that his new AMD Ryzen locks up hard on boot when the AMD pinctrl driver is initialized. The reason is an interrupt storm. It's not clear whether that's caused by hardware or firmware or both. Using chained interrupts on X86 is a dangerous endeavour. If a system is misconfigured or the hardware is buggy there is no safety net to catch an interrupt storm. Convert the driver to use a regular interrupt for the demultiplex handler. This allows the interrupt storm detector to catch the malfunction and lets the system boot up. This should be backported to stable because it's likely that more users run into this problem as the AMD Ryzen machines are spreading. 
Reported-by: Kevin Vandeventer Link: https://bugzilla.suse.com/show_bug.cgi?id=1034261 Signed-off-by: Thomas Gleixner Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 91 ++++++++++++++++------------------- 1 file changed, 41 insertions(+), 50 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 1482d132fbb8..e432ec887479 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -495,64 +495,54 @@ static struct irq_chip amd_gpio_irqchip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -static void amd_gpio_irq_handler(struct irq_desc *desc) +#define PIN_IRQ_PENDING (BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF)) + +static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) { - u32 i; - u32 off; - u32 reg; - u32 pin_reg; - u64 reg64; - int handled = 0; - unsigned int irq; + struct amd_gpio *gpio_dev = dev_id; + struct gpio_chip *gc = &gpio_dev->gc; + irqreturn_t ret = IRQ_NONE; + unsigned int i, irqnr; unsigned long flags; - struct irq_chip *chip = irq_desc_get_chip(desc); - struct gpio_chip *gc = irq_desc_get_handler_data(desc); - struct amd_gpio *gpio_dev = gpiochip_get_data(gc); + u32 *regs, regval; + u64 status, mask; - chained_irq_enter(chip, desc); - /*enable GPIO interrupt again*/ + /* Read the wake status */ raw_spin_lock_irqsave(&gpio_dev->lock, flags); - reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG1); - reg64 = reg; - reg64 = reg64 << 32; - - reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG0); - reg64 |= reg; + status = readl(gpio_dev->base + WAKE_INT_STATUS_REG1); + status <<= 32; + status |= readl(gpio_dev->base + WAKE_INT_STATUS_REG0); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); - /* - * first 46 bits indicates interrupt status. - * one bit represents four interrupt sources. 
- */ - for (off = 0; off < 46 ; off++) { - if (reg64 & BIT(off)) { - for (i = 0; i < 4; i++) { - pin_reg = readl(gpio_dev->base + - (off * 4 + i) * 4); - if ((pin_reg & BIT(INTERRUPT_STS_OFF)) || - (pin_reg & BIT(WAKE_STS_OFF))) { - irq = irq_find_mapping(gc->irqdomain, - off * 4 + i); - generic_handle_irq(irq); - writel(pin_reg, - gpio_dev->base - + (off * 4 + i) * 4); - handled++; - } - } + /* Bit 0-45 contain the relevant status bits */ + status &= (1ULL << 46) - 1; + regs = gpio_dev->base; + for (mask = 1, irqnr = 0; status; mask <<= 1, regs += 4, irqnr += 4) { + if (!(status & mask)) + continue; + status &= ~mask; + + /* Each status bit covers four pins */ + for (i = 0; i < 4; i++) { + regval = readl(regs + i); + if (!(regval & PIN_IRQ_PENDING)) + continue; + irq = irq_find_mapping(gc->irqdomain, irqnr + i); + generic_handle_irq(irq); + /* Clear interrupt */ + writel(regval, regs + i); + ret = IRQ_HANDLED; } } - if (handled == 0) - handle_bad_irq(desc); - + /* Signal EOI to the GPIO unit */ raw_spin_lock_irqsave(&gpio_dev->lock, flags); - reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); - reg |= EOI_MASK; - writel(reg, gpio_dev->base + WAKE_INT_MASTER_REG); + regval = readl(gpio_dev->base + WAKE_INT_MASTER_REG); + regval |= EOI_MASK; + writel(regval, gpio_dev->base + WAKE_INT_MASTER_REG); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); - chained_irq_exit(chip, desc); + return ret; } static int amd_get_groups_count(struct pinctrl_dev *pctldev) @@ -821,10 +811,11 @@ static int amd_gpio_probe(struct platform_device *pdev) goto out2; } - gpiochip_set_chained_irqchip(&gpio_dev->gc, - &amd_gpio_irqchip, - irq_base, - amd_gpio_irq_handler); + ret = devm_request_irq(&pdev->dev, irq_base, amd_gpio_irq_handler, 0, + KBUILD_MODNAME, gpio_dev); + if (ret) + goto out2; + platform_set_drvdata(pdev, gpio_dev); dev_dbg(&pdev->dev, "amd gpio driver loaded\n"); From b7c747d4627462f25b3daabf49c18895a6722faa Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Tue, 30 May 
2017 16:43:04 +0200 Subject: [PATCH 110/341] pinctrl: stm32: Fix bad function call In stm32_pconf_parse_conf function, stm32_pmx_gpio_set_direction is called with wrong parameter value. Indeed, using NULL value for range will raise an oops. Fixes: aceb16dc2da5 ("pinctrl: Add STM32 MCUs support") Reported-by: Dan Carpenter Signed-off-by: Alexandre TORGUE Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index d3c5f5dfbbd7..222b6685b09f 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -798,7 +798,7 @@ static int stm32_pconf_parse_conf(struct pinctrl_dev *pctldev, break; case PIN_CONFIG_OUTPUT: __stm32_gpio_set(bank, offset, arg); - ret = stm32_pmx_gpio_set_direction(pctldev, NULL, pin, false); + ret = stm32_pmx_gpio_set_direction(pctldev, range, pin, false); break; default: ret = -EINVAL; From 07981f2b27a4ff4bdbc4bb6b5ec965f33f4117c3 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 8 Jun 2017 14:48:00 +0300 Subject: [PATCH 111/341] drm/i915/glk: Remove the alpha_support flag Geminilake is now included in CI, making it part of the pre-merge criteria. The support should be in good enough shape, so let's remove the alpha_support flag. 
Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/20170608114800.17201-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 03a495e13b5e..ba0443cc874a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -400,7 +400,6 @@ static const struct intel_device_info intel_broxton_info = { static const struct intel_device_info intel_geminilake_info = { GEN9_LP_FEATURES, .platform = INTEL_GEMINILAKE, - .is_alpha_support = 1, .ddb_size = 1024, .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; From ff85a1a80e00349dc7783c8dc4d6233d9a709283 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 21 May 2017 11:44:47 +0200 Subject: [PATCH 112/341] kconfig: Check for libncurses before menuconfig There is a check and a nice user-friendly message when the curses library is not present on the system and the user wants to do "make menuconfig". It doesn't get issued, though. Instead, we fail the build when mconf.c doesn't find the curses.h header: HOSTCC scripts/kconfig/mconf.o In file included from scripts/kconfig/mconf.c:23:0: scripts/kconfig/lxdialog/dialog.h:38:20: fatal error: curses.h: No such file or directory #include CURSES_LOC ^ compilation terminated. Make that check a prerequisite to mconf so that the user sees the error message instead: $ make menuconfig *** Unable to find the ncurses libraries or the *** required header files. *** 'make menuconfig' requires the ncurses libraries. *** *** Install ncurses (ncurses-devel) and try again. 
*** scripts/kconfig/Makefile:203: recipe for target 'scripts/kconfig/dochecklxdialog' failed make[1]: *** [scripts/kconfig/dochecklxdialog] Error 1 Makefile:548: recipe for target 'menuconfig' failed make: *** [menuconfig] Error 2 Signed-off-by: Borislav Petkov Signed-off-by: Masahiro Yamada --- scripts/kconfig/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 90a091b6ae4d..eb8144643b78 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -196,7 +196,7 @@ clean-files += config.pot linux.pot # Check that we have the required ncurses stuff installed for lxdialog (menuconfig) PHONY += $(obj)/dochecklxdialog -$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog +$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog $(obj)/dochecklxdialog: $(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf) From b056f8f3d6b900e8afd19f312719160346d263b4 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 8 Jun 2017 16:41:05 -0700 Subject: [PATCH 113/341] drm/i915/cfl: Add Coffee Lake PCI IDs for S Skus. Add PCI Ids for S Sku following the BSpec. 
v2: Remove the unused INTEL_CFL_IDS.(Rodrigo) v3: Add missing IDs(Rodrigo) Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1496965267-21725-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + include/drm/i915_pciids.h | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ba0443cc874a..4dfc4d37165a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -489,6 +489,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_KBL_GT2_IDS(&intel_kabylake_info), INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info), INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), + INTEL_CFL_S_IDS(&intel_coffeelake_info), INTEL_CNL_IDS(&intel_cannonlake_info), {0, 0, 0} }; diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 7d2696a6588e..4858debd4b58 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -334,6 +334,14 @@ INTEL_KBL_GT3_IDS(info), \ INTEL_KBL_GT4_IDS(info) +/* CFL S */ +#define INTEL_CFL_S_IDS(info) \ + INTEL_VGA_DEVICE(0x3E90, info), /* SRV GT1 */ \ + INTEL_VGA_DEVICE(0x3E93, info), /* SRV GT1 */ \ + INTEL_VGA_DEVICE(0x3E91, info), /* SRV GT2 */ \ + INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \ + INTEL_VGA_DEVICE(0x3E96, info) /* SRV GT2 */ + /* CNL U 2+2 */ #define INTEL_CNL_U_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x5A52, info), \ From ccfd13215fd25a0e8c28221f3acc0dcaec11cd15 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 8 Jun 2017 16:41:06 -0700 Subject: [PATCH 114/341] drm/i915/cfl: Add Coffee Lake PCI IDs for H Sku. Add PCI Ids for H Sku by following the BSpec. v2: Remove unused INTEL_CFL_IDS.(Rodrigo). 
v3: Add missing IDs(Rodrigo) Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1496965267-21725-2-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + include/drm/i915_pciids.h | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 4dfc4d37165a..9202a68fc7c1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -490,6 +490,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info), INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), INTEL_CFL_S_IDS(&intel_coffeelake_info), + INTEL_CFL_H_IDS(&intel_coffeelake_info), INTEL_CNL_IDS(&intel_cannonlake_info), {0, 0, 0} }; diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 4858debd4b58..8f25316013ba 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -342,6 +342,11 @@ INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \ INTEL_VGA_DEVICE(0x3E96, info) /* SRV GT2 */ +/* CFL H */ +#define INTEL_CFL_H_IDS(info) \ + INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ + INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ + /* CNL U 2+2 */ #define INTEL_CNL_U_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x5A52, info), \ From d29fe702c9cb682df99146d24d06e5455f043101 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 8 Jun 2017 16:41:07 -0700 Subject: [PATCH 115/341] drm/i915/cfl: Add Coffee Lake PCI IDs for U Sku. Add PCI Ids for U Skus of Coffeelake. v2: Use intel_coffeelake_gt3_info, in accordance to- Rodrigo's patch: v3: rebased v3: Remove unused INTEL_CFL_IDS(Rodrigo). 
Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1496965267-21725-3-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + include/drm/i915_pciids.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 9202a68fc7c1..4f5fd1ab9f2c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -491,6 +491,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), INTEL_CFL_S_IDS(&intel_coffeelake_info), INTEL_CFL_H_IDS(&intel_coffeelake_info), + INTEL_CFL_U_IDS(&intel_coffeelake_gt3_info), INTEL_CNL_IDS(&intel_cannonlake_info), {0, 0, 0} }; diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 8f25316013ba..34c8f5600ce0 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -347,6 +347,13 @@ INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ +/* CFL U */ +#define INTEL_CFL_U_IDS(info) \ + INTEL_VGA_DEVICE(0x3EA6, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x3EA7, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x3EA5, info) /* ULT GT3 */ + /* CNL U 2+2 */ #define INTEL_CNL_U_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x5A52, info), \ From c0f82960fa6d9134c9c56106caf9daa1ecf84c2e Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 8 Jun 2017 16:48:23 -0700 Subject: [PATCH 116/341] drm/i915/guc: Load GuC on Coffee Lake Coffee Lake reuses Kabylake's GuC. 
v2: Change Coffeelake to Coffee Lake Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Lukasz Fiedorowicz Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1496965704-23610-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_guc_loader.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 4f5fd1ab9f2c..e38ff1436d83 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -426,6 +426,7 @@ static const struct intel_device_info intel_kabylake_gt3_info = { BDW_FEATURES, \ .gen = 9, \ .platform = INTEL_COFFEELAKE, \ + .has_guc = 1, \ .ddb_size = 896 static const struct intel_device_info intel_coffeelake_info = { diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index d9045b6e897b..8b0ae7fce7f2 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -402,7 +402,7 @@ int intel_guc_select_fw(struct intel_guc *guc) guc->fw.path = I915_BXT_GUC_UCODE; guc->fw.major_ver_wanted = BXT_FW_MAJOR; guc->fw.minor_ver_wanted = BXT_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv)) { + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { guc->fw.path = I915_KBL_GUC_UCODE; guc->fw.major_ver_wanted = KBL_FW_MAJOR; guc->fw.minor_ver_wanted = KBL_FW_MINOR; From 5e5d8b664effe57dc459e082fc37b1aec23f184a Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 8 Jun 2017 16:48:24 -0700 Subject: [PATCH 117/341] drm/i915/huc: Load HuC on Coffee Lake Coffee Lake reuses Kabylake's HUC firmware. 
v2: Change Coffeelake to Coffee Lake Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Lukasz Fiedorowicz Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1496965704-23610-2-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index f5eb18d0e2d1..6145fa0d6773 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -167,7 +167,7 @@ void intel_huc_select_fw(struct intel_huc *huc) huc->fw.path = I915_BXT_HUC_UCODE; huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv)) { + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { huc->fw.path = I915_KBL_HUC_UCODE; huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; From 84cd843e715298bbfb17ed40c7d61d7db6854a70 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 13:02:30 -0700 Subject: [PATCH 118/341] drm/i915/cfl: Coffee Lake reuses Kabylake DMC. both platforms. We haven't received any separated release specifically for Coffee Lake so let's just re-use what is already there for Kabylake.
Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1497038550-30910-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_csr.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index e38ff1436d83..03b5fe3e3036 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -426,6 +426,7 @@ static const struct intel_device_info intel_kabylake_gt3_info = { BDW_FEATURES, \ .gen = 9, \ .platform = INTEL_COFFEELAKE, \ + .has_csr = 1, \ .has_guc = 1, \ .ddb_size = 896 diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 1575bde0cf90..fb6af0bcdf8f 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -291,7 +291,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, if (IS_GEMINILAKE(dev_priv)) { required_version = GLK_CSR_VERSION_REQUIRED; - } else if (IS_KABYLAKE(dev_priv)) { + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { required_version = KBL_CSR_VERSION_REQUIRED; } else if (IS_SKYLAKE(dev_priv)) { required_version = SKL_CSR_VERSION_REQUIRED; @@ -440,7 +440,7 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (IS_GEMINILAKE(dev_priv)) csr->fw_path = I915_CSR_GLK; - else if (IS_KABYLAKE(dev_priv)) + else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) csr->fw_path = I915_CSR_KBL; else if (IS_SKYLAKE(dev_priv)) csr->fw_path = I915_CSR_SKL; From 0c8792d00d38de85b6ceb1dd67d3ee009d7c8e42 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 9 Jun 2017 15:48:05 +0800 Subject: [PATCH 119/341] drm/i915: Fix GVT-g PVINFO version compatibility check Currently it's strictly checked if PVINFO version matches 1.0 for GVT-g i915 guest which doesn't help for compatibility at all and means the GVT-g host can't extend PVINFO easily with a version
bump for real compatibility check. This fixes that to check minimal required PVINFO version instead. v2: - drop unneeded version macro - use only major version for sanity check v3: - fix up PVInfo value with kernel type - one indent fix Reviewed-by: Joonas Lahtinen Cc: Chuanxiao Dong Cc: Joonas Lahtinen Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Zhenyu Wang Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170609074805.5101-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/i915_pvinfo.h | 8 ++------ drivers/gpu/drm/i915/i915_vgpu.c | 10 ++++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index c0cb2974caac..2cfe96d3e5d1 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -36,10 +36,6 @@ #define VGT_VERSION_MAJOR 1 #define VGT_VERSION_MINOR 0 -#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor)) -#define INTEL_VGT_IF_VERSION \ - INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR) - /* * notifications from guest to vgpu device model */ @@ -55,8 +51,8 @@ enum vgt_g2v_type { struct vgt_if { u64 magic; /* VGT_MAGIC */ - uint16_t version_major; - uint16_t version_minor; + u16 version_major; + u16 version_minor; u32 vgt_id; /* ID of vGT instance */ u32 rsv1[12]; /* pad to offset 0x40 */ /* diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 3791e9c9392f..cf7a958e4d3c 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -60,8 +60,8 @@ */ void i915_check_vgpu(struct drm_i915_private *dev_priv) { - uint64_t magic; - uint32_t version; + u64 magic; + u16 version_major; BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); @@ -69,10 +69,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) if (magic != VGT_MAGIC) return; - version = INTEL_VGT_IF_VERSION_ENCODE( - __raw_i915_read16(dev_priv, 
vgtif_reg(version_major)), - __raw_i915_read16(dev_priv, vgtif_reg(version_minor))); - if (version != INTEL_VGT_IF_VERSION) { + version_major = __raw_i915_read16(dev_priv, vgtif_reg(version_major)); + if (version_major < VGT_VERSION_MAJOR) { DRM_INFO("VGT interface version mismatch!\n"); return; } From 3db1200ca21f3c63c9044185dc5762ef996848cb Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Fri, 9 Jun 2017 17:26:32 -0700 Subject: [PATCH 120/341] clocksource/drivers/arm_arch_timer: Fix read and iounmap of incorrect variable Fix boot warning 'Trying to vfree() nonexistent vm area' from arch_timer_mem_of_init(). Refactored code attempts to read and iounmap using address frame instead of address ioremap(frame->cntbase). Fixes: c389d701dfb70 ("clocksource: arm_arch_timer: split MMIO timer probing.") Signed-off-by: Frank Rowand Reviewed-by: Fu Wei Acked-by: Marc Zyngier Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 4bed671e490e..8b5c30062d99 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame) return 0; } - rate = readl_relaxed(frame + CNTFRQ); + rate = readl_relaxed(base + CNTFRQ); - iounmap(frame); + iounmap(base); return rate; } From b037d58f976204544f4266840f3538d2ed7e01d9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 12 Jun 2017 12:21:13 +0200 Subject: [PATCH 121/341] drm/i915: Pass crtc_state and connector state to backlight enable/disable functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backlight functions need to determine the pipe and the transcoder the backlight will be enabled on, so pass crtc_state instead of trying to dereference the state without holding locks. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100022 Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170612102115.23665-2-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_ddi.c | 4 ++-- drivers/gpu/drm/i915/intel_dp.c | 21 ++++++++++++--------- drivers/gpu/drm/i915/intel_drv.h | 10 ++++++---- drivers/gpu/drm/i915/intel_dsi.c | 4 ++-- drivers/gpu/drm/i915/intel_lvds.c | 16 +++------------- drivers/gpu/drm/i915/intel_panel.c | 12 +++++++----- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8bac62805cd1..2d35d97d170e 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1845,7 +1845,7 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, if (port == PORT_A && INTEL_GEN(dev_priv) < 9) intel_dp_stop_link_train(intel_dp); - intel_edp_backlight_on(intel_dp); + intel_edp_backlight_on(pipe_config, conn_state); intel_psr_enable(intel_dp); intel_edp_drrs_enable(intel_dp, pipe_config); } @@ -1875,7 +1875,7 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, intel_edp_drrs_disable(intel_dp, old_crtc_state); intel_psr_disable(intel_dp); - intel_edp_backlight_off(intel_dp); + intel_edp_backlight_off(old_conn_state); } } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d2fd8b67bb8a..d1ee278064b7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2338,14 +2338,17 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) } /* Enable backlight PWM and backlight PP control. 
*/ -void intel_edp_backlight_on(struct intel_dp *intel_dp) +void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_dp *intel_dp = enc_to_intel_dp(conn_state->best_encoder); + if (!is_edp(intel_dp)) return; DRM_DEBUG_KMS("\n"); - intel_panel_enable_backlight(intel_dp->attached_connector); + intel_panel_enable_backlight(crtc_state, conn_state); _intel_edp_backlight_on(intel_dp); } @@ -2377,15 +2380,17 @@ static void _intel_edp_backlight_off(struct intel_dp *intel_dp) } /* Disable backlight PP control and backlight PWM. */ -void intel_edp_backlight_off(struct intel_dp *intel_dp) +void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state) { + struct intel_dp *intel_dp = enc_to_intel_dp(old_conn_state->best_encoder); + if (!is_edp(intel_dp)) return; DRM_DEBUG_KMS("\n"); _intel_edp_backlight_off(intel_dp); - intel_panel_disable_backlight(intel_dp->attached_connector); + intel_panel_disable_backlight(old_conn_state); } /* @@ -2681,7 +2686,7 @@ static void intel_disable_dp(struct intel_encoder *encoder, /* Make sure the panel is off before trying to change the mode. But also * ensure that we have vdd while we switch off the panel. 
*/ intel_edp_panel_vdd_on(intel_dp); - intel_edp_backlight_off(intel_dp); + intel_edp_backlight_off(old_conn_state); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_edp_panel_off(intel_dp); @@ -2895,10 +2900,8 @@ static void g4x_enable_dp(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - intel_enable_dp(encoder, pipe_config, conn_state); - intel_edp_backlight_on(intel_dp); + intel_edp_backlight_on(pipe_config, conn_state); } static void vlv_enable_dp(struct intel_encoder *encoder, @@ -2907,7 +2910,7 @@ static void vlv_enable_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - intel_edp_backlight_on(intel_dp); + intel_edp_backlight_on(pipe_config, conn_state); intel_psr_enable(intel_dp); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ac5cd41ab420..f08899becfe2 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1527,8 +1527,9 @@ bool intel_dp_compute_config(struct intel_encoder *encoder, bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port); enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd); -void intel_edp_backlight_on(struct intel_dp *intel_dp); -void intel_edp_backlight_off(struct intel_dp *intel_dp); +void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); +void intel_edp_backlight_off(const struct drm_connector_state *conn_state); void intel_edp_panel_vdd_on(struct intel_dp *intel_dp); void intel_edp_panel_on(struct intel_dp *intel_dp); void intel_edp_panel_off(struct intel_dp *intel_dp); @@ -1715,8 +1716,9 @@ void intel_panel_set_backlight_acpi(struct intel_connector *connector, u32 level, u32 max); int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe); 
-void intel_panel_enable_backlight(struct intel_connector *connector); -void intel_panel_disable_backlight(struct intel_connector *connector); +void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); +void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state); void intel_panel_destroy_backlight(struct drm_connector *connector); enum drm_connector_status intel_panel_detect(struct drm_i915_private *dev_priv); extern struct drm_display_mode *intel_find_panel_downclock( diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 54030b68406a..721f3f3adc1e 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -835,7 +835,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_dsi_port_enable(encoder); } - intel_panel_enable_backlight(intel_dsi->attached_connector); + intel_panel_enable_backlight(pipe_config, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); } @@ -866,7 +866,7 @@ static void intel_dsi_disable(struct intel_encoder *encoder, DRM_DEBUG_KMS("\n"); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); - intel_panel_disable_backlight(intel_dsi->attached_connector); + intel_panel_disable_backlight(old_conn_state); /* * Disable Device ready before the port shutdown in order diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index d2c2bca1b327..6fe5d7c3bc23 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -311,8 +311,6 @@ static void intel_enable_lvds(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - struct intel_connector *intel_connector = - &lvds_encoder->attached_connector->base; struct drm_i915_private *dev_priv = to_i915(dev); I915_WRITE(lvds_encoder->reg, 
I915_READ(lvds_encoder->reg) | LVDS_PORT_EN); @@ -322,7 +320,7 @@ static void intel_enable_lvds(struct intel_encoder *encoder, if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) DRM_ERROR("timed out waiting for panel to power on\n"); - intel_panel_enable_backlight(intel_connector); + intel_panel_enable_backlight(pipe_config, conn_state); } static void intel_disable_lvds(struct intel_encoder *encoder, @@ -345,11 +343,7 @@ static void gmch_disable_lvds(struct intel_encoder *encoder, struct drm_connector_state *old_conn_state) { - struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - struct intel_connector *intel_connector = - &lvds_encoder->attached_connector->base; - - intel_panel_disable_backlight(intel_connector); + intel_panel_disable_backlight(old_conn_state); intel_disable_lvds(encoder, old_crtc_state, old_conn_state); } @@ -358,11 +352,7 @@ static void pch_disable_lvds(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { - struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - struct intel_connector *intel_connector = - &lvds_encoder->attached_connector->base; - - intel_panel_disable_backlight(intel_connector); + intel_panel_disable_backlight(old_conn_state); } static void pch_post_disable_lvds(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 4114cb3f14e7..2567533544aa 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -819,8 +819,9 @@ static void pwm_disable_backlight(struct intel_connector *connector) pwm_disable(panel->backlight.pwm); } -void intel_panel_disable_backlight(struct intel_connector *connector) +void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state) { + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_i915_private *dev_priv = 
to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; @@ -1136,17 +1137,18 @@ static void pwm_enable_backlight(struct intel_connector *connector) intel_panel_actually_set_backlight(connector, panel->backlight.level); } -void intel_panel_enable_backlight(struct intel_connector *connector) +void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = intel_get_pipe_from_connector(connector); + enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; if (!panel->backlight.present) return; - if (!WARN_ON_ONCE(pipe == INVALID_PIPE)) - DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); + DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); mutex_lock(&dev_priv->backlight_lock); From 90d7cd240308e3d77c414224c47f42f2362c3e21 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 12 Jun 2017 12:21:14 +0200 Subject: [PATCH 122/341] drm/i915: Pass connector state to intel_panel_set_backlight_acpi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing the state is also needed to convert the backlight functions to use the correct state instead of looking it up. This is done as a separate commit to allow easier bisecting. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100022 Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170612102115.23665-3-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_opregion.c | 2 +- drivers/gpu/drm/i915/intel_panel.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f08899becfe2..e53506bdaf3b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1712,7 +1712,7 @@ void intel_pch_panel_fitting(struct intel_crtc *crtc, void intel_gmch_panel_fitting(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, int fitting_mode); -void intel_panel_set_backlight_acpi(struct intel_connector *connector, +void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state, u32 level, u32 max); int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe); diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index d44465190dc1..2bd03001cc70 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -461,7 +461,7 @@ static u32 asle_set_backlight(struct drm_i915_private *dev_priv, u32 bclp) DRM_DEBUG_KMS("updating opregion backlight %d/255\n", bclp); drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) - intel_panel_set_backlight_acpi(connector, bclp, 255); + intel_panel_set_backlight_acpi(connector->base.state, bclp, 255); drm_connector_list_iter_end(&conn_iter); asle->cblv = DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 2567533544aa..8cb573166421 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -671,21 +671,21 @@ static 
void intel_panel_set_backlight(struct intel_connector *connector, /* set backlight brightness to level in range [0..max], assuming hw min is * respected. */ -void intel_panel_set_backlight_acpi(struct intel_connector *connector, +void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state, u32 user_level, u32 user_max) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = intel_get_pipe_from_connector(connector); u32 hw_level; /* - * INVALID_PIPE may occur during driver init because + * Lack of crtc may occur during driver init because * connection_mutex isn't held across the entire backlight * setup + modeset readout, and the BIOS can issue the * requests at any time. */ - if (!panel->backlight.present || pipe == INVALID_PIPE) + if (!panel->backlight.present || !conn_state->crtc) return; mutex_lock(&dev_priv->backlight_lock); From 7d025e0804f2cbeafeba2f5b8effa6361d7db8e4 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 12 Jun 2017 12:21:15 +0200 Subject: [PATCH 123/341] drm/i915: Pass atomic state to backlight enable/disable/set callbacks. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass crtc_state to the enable callback, and connector_state to all callbacks. This will eliminate the need to guess for the correct pipe in these callbacks. The crtc state is required for pch_enable_backlight to obtain the correct cpu_transcoder. intel_dp_aux_backlight's setup function is called before hw readout, so crtc_state and connector_state->best_encoder are NULL in the enable() and set() callbacks. 
This fixes the following series of warns from intel_get_pipe_from_connector: [ 219.968428] ------------[ cut here ]------------ [ 219.968481] WARNING: CPU: 3 PID: 2457 at drivers/gpu/drm/i915/intel_display.c:13881 intel_get_pipe_from_connector+0x62/0x90 [i915] [ 219.968483] WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)) [ 219.968485] Modules linked in: nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm intel_rapl x86_pkg_temp_thermal coretemp kvm_intel snd_seq_midi snd_seq_midi_event kvm snd_rawmidi irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc snd_seq snd_seq_device serio_raw snd_timer aesni_intel aes_x86_64 crypto_simd glue_helper cryptd lpc_ich snd mei_me shpchp soundcore mei rfkill_gpio mac_hid intel_pmc_ipc parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid igb ahci i915 xhci_pci dca xhci_hcd ptp sdhci_pci sdhci libahci pps_core i2c_hid hid video [ 219.968573] CPU: 3 PID: 2457 Comm: kworker/u8:3 Tainted: G W 4.10.0-tip-201703010159+ #2 [ 219.968575] Hardware name: Intel Corp. Broxton P/NOTEBOOK, BIOS APLKRVPA.X64.0144.B10.1606270006 06/27/2016 [ 219.968627] Workqueue: events_unbound intel_atomic_commit_work [i915] [ 219.968629] Call Trace: [ 219.968640] dump_stack+0x63/0x87 [ 219.968646] __warn+0xd1/0xf0 [ 219.968651] warn_slowpath_fmt+0x4f/0x60 [ 219.968657] ? 
drm_printk+0x97/0xa0 [ 219.968708] intel_get_pipe_from_connector+0x62/0x90 [i915] [ 219.968756] intel_panel_enable_backlight+0x19/0xf0 [i915] [ 219.968804] intel_edp_backlight_on.part.22+0x33/0x40 [i915] [ 219.968852] intel_edp_backlight_on+0x18/0x20 [i915] [ 219.968900] intel_enable_ddi+0x94/0xc0 [i915] [ 219.968950] intel_encoders_enable.isra.93+0x77/0x90 [i915] [ 219.969000] haswell_crtc_enable+0x310/0x7f0 [i915] [ 219.969051] intel_update_crtc+0x58/0x100 [i915] [ 219.969101] skl_update_crtcs+0x218/0x240 [i915] [ 219.969153] intel_atomic_commit_tail+0x350/0x1000 [i915] [ 219.969159] ? vtime_account_idle+0xe/0x50 [ 219.969164] ? finish_task_switch+0x107/0x250 [ 219.969214] intel_atomic_commit_work+0x12/0x20 [i915] [ 219.969219] process_one_work+0x153/0x3f0 [ 219.969223] worker_thread+0x12b/0x4b0 [ 219.969227] kthread+0x101/0x140 [ 219.969230] ? rescuer_thread+0x340/0x340 [ 219.969233] ? kthread_park+0x90/0x90 [ 219.969237] ? do_syscall_64+0x6e/0x180 [ 219.969243] ret_from_fork+0x2c/0x40 [ 219.969246] ---[ end trace 0a8fa19387b9ad6d ]--- Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100022 Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170612102115.23665-4-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 25 ++- drivers/gpu/drm/i915/intel_drv.h | 7 +- .../gpu/drm/i915/intel_dsi_dcs_backlight.c | 22 ++- drivers/gpu/drm/i915/intel_panel.c | 157 +++++++++--------- 4 files changed, 115 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index a0995c00fc84..6cc62980d0da 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -78,8 +78,13 @@ static uint32_t intel_dp_aux_get_backlight(struct intel_connector *connector) * 8-bit or 16 bit value (MSB and LSB) */ static void -intel_dp_aux_set_backlight(struct intel_connector 
*connector, u32 level) +intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 level) { + /* + * conn_state->best_encoder is likely NULL when called from + * intel_dp_aux_setup_backlight() + */ + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); uint8_t vals[2] = { 0x0 }; @@ -97,8 +102,14 @@ intel_dp_aux_set_backlight(struct intel_connector *connector, u32 level) } } -static void intel_dp_aux_enable_backlight(struct intel_connector *connector) +static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); + /* + * conn_state->best_encoder (and crtc_state) are NULL when called from + * intel_dp_aux_setup_backlight() + */ struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); uint8_t dpcd_buf = 0; uint8_t edp_backlight_mode = 0; @@ -131,12 +142,12 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector) } set_aux_backlight_enable(intel_dp, true); - intel_dp_aux_set_backlight(connector, connector->panel.backlight.level); + intel_dp_aux_set_backlight(conn_state, connector->panel.backlight.level); } -static void intel_dp_aux_disable_backlight(struct intel_connector *connector) +static void intel_dp_aux_disable_backlight(const struct drm_connector_state *old_conn_state) { - set_aux_backlight_enable(enc_to_intel_dp(&connector->encoder->base), false); + set_aux_backlight_enable(enc_to_intel_dp(old_conn_state->best_encoder), false); } static int intel_dp_aux_setup_backlight(struct intel_connector *connector, @@ -145,7 +156,7 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); struct intel_panel *panel = &connector->panel; - intel_dp_aux_enable_backlight(connector); + 
intel_dp_aux_enable_backlight(NULL, connector->base.state); if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) panel->backlight.max = 0xFFFF; @@ -165,7 +176,7 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector) { struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); - /* Check the eDP Display control capabilities registers to determine if + /* Check the eDP Display control capabilities registers to determine if * the panel can support backlight control over the aux channel */ if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP && diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e53506bdaf3b..2ad5fa337ed1 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -288,9 +288,10 @@ struct intel_panel { /* Connector and platform specific backlight functions */ int (*setup)(struct intel_connector *connector, enum pipe pipe); uint32_t (*get)(struct intel_connector *connector); - void (*set)(struct intel_connector *connector, uint32_t level); - void (*disable)(struct intel_connector *connector); - void (*enable)(struct intel_connector *connector); + void (*set)(const struct drm_connector_state *conn_state, uint32_t level); + void (*disable)(const struct drm_connector_state *conn_state); + void (*enable)(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); uint32_t (*hz_to_pwm)(struct intel_connector *connector, uint32_t hz); void (*power)(struct intel_connector *, bool enable); diff --git a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c index ac7c6020c443..6e09ceb71500 100644 --- a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c +++ b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c @@ -60,10 +60,9 @@ static u32 dcs_get_backlight(struct intel_connector *connector) return data; } -static void dcs_set_backlight(struct intel_connector *connector, u32 
level) +static void dcs_set_backlight(const struct drm_connector_state *conn_state, u32 level) { - struct intel_encoder *encoder = connector->encoder; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); struct mipi_dsi_device *dsi_device; u8 data = level; enum port port; @@ -76,14 +75,13 @@ static void dcs_set_backlight(struct intel_connector *connector, u32 level) } } -static void dcs_disable_backlight(struct intel_connector *connector) +static void dcs_disable_backlight(const struct drm_connector_state *conn_state) { - struct intel_encoder *encoder = connector->encoder; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); struct mipi_dsi_device *dsi_device; enum port port; - dcs_set_backlight(connector, 0); + dcs_set_backlight(conn_state, 0); for_each_dsi_port(port, intel_dsi->dcs_cabc_ports) { u8 cabc = POWER_SAVE_OFF; @@ -110,11 +108,11 @@ static void dcs_disable_backlight(struct intel_connector *connector) } } -static void dcs_enable_backlight(struct intel_connector *connector) +static void dcs_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct intel_encoder *encoder = connector->encoder; - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - struct intel_panel *panel = &connector->panel; + struct intel_dsi *intel_dsi = enc_to_intel_dsi(conn_state->best_encoder); + struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel; struct mipi_dsi_device *dsi_device; enum port port; @@ -142,7 +140,7 @@ static void dcs_enable_backlight(struct intel_connector *connector) &cabc, sizeof(cabc)); } - dcs_set_backlight(connector, panel->backlight.level); + dcs_set_backlight(conn_state, panel->backlight.level); } static int dcs_setup_backlight(struct intel_connector *connector, diff --git 
a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 8cb573166421..96c2cbd81869 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -561,15 +561,18 @@ static u32 intel_panel_get_backlight(struct intel_connector *connector) return val; } -static void lpt_set_backlight(struct intel_connector *connector, u32 level) +static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + u32 val = I915_READ(BLC_PWM_PCH_CTL2) & ~BACKLIGHT_DUTY_CYCLE_MASK; I915_WRITE(BLC_PWM_PCH_CTL2, val | level); } -static void pch_set_backlight(struct intel_connector *connector, u32 level) +static void pch_set_backlight(const struct drm_connector_state *conn_state, u32 level) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); u32 tmp; @@ -577,8 +580,9 @@ static void pch_set_backlight(struct intel_connector *connector, u32 level) I915_WRITE(BLC_PWM_CPU_CTL, tmp | level); } -static void i9xx_set_backlight(struct intel_connector *connector, u32 level) +static void i9xx_set_backlight(const struct drm_connector_state *conn_state, u32 level) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 tmp, mask; @@ -604,50 +608,51 @@ static void i9xx_set_backlight(struct intel_connector *connector, u32 level) I915_WRITE(BLC_PWM_CTL, tmp | level); } -static void vlv_set_backlight(struct intel_connector *connector, u32 level) +static void vlv_set_backlight(const struct drm_connector_state *conn_state, u32 level) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = 
to_i915(connector->base.dev); - enum pipe pipe = intel_get_pipe_from_connector(connector); + enum pipe pipe = to_intel_crtc(conn_state->crtc)->pipe; u32 tmp; - if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) - return; - tmp = I915_READ(VLV_BLC_PWM_CTL(pipe)) & ~BACKLIGHT_DUTY_CYCLE_MASK; I915_WRITE(VLV_BLC_PWM_CTL(pipe), tmp | level); } -static void bxt_set_backlight(struct intel_connector *connector, u32 level) +static void bxt_set_backlight(const struct drm_connector_state *conn_state, u32 level) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; I915_WRITE(BXT_BLC_PWM_DUTY(panel->backlight.controller), level); } -static void pwm_set_backlight(struct intel_connector *connector, u32 level) +static void pwm_set_backlight(const struct drm_connector_state *conn_state, u32 level) { - struct intel_panel *panel = &connector->panel; + struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel; int duty_ns = DIV_ROUND_UP(level * CRC_PMIC_PWM_PERIOD_NS, 100); pwm_config(panel->backlight.pwm, duty_ns, CRC_PMIC_PWM_PERIOD_NS); } static void -intel_panel_actually_set_backlight(struct intel_connector *connector, u32 level) +intel_panel_actually_set_backlight(const struct drm_connector_state *conn_state, u32 level) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; DRM_DEBUG_DRIVER("set backlight PWM = %d\n", level); level = intel_panel_compute_brightness(connector, level); - panel->backlight.set(connector, level); + panel->backlight.set(conn_state, level); } /* set backlight brightness to level in range [0..max], scaling wrt hw min */ -static void intel_panel_set_backlight(struct intel_connector *connector, +static void intel_panel_set_backlight(const struct drm_connector_state *conn_state, u32 user_level, u32 user_max) { + struct 
intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 hw_level; @@ -663,7 +668,7 @@ static void intel_panel_set_backlight(struct intel_connector *connector, panel->backlight.level = hw_level; if (panel->backlight.enabled) - intel_panel_actually_set_backlight(connector, hw_level); + intel_panel_actually_set_backlight(conn_state, hw_level); mutex_unlock(&dev_priv->backlight_lock); } @@ -702,17 +707,18 @@ void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state panel->backlight.device->props.max_brightness); if (panel->backlight.enabled) - intel_panel_actually_set_backlight(connector, hw_level); + intel_panel_actually_set_backlight(conn_state, hw_level); mutex_unlock(&dev_priv->backlight_lock); } -static void lpt_disable_backlight(struct intel_connector *connector) +static void lpt_disable_backlight(const struct drm_connector_state *old_conn_state) { + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); u32 tmp; - intel_panel_actually_set_backlight(connector, 0); + intel_panel_actually_set_backlight(old_conn_state, 0); /* * Although we don't support or enable CPU PWM with LPT/SPT based @@ -732,12 +738,13 @@ static void lpt_disable_backlight(struct intel_connector *connector) I915_WRITE(BLC_PWM_PCH_CTL1, tmp & ~BLM_PCH_PWM_ENABLE); } -static void pch_disable_backlight(struct intel_connector *connector) +static void pch_disable_backlight(const struct drm_connector_state *old_conn_state) { + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); u32 tmp; - intel_panel_actually_set_backlight(connector, 0); + intel_panel_actually_set_backlight(old_conn_state, 0); tmp = I915_READ(BLC_PWM_CPU_CTL2); I915_WRITE(BLC_PWM_CPU_CTL2, tmp 
& ~BLM_PWM_ENABLE); @@ -746,44 +753,43 @@ static void pch_disable_backlight(struct intel_connector *connector) I915_WRITE(BLC_PWM_PCH_CTL1, tmp & ~BLM_PCH_PWM_ENABLE); } -static void i9xx_disable_backlight(struct intel_connector *connector) +static void i9xx_disable_backlight(const struct drm_connector_state *old_conn_state) { - intel_panel_actually_set_backlight(connector, 0); + intel_panel_actually_set_backlight(old_conn_state, 0); } -static void i965_disable_backlight(struct intel_connector *connector) +static void i965_disable_backlight(const struct drm_connector_state *old_conn_state) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct drm_i915_private *dev_priv = to_i915(old_conn_state->connector->dev); u32 tmp; - intel_panel_actually_set_backlight(connector, 0); + intel_panel_actually_set_backlight(old_conn_state, 0); tmp = I915_READ(BLC_PWM_CTL2); I915_WRITE(BLC_PWM_CTL2, tmp & ~BLM_PWM_ENABLE); } -static void vlv_disable_backlight(struct intel_connector *connector) +static void vlv_disable_backlight(const struct drm_connector_state *old_conn_state) { + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - enum pipe pipe = intel_get_pipe_from_connector(connector); + enum pipe pipe = to_intel_crtc(old_conn_state->crtc)->pipe; u32 tmp; - if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) - return; - - intel_panel_actually_set_backlight(connector, 0); + intel_panel_actually_set_backlight(old_conn_state, 0); tmp = I915_READ(VLV_BLC_PWM_CTL2(pipe)); I915_WRITE(VLV_BLC_PWM_CTL2(pipe), tmp & ~BLM_PWM_ENABLE); } -static void bxt_disable_backlight(struct intel_connector *connector) +static void bxt_disable_backlight(const struct drm_connector_state *old_conn_state) { + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = 
&connector->panel; u32 tmp, val; - intel_panel_actually_set_backlight(connector, 0); + intel_panel_actually_set_backlight(old_conn_state, 0); tmp = I915_READ(BXT_BLC_PWM_CTL(panel->backlight.controller)); I915_WRITE(BXT_BLC_PWM_CTL(panel->backlight.controller), @@ -796,21 +802,23 @@ static void bxt_disable_backlight(struct intel_connector *connector) } } -static void cnp_disable_backlight(struct intel_connector *connector) +static void cnp_disable_backlight(const struct drm_connector_state *old_conn_state) { + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 tmp; - intel_panel_actually_set_backlight(connector, 0); + intel_panel_actually_set_backlight(old_conn_state, 0); tmp = I915_READ(BXT_BLC_PWM_CTL(panel->backlight.controller)); I915_WRITE(BXT_BLC_PWM_CTL(panel->backlight.controller), tmp & ~BXT_BLC_PWM_ENABLE); } -static void pwm_disable_backlight(struct intel_connector *connector) +static void pwm_disable_backlight(const struct drm_connector_state *old_conn_state) { + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct intel_panel *panel = &connector->panel; /* Disable the backlight */ @@ -844,13 +852,15 @@ void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_st if (panel->backlight.device) panel->backlight.device->props.power = FB_BLANK_POWERDOWN; panel->backlight.enabled = false; - panel->backlight.disable(connector); + panel->backlight.disable(old_conn_state); mutex_unlock(&dev_priv->backlight_lock); } -static void lpt_enable_backlight(struct intel_connector *connector) +static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct 
intel_panel *panel = &connector->panel; u32 pch_ctl1, pch_ctl2, schicken; @@ -894,22 +904,18 @@ static void lpt_enable_backlight(struct intel_connector *connector) I915_WRITE(BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_PWM_ENABLE); /* This won't stick until the above enable. */ - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); } -static void pch_enable_backlight(struct intel_connector *connector) +static void pch_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = intel_get_pipe_from_connector(connector); - enum transcoder cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 cpu_ctl2, pch_ctl1, pch_ctl2; - if (!WARN_ON_ONCE(pipe == INVALID_PIPE)) - cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); - else - cpu_transcoder = TRANSCODER_EDP; - cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2); if (cpu_ctl2 & BLM_PWM_ENABLE) { DRM_DEBUG_KMS("cpu backlight already enabled\n"); @@ -933,7 +939,7 @@ static void pch_enable_backlight(struct intel_connector *connector) I915_WRITE(BLC_PWM_CPU_CTL2, cpu_ctl2 | BLM_PWM_ENABLE); /* This won't stick until the above enable. 
*/ - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); pch_ctl2 = panel->backlight.max << 16; I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2); @@ -947,8 +953,10 @@ static void pch_enable_backlight(struct intel_connector *connector) I915_WRITE(BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_PWM_ENABLE); } -static void i9xx_enable_backlight(struct intel_connector *connector) +static void i9xx_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 ctl, freq; @@ -973,7 +981,7 @@ static void i9xx_enable_backlight(struct intel_connector *connector) POSTING_READ(BLC_PWM_CTL); /* XXX: combine this into above write? */ - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); /* * Needed to enable backlight on some 855gm models. 
BLC_HIST_CTL is @@ -984,16 +992,15 @@ static void i9xx_enable_backlight(struct intel_connector *connector) I915_WRITE(BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE); } -static void i965_enable_backlight(struct intel_connector *connector) +static void i965_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = intel_get_pipe_from_connector(connector); + enum pipe pipe = to_intel_crtc(conn_state->crtc)->pipe; u32 ctl, ctl2, freq; - if (WARN_ON_ONCE(pipe == INVALID_PIPE)) - pipe = PIPE_A; - ctl2 = I915_READ(BLC_PWM_CTL2); if (ctl2 & BLM_PWM_ENABLE) { DRM_DEBUG_KMS("backlight already enabled\n"); @@ -1017,19 +1024,18 @@ static void i965_enable_backlight(struct intel_connector *connector) POSTING_READ(BLC_PWM_CTL2); I915_WRITE(BLC_PWM_CTL2, ctl2 | BLM_PWM_ENABLE); - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); } -static void vlv_enable_backlight(struct intel_connector *connector) +static void vlv_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = intel_get_pipe_from_connector(connector); + enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; u32 ctl, ctl2; - if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) - return; - ctl2 = I915_READ(VLV_BLC_PWM_CTL2(pipe)); if (ctl2 & BLM_PWM_ENABLE) { DRM_DEBUG_KMS("backlight already enabled\n"); @@ -1041,7 +1047,7 @@ static void vlv_enable_backlight(struct intel_connector *connector) 
I915_WRITE(VLV_BLC_PWM_CTL(pipe), ctl); /* XXX: combine this into above write? */ - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); ctl2 = 0; if (panel->backlight.active_low_pwm) @@ -1051,16 +1057,15 @@ static void vlv_enable_backlight(struct intel_connector *connector) I915_WRITE(VLV_BLC_PWM_CTL2(pipe), ctl2 | BLM_PWM_ENABLE); } -static void bxt_enable_backlight(struct intel_connector *connector) +static void bxt_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = intel_get_pipe_from_connector(connector); + enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; u32 pwm_ctl, val; - if (WARN_ON_ONCE(pipe == INVALID_PIPE)) - pipe = PIPE_A; - /* Controller 1 uses the utility pin. 
*/ if (panel->backlight.controller == 1) { val = I915_READ(UTIL_PIN_CTL); @@ -1088,7 +1093,7 @@ static void bxt_enable_backlight(struct intel_connector *connector) I915_WRITE(BXT_BLC_PWM_FREQ(panel->backlight.controller), panel->backlight.max); - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); pwm_ctl = 0; if (panel->backlight.active_low_pwm) @@ -1100,8 +1105,10 @@ static void bxt_enable_backlight(struct intel_connector *connector) pwm_ctl | BXT_BLC_PWM_ENABLE); } -static void cnp_enable_backlight(struct intel_connector *connector) +static void cnp_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 pwm_ctl; @@ -1117,7 +1124,7 @@ static void cnp_enable_backlight(struct intel_connector *connector) I915_WRITE(BXT_BLC_PWM_FREQ(panel->backlight.controller), panel->backlight.max); - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); pwm_ctl = 0; if (panel->backlight.active_low_pwm) @@ -1129,12 +1136,14 @@ static void cnp_enable_backlight(struct intel_connector *connector) pwm_ctl | BXT_BLC_PWM_ENABLE); } -static void pwm_enable_backlight(struct intel_connector *connector) +static void pwm_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; pwm_enable(panel->backlight.pwm); - intel_panel_actually_set_backlight(connector, panel->backlight.level); + intel_panel_actually_set_backlight(conn_state, panel->backlight.level); } void 
intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, @@ -1163,7 +1172,7 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, panel->backlight.device->props.max_brightness); } - panel->backlight.enable(connector); + panel->backlight.enable(crtc_state, conn_state); panel->backlight.enabled = true; if (panel->backlight.device) panel->backlight.device->props.power = FB_BLANK_UNBLANK; @@ -1181,7 +1190,7 @@ static int intel_backlight_device_update_status(struct backlight_device *bd) drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); DRM_DEBUG_KMS("updating intel_backlight, brightness=%d/%d\n", bd->props.brightness, bd->props.max_brightness); - intel_panel_set_backlight(connector, bd->props.brightness, + intel_panel_set_backlight(connector->base.state, bd->props.brightness, bd->props.max_brightness); /* From 945f2672ccbb5c92a8a7bf23cba3a68a6b0885e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 9 Jun 2017 15:25:58 -0700 Subject: [PATCH 124/341] drm/i915/cnl: Implement .get_display_clock_speed() for CNL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for reading out the cdclk frequency from the hardware on CNL. Very similar to BXT, with a few new twists and turns: * the PLL is now called CDCLK PLL, not DE PLL * reference clock can be 24 MHz in addition to the 19.2 MHz BXT had * the ratio now lives in the PLL enable register * Only 1x and 2x CD2X dividers are supported v2: Deal with PLL lock bit the same way as BXT/SKL do now v3: DSSM refclk indicator is bit 31 not 24 (Ander) v4: Rebased by Rodrigo after Ville's cdclk rework. v5: Set cdclk to the ref clock as previous platforms. 
(Imre) Signed-off-by: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 5 +++ drivers/gpu/drm/i915/intel_cdclk.c | 56 +++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b6d69e289974..ac3df675b4f3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6550,6 +6550,9 @@ enum { #define SKL_DFSM_PIPE_B_DISABLE (1 << 21) #define SKL_DFSM_PIPE_C_DISABLE (1 << 28) +#define SKL_DSSM _MMIO(0x51004) +#define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) + #define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) #define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1<<14) @@ -8116,6 +8119,8 @@ enum { #define BXT_DE_PLL_ENABLE _MMIO(0x46070) #define BXT_DE_PLL_PLL_ENABLE (1 << 31) #define BXT_DE_PLL_LOCK (1 << 30) +#define CNL_CDCLK_PLL_RATIO(x) (x) +#define CNL_CDCLK_PLL_RATIO_MASK 0xff /* GEN9 DC */ #define DC_STATE_EN _MMIO(0x45504) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 634c89fe6377..1b31d82b2cfe 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1400,6 +1400,58 @@ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &cdclk_state); } +static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val; + + if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) + cdclk_state->ref = 24000; + else + cdclk_state->ref = 19200; + + cdclk_state->vco = 0; + + val = I915_READ(BXT_DE_PLL_ENABLE); + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) + return; + + cdclk_state->vco = (val & CNL_CDCLK_PLL_RATIO_MASK) * cdclk_state->ref; +} + +static void 
cnl_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 divider; + int div; + + cnl_cdclk_pll_update(dev_priv, cdclk_state); + + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; + + divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; + + switch (divider) { + case BXT_CDCLK_CD2X_DIV_SEL_1: + div = 2; + break; + case BXT_CDCLK_CD2X_DIV_SEL_2: + div = 4; + break; + default: + MISSING_CASE(divider); + return; + } + + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); +} + /** * intel_cdclk_state_compare - Determine if two CDCLK states differ * @a: first CDCLK state @@ -1895,7 +1947,9 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) skl_modeset_calc_cdclk; } - if (IS_GEN9_BC(dev_priv)) + if (IS_CANNONLAKE(dev_priv)) + dev_priv->display.get_cdclk = cnl_get_cdclk; + else if (IS_GEN9_BC(dev_priv)) dev_priv->display.get_cdclk = skl_get_cdclk; else if (IS_GEN9_LP(dev_priv)) dev_priv->display.get_cdclk = bxt_get_cdclk; From ef4f7a689ac5f61e36ac9ae77ac967b6469ae68b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 9 Jun 2017 15:25:59 -0700 Subject: [PATCH 125/341] drm/i915/cnl: Implement .set_cdclk() for CNL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for changing the cdclk frequency on CNL. Again, quite similar to BXT, but there are some annoying differences which means trying to share more code might not be feasible: * PLL ratio now lives in the PLL enable register * pcode came from SKL, not from BXT We support three cdclk frequencies: 168,336,528 Mhz. The first two use the same PLL frequency, the last one uses a different one meaning we once again may need to toggle the PLL off and on when changing cdclk. v2: Rebased by Rodrigo on top of Ville's cdclk rework. 
v3: Respect order of set_ below get_ (Ville) v4: Added __attribute__((unused)) to avoid broken compilation with Werror. Signed-off-by: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-2-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 106 +++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 1b31d82b2cfe..0e892b8c84af 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1452,6 +1452,112 @@ static void cnl_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); } +static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) +{ + u32 val; + + val = I915_READ(BXT_DE_PLL_ENABLE); + val &= ~BXT_DE_PLL_PLL_ENABLE; + I915_WRITE(BXT_DE_PLL_ENABLE, val); + + /* Timeout 200us */ + if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) == 0, 1)) + DRM_ERROR("timout waiting for CDCLK PLL unlock\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) +{ + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); + u32 val; + + val = CNL_CDCLK_PLL_RATIO(ratio); + I915_WRITE(BXT_DE_PLL_ENABLE, val); + + val |= BXT_DE_PLL_PLL_ENABLE; + I915_WRITE(BXT_DE_PLL_ENABLE, val); + + /* Timeout 200us */ + if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) != 0, 1)) + DRM_ERROR("timout waiting for CDCLK PLL lock\n"); + + dev_priv->cdclk.hw.vco = vco; +} + +__attribute__((unused)) +static void cnl_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; + u32 val, divider, pcu_ack; + int ret; + + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + 
SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); + return; + } + + /* cdclk = vco / 2 / div{1,2} */ + switch (DIV_ROUND_CLOSEST(vco, cdclk)) { + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; + break; + case 2: + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + } + + switch (cdclk) { + case 528000: + pcu_ack = 2; + break; + case 336000: + pcu_ack = 1; + break; + case 168000: + default: + pcu_ack = 0; + break; + } + + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_enable(dev_priv, vco); + + val = divider | skl_cdclk_decimal(cdclk); + /* + * FIXME if only the cd2x divider needs changing, it could be done + * without shutting off the pipe (if only one pipe is active). + */ + val |= BXT_CDCLK_CD2X_PIPE_NONE; + I915_WRITE(CDCLK_CTL, val); + + /* inform PCU of the change */ + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + /** * intel_cdclk_state_compare - Determine if two CDCLK states differ * @a: first CDCLK state From d8d4a512a6ffa97bde442023e87b9c87a37d8838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 9 Jun 2017 15:26:00 -0700 Subject: [PATCH 126/341] drm/i915/cnl: Implement CNL display init/unit sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the CNL display init/uninit sequence as outlined in Bspec. Quite similar to SKL/BXT. 
The main complication is probably the extra procmon setup we must do based on the process/voltage information we can read out from some register. v2: s/skl_dbuf/gen9_dbuf/ to follow upstream bxt needed a cdclk sanitize step, so let's add it for cnl too v3: s/CHICKEN_MISC_1/CHICKEN_MISC_2/ (Ander) v4: Rebased by Rodrigo after Ville's cdclk rework v5: Removed unnecessary Aux IO forced enable/disable, Fix DW10 setup Fix procmon Mask. (Credits-to Paulo and Clint) Remove A0 workaround. v6: Rebased on top of recent code (Rodrigo). v7: Respect the order of sanitize_ after set_ (Done by Rodrigo, Requested by Ville) v8: Commit message updated to match v5 changes besides Remove unused DW8 and an extra blank line. (all noticed by Imre). v9: Remove __attribute__((unused)) added on latest version of drm/i915/cnl: Implement .set_cdclk() for CNL. Cc: Paulo Zanoni Cc: Clint Taylor Signed-off-by: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-3-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 23 +++++ drivers/gpu/drm/i915/intel_cdclk.c | 108 +++++++++++++++++++++- drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_runtime_pm.c | 113 +++++++++++++++++++++++- 4 files changed, 243 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ac3df675b4f3..539e44e88e01 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1661,6 +1661,9 @@ enum skl_disp_power_wells { #define PHY_RESERVED (1 << 7) #define BXT_PORT_CL1CM_DW0(phy) _BXT_PHY((phy), _PORT_CL1CM_DW0_BC) +#define CNL_PORT_CL1CM_DW5 _MMIO(0x162014) +#define CL_POWER_DOWN_ENABLE (1 << 4) + #define _PORT_CL1CM_DW9_A 0x162024 #define _PORT_CL1CM_DW9_BC 0x6C024 #define IREF0RC_OFFSET_SHIFT 8 @@ -1693,6 +1696,23 @@ enum skl_disp_power_wells { #define BXT_PORT_CL2CM_DW6(phy) _BXT_PHY((phy), _PORT_CL2CM_DW6_BC) #define 
DW6_OLDO_DYN_PWR_DOWN_EN (1 << 28) +#define CNL_PORT_COMP_DW0 _MMIO(0x162100) +#define COMP_INIT (1 << 31) +#define CNL_PORT_COMP_DW1 _MMIO(0x162104) +#define CNL_PORT_COMP_DW3 _MMIO(0x16210c) +#define PROCESS_INFO_DOT_0 (0 << 26) +#define PROCESS_INFO_DOT_1 (1 << 26) +#define PROCESS_INFO_DOT_4 (2 << 26) +#define PROCESS_INFO_MASK (7 << 26) +#define PROCESS_INFO_SHIFT 26 +#define VOLTAGE_INFO_0_85V (0 << 24) +#define VOLTAGE_INFO_0_95V (1 << 24) +#define VOLTAGE_INFO_1_05V (2 << 24) +#define VOLTAGE_INFO_MASK (3 << 24) +#define VOLTAGE_INFO_SHIFT 24 +#define CNL_PORT_COMP_DW9 _MMIO(0x162124) +#define CNL_PORT_COMP_DW10 _MMIO(0x162128) + /* BXT PHY Ref registers */ #define _PORT_REF_DW3_A 0x16218C #define _PORT_REF_DW3_BC 0x6C18C @@ -6510,6 +6530,9 @@ enum { #define GLK_CL1_PWR_DOWN (1 << 11) #define GLK_CL2_PWR_DOWN (1 << 12) +#define CHICKEN_MISC_2 _MMIO(0x42084) +#define COMP_PWR_DOWN (1 << 23) + #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 #define HSW_FBCQ_DIS (1 << 22) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 0e892b8c84af..35a1432bc90b 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1485,7 +1485,6 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -__attribute__((unused)) static void cnl_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { @@ -1558,6 +1557,113 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, intel_update_cdclk(dev_priv); } +static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk.hw.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 168000: + case 336000: + ratio = dev_priv->cdclk.hw.ref == 19200 ? 35 : 28; + break; + case 528000: + ratio = dev_priv->cdclk.hw.ref == 19200 ? 
55 : 44; + break; + } + + return dev_priv->cdclk.hw.ref * ratio; +} + +static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl, expected; + + intel_update_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Some BIOS versions leave an incorrect decimal frequency value and + * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, + * so sanitize this register. + */ + cdctl = I915_READ(CDCLK_CTL); + /* + * Let's ignore the pipe field, since BIOS could have configured the + * dividers both synching to an active pipe, or asynchronously + * (PIPE_NONE). + */ + cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + + expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; + + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; +} + +/** + * cnl_init_cdclk - Initialize CDCLK on CNL + * @dev_priv: i915 device + * + * Initialize CDCLK for CNL. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void cnl_init_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state; + + cnl_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) + return; + + cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = 168000; + cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); + + cnl_set_cdclk(dev_priv, &cdclk_state); +} + +/** + * cnl_uninit_cdclk - Uninitialize CDCLK on CNL + * @dev_priv: i915 device + * + * Uninitialize CDCLK for CNL. 
This is done only + * during the display core uninitialization sequence. + */ +void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + cnl_set_cdclk(dev_priv, &cdclk_state); +} + /** * intel_cdclk_state_compare - Determine if two CDCLK states differ * @a: first CDCLK state diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2ad5fa337ed1..2bc3326f4068 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1318,6 +1318,8 @@ void intel_audio_deinit(struct drm_i915_private *dev_priv); /* intel_cdclk.c */ void skl_init_cdclk(struct drm_i915_private *dev_priv); void skl_uninit_cdclk(struct drm_i915_private *dev_priv); +void cnl_init_cdclk(struct drm_i915_private *dev_priv); +void cnl_uninit_cdclk(struct drm_i915_private *dev_priv); void bxt_init_cdclk(struct drm_i915_private *dev_priv); void bxt_uninit_cdclk(struct drm_i915_private *dev_priv); void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8a6f287d225b..436ec7a7b843 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2696,6 +2696,111 @@ void bxt_display_core_uninit(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } +#define CNL_PROCMON_IDX(val) \ + (((val) & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) >> VOLTAGE_INFO_SHIFT) +#define NUM_CNL_PROCMON \ + (CNL_PROCMON_IDX(VOLTAGE_INFO_MASK | PROCESS_INFO_MASK) + 1) + +static const struct cnl_procmon { + u32 dw1, dw9, dw10; +} cnl_procmon_values[NUM_CNL_PROCMON] = { + [CNL_PROCMON_IDX(VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0)] = + { .dw1 = 0x00 << 16, .dw9 = 0x62AB67BB, .dw10 = 0x51914F96, }, + [CNL_PROCMON_IDX(VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_0)] = + { .dw1 = 0x00 << 16, .dw9 = 
0x86E172C7, .dw10 = 0x77CA5EAB, }, + [CNL_PROCMON_IDX(VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_1)] = + { .dw1 = 0x00 << 16, .dw9 = 0x93F87FE1, .dw10 = 0x8AE871C5, }, + [CNL_PROCMON_IDX(VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_0)] = + { .dw1 = 0x00 << 16, .dw9 = 0x98FA82DD, .dw10 = 0x89E46DC1, }, + [CNL_PROCMON_IDX(VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_1)] = + { .dw1 = 0x44 << 16, .dw9 = 0x9A00AB25, .dw10 = 0x8AE38FF1, }, +}; + +static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + const struct cnl_procmon *procmon; + struct i915_power_well *well; + u32 val; + + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + + /* 1. Enable PCH Reset Handshake */ + val = I915_READ(HSW_NDE_RSTWRN_OPT); + val |= RESET_PCH_HANDSHAKE_ENABLE; + I915_WRITE(HSW_NDE_RSTWRN_OPT, val); + + /* 2. Enable Comp */ + val = I915_READ(CHICKEN_MISC_2); + val &= ~COMP_PWR_DOWN; + I915_WRITE(CHICKEN_MISC_2, val); + + val = I915_READ(CNL_PORT_COMP_DW3); + procmon = &cnl_procmon_values[CNL_PROCMON_IDX(val)]; + + WARN_ON(procmon->dw10 == 0); + + val = I915_READ(CNL_PORT_COMP_DW1); + val &= ~((0xff << 16) | 0xff); + val |= procmon->dw1; + I915_WRITE(CNL_PORT_COMP_DW1, val); + + I915_WRITE(CNL_PORT_COMP_DW9, procmon->dw9); + I915_WRITE(CNL_PORT_COMP_DW10, procmon->dw10); + + val = I915_READ(CNL_PORT_COMP_DW0); + val |= COMP_INIT; + I915_WRITE(CNL_PORT_COMP_DW0, val); + + /* 3. */ + val = I915_READ(CNL_PORT_CL1CM_DW5); + val |= CL_POWER_DOWN_ENABLE; + I915_WRITE(CNL_PORT_CL1CM_DW5, val); + + /* 4. Enable Power Well 1 (PG1) and Aux IO Power */ + mutex_lock(&power_domains->lock); + well = lookup_power_well(dev_priv, SKL_DISP_PW_1); + intel_power_well_enable(dev_priv, well); + mutex_unlock(&power_domains->lock); + + /* 5. Enable CD clock */ + cnl_init_cdclk(dev_priv); + + /* 6. 
Enable DBUF */ + gen9_dbuf_enable(dev_priv); +} + +#undef CNL_PROCMON_IDX +#undef NUM_CNL_PROCMON + +static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *well; + u32 val; + + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + + /* 1. Disable all display engine functions -> aready done */ + + /* 2. Disable DBUF */ + gen9_dbuf_disable(dev_priv); + + /* 3. Disable CD clock */ + cnl_uninit_cdclk(dev_priv); + + /* 4. Disable Power Well 1 (PG1) and Aux IO Power */ + mutex_lock(&power_domains->lock); + well = lookup_power_well(dev_priv, SKL_DISP_PW_1); + intel_power_well_disable(dev_priv, well); + mutex_unlock(&power_domains->lock); + + /* 5. Disable Comp */ + val = I915_READ(CHICKEN_MISC_2); + val |= COMP_PWR_DOWN; + I915_WRITE(CHICKEN_MISC_2, val); +} + static void chv_phy_control_init(struct drm_i915_private *dev_priv) { struct i915_power_well *cmn_bc = @@ -2828,7 +2933,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) power_domains->initializing = true; - if (IS_GEN9_BC(dev_priv)) { + if (IS_CANNONLAKE(dev_priv)) { + cnl_display_core_init(dev_priv, resume); + } else if (IS_GEN9_BC(dev_priv)) { skl_display_core_init(dev_priv, resume); } else if (IS_GEN9_LP(dev_priv)) { bxt_display_core_init(dev_priv, resume); @@ -2867,7 +2974,9 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) if (!i915.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - if (IS_GEN9_BC(dev_priv)) + if (IS_CANNONLAKE(dev_priv)) + cnl_display_core_uninit(dev_priv); + else if (IS_GEN9_BC(dev_priv)) skl_display_core_uninit(dev_priv); else if (IS_GEN9_LP(dev_priv)) bxt_display_core_uninit(dev_priv); From d1999e9ef84f2c2d012d8acec2b68d937bcfd6c4 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:01 -0700 Subject: [PATCH 127/341] drm/i915/cnl: Allow dynamic cdclk changes on CNL All the low level 
cdclk bits are present, so let's add the required hooks to reconfigure cdclk on the fly. Cannonlake also needs to adjust the minimal pixel rate as gen9 platforms. Specially for the Azalia audio case. v2: Rebase due to cnl_sanitize_cdclk() v3: Rebased by Rodrigo on top of Ville's cdclk rework. v4: Rebase moving cnl_calc_cdclk up to follow same order as previous platforms. v2: Squash drm/i915/cnl: Adjust min pixel rate. to address the current limitation where CDCLK cannot be set to 168MHz if audio is used with 96MHz. (Imre) v3: adjust some of the clock limits within bdw_adjust_min_pipe_pixel_rate. (Ville/DK/Imre). Fix commit message messed by squash. Cc: Dhinakaran Pandiyan Cc: Sanyog Kale Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-4-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 60 ++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 35a1432bc90b..b8914db7d2e1 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1400,6 +1400,16 @@ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &cdclk_state); } +static int cnl_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 336000) + return 528000; + else if (max_pixclk > 168000) + return 336000; + else + return 168000; +} + static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -1641,7 +1651,7 @@ void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state = dev_priv->cdclk.hw; - cdclk_state.cdclk = 168000; + cdclk_state.cdclk = cnl_calc_cdclk(0); cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); cnl_set_cdclk(dev_priv, &cdclk_state); @@ -1722,7 +1732,9 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, crtc_state->has_audio && 
crtc_state->port_clock >= 540000 && crtc_state->lane_count == 4) { - if (IS_GEMINILAKE(dev_priv)) + if (IS_CANNONLAKE(dev_priv)) + pixel_rate = max(316800, pixel_rate); + else if (IS_GEMINILAKE(dev_priv)) pixel_rate = max(2 * 316800, pixel_rate); else pixel_rate = max(432000, pixel_rate); @@ -1768,7 +1780,7 @@ static int intel_max_pixel_rate(struct drm_atomic_state *state) pixel_rate = crtc_state->pixel_rate; - if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, pixel_rate); @@ -1929,6 +1941,40 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } +static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int max_pixclk = intel_max_pixel_rate(state); + int cdclk, vco; + + cdclk = cnl_calc_cdclk(max_pixclk); + vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = cnl_calc_cdclk(0); + vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } + + return 0; +} + static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) { int max_cdclk_freq = dev_priv->max_cdclk_freq; @@ -1960,7 +2006,9 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) */ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) { - if (IS_GEN9_BC(dev_priv)) { + if (IS_CANNONLAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 528000; + } else if 
(IS_GEN9_BC(dev_priv)) { u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; int max_cdclk, vco; @@ -2157,6 +2205,10 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; + } else if (IS_CANNONLAKE(dev_priv)) { + dev_priv->display.set_cdclk = cnl_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + cnl_modeset_calc_cdclk; } if (IS_CANNONLAKE(dev_priv)) From 555e38d2731720a8eacc0463a26bdd74315d2d63 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:02 -0700 Subject: [PATCH 128/341] drm/i915/cnl: DDI - PLL mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the steps for PLL (un)initialization is to (un)map the corresponding DDI that is actually using that PLL. So, let's do this step following the places already established and used so far, although the spec puts this as part of PLL initialization sequences. v2: Use proper prefix on bits names as suggested by Ander. v3: Add missed "~". Without that the logic was inverted so we were disabling interrupts. Credits-to: Clinton Credits-to: Art v4: Spec is getting updated to do DDI -> PLL mapping and clock on in 2 separate reg writes. (Paulo) Also update bits definitions to use space (1 << 1) instead of (1<<1). 
(Paulo) Cc: Paulo Zanoni Cc: Art Runyan Cc: Clint Taylor Cc: Ville Syrjälä Cc: Kahola, Mika Cc: Ander Conselvan De Oliveira Signed-off-by: Rodrigo Vivi Reviewed-by: Kahola, Mika Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-5-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 9 +++++++++ drivers/gpu/drm/i915/intel_ddi.c | 23 ++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 539e44e88e01..f9e329ada437 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8134,6 +8134,15 @@ enum { #define DPLL_CFGCR1(id) _MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR1) #define DPLL_CFGCR2(id) _MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR2, _DPLL2_CFGCR2) +/* + * CNL Clocks + */ +#define DPCLKA_CFGCR0 _MMIO(0x6C200) +#define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port)+10)) +#define DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port) (3 << ((port)*2)) +#define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port)*2) +#define DPCLKA_CFGCR0_DDI_CLK_SEL(pll, port) ((pll) << ((port)*2)) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 2d35d97d170e..62a623e72fe2 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1621,13 +1621,27 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); + uint32_t val; if (WARN_ON(!pll)) return; - if (IS_GEN9_BC(dev_priv)) { - uint32_t val; + if (IS_CANNONLAKE(dev_priv)) { + /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. 
*/ + val = I915_READ(DPCLKA_CFGCR0); + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->id, port); + I915_WRITE(DPCLKA_CFGCR0, val); + /* + * Configure DPCLKA_CFGCR0 to turn on the clock for the DDI. + * This step and the step before must be done with separate + * register writes. + */ + val = I915_READ(DPCLKA_CFGCR0); + val &= ~(DPCLKA_CFGCR0_DDI_CLK_OFF(port) | + DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port)); + I915_WRITE(DPCLKA_CFGCR0, val); + } else if (IS_GEN9_BC(dev_priv)) { /* DDI -> PLL mapping */ val = I915_READ(DPLL_CTRL2); @@ -1767,7 +1781,10 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, if (dig_port) intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); - if (IS_GEN9_BC(dev_priv)) + if (IS_CANNONLAKE(dev_priv)) + I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + else if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); else if (INTEL_GEN(dev_priv) < 9) From 8b0f7e06895c0d5f3cc28e494c7816e728d40f35 Mon Sep 17 00:00:00 2001 From: "Kahola, Mika" Date: Fri, 9 Jun 2017 15:26:03 -0700 Subject: [PATCH 129/341] drm/i915: Configure DPLL's for Cannonlake DPLL's are defined in DPCLKA_CFGCR0 register (0x6C200). Let's use these definitions when computing dpll's for ddi ports. v2: (Rodrigo) Remove register that was defined in another patch with fixed name and more bits. 
Signed-off-by: Kahola, Mika Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-6-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 97dfce53df5b..99a3bfa528d3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8868,6 +8868,22 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc, return 0; } +static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv, + enum port port, + struct intel_crtc_state *pipe_config) +{ + enum intel_dpll_id id; + u32 temp; + + temp = I915_READ(DPCLKA_CFGCR0) & DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); + id = temp >> (port * 2); + + if (WARN_ON(id < SKL_DPLL0 || id > SKL_DPLL2)) + return; + + pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); +} + static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, struct intel_crtc_state *pipe_config) @@ -9055,7 +9071,9 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - if (IS_GEN9_BC(dev_priv)) + if (IS_CANNONLAKE(dev_priv)) + cannonlake_get_ddi_pll(dev_priv, port, pipe_config); + else if (IS_GEN9_BC(dev_priv)) skylake_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); From a927c927de346525901991842b0646911a220d11 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:04 -0700 Subject: [PATCH 130/341] drm/i915/cnl: Initialize PLLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although CNL follows PLL initialization more like Skylake than Broxton we have a completely different initialization sequence and registers used. 
One big difference from SKL is that CDCLK PLL is now exclusive (ADPLL) and for DDIs and MIPI we need to use DFGPLLs 0, 1 or 2. v2: Accept all Ander's suggestions and fixes: - Registers and bits names prefix - Group pll functions - bits masks fixes - remove read and modify on cfgcr1 - fix cfgcr0 setup v3: Set SSC_ENABLE for DP. Fix HDMI_MODE cfgcr0. Avoid touching cfgcr0 on DP. Add missed else on dpll_mgr definition so we use cnl one, not hsw. v3: Central freq should be always set to default and change bits definitions to (1 << 1) instead of (1<<1). (by Paulo) v4: Rebased. Cc: Paulo Zanoni Cc: Ville Syrjälä Cc: Kahola, Mika Reviewed-by: Ander Conselvan De Oliveira Signed-off-by: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-7-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 48 +++++ drivers/gpu/drm/i915/intel_dpll_mgr.c | 300 +++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_dpll_mgr.h | 4 + 3 files changed, 350 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f9e329ada437..9421915cc0f5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -60,6 +60,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) +#define _PLL(pll, a, b) ((a) + (pll)*((b)-(a))) +#define _MMIO_PLL(pll, a, b) _MMIO(_PLL(pll, a, b)) #define _PHY3(phy, ...) 
_PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) @@ -8143,6 +8145,52 @@ enum { #define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port)*2) #define DPCLKA_CFGCR0_DDI_CLK_SEL(pll, port) ((pll) << ((port)*2)) +/* CNL PLL */ +#define DPLL0_ENABLE 0x46010 +#define DPLL1_ENABLE 0x46014 +#define PLL_ENABLE (1 << 31) +#define PLL_LOCK (1 << 30) +#define PLL_POWER_ENABLE (1 << 27) +#define PLL_POWER_STATE (1 << 26) +#define CNL_DPLL_ENABLE(pll) _MMIO_PLL(pll, DPLL0_ENABLE, DPLL1_ENABLE) + +#define _CNL_DPLL0_CFGCR0 0x6C000 +#define _CNL_DPLL1_CFGCR0 0x6C080 +#define DPLL_CFGCR0_HDMI_MODE (1 << 30) +#define DPLL_CFGCR0_SSC_ENABLE (1 << 29) +#define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) +#define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) +#define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) +#define DPLL_CFGCR0_LINK_RATE_810 (2 << 25) +#define DPLL_CFGCR0_LINK_RATE_1620 (3 << 25) +#define DPLL_CFGCR0_LINK_RATE_1080 (4 << 25) +#define DPLL_CFGCR0_LINK_RATE_2160 (5 << 25) +#define DPLL_CFGCR0_LINK_RATE_3240 (6 << 25) +#define DPLL_CFGCR0_LINK_RATE_4050 (7 << 25) +#define DPLL_CFGCR0_DCO_FRACTION_MASK (0x7fff << 10) +#define DPLL_CFGCR0_DCO_FRACTION(x) ((x) << 10) +#define DPLL_CFGCR0_DCO_INTEGER_MASK (0x3ff) +#define CNL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR0, _CNL_DPLL1_CFGCR0) + +#define _CNL_DPLL0_CFGCR1 0x6C004 +#define _CNL_DPLL1_CFGCR1 0x6C084 +#define DPLL_CFGCR1_QDIV_RATIO_MASK (0xff << 10) +#define DPLL_CFGCR1_QDIV_RATIO(x) ((x) << 10) +#define DPLL_CFGCR1_QDIV_MODE(x) ((x) << 9) +#define DPLL_CFGCR1_KDIV_MASK (7 << 6) +#define DPLL_CFGCR1_KDIV(x) ((x) << 6) +#define DPLL_CFGCR1_KDIV_1 (1 << 6) +#define DPLL_CFGCR1_KDIV_2 (2 << 6) +#define DPLL_CFGCR1_KDIV_4 (4 << 6) +#define DPLL_CFGCR1_PDIV_MASK (0xf << 2) +#define DPLL_CFGCR1_PDIV(x) ((x) << 2) +#define DPLL_CFGCR1_PDIV_2 (1 << 2) +#define DPLL_CFGCR1_PDIV_3 (2 << 2) +#define DPLL_CFGCR1_PDIV_5 (4 << 2) +#define DPLL_CFGCR1_PDIV_7 (8 << 2) +#define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) 
+#define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index b4de632f1158..903c38dc683a 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1321,7 +1321,6 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, return true; } - static bool skl_ddi_dp_set_dpll_hw_state(int clock, struct intel_dpll_hw_state *dpll_hw_state) @@ -1967,6 +1966,301 @@ static const struct intel_dpll_mgr bxt_pll_mgr = { .dump_hw_state = bxt_dump_hw_state, }; +static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + uint32_t val; + + /* 1. Enable DPLL power in DPLL_ENABLE. */ + val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val |= PLL_POWER_ENABLE; + I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + + /* 2. Wait for DPLL power state enabled in DPLL_ENABLE. */ + if (intel_wait_for_register(dev_priv, + CNL_DPLL_ENABLE(pll->id), + PLL_POWER_STATE, + PLL_POWER_STATE, + 5)) + DRM_ERROR("PLL %d Power not enabled\n", pll->id); + + /* + * 3. Configure DPLL_CFGCR0 to set SSC enable/disable, + * select DP mode, and set DP link rate. + */ + val = pll->state.hw_state.cfgcr0; + I915_WRITE(CNL_DPLL_CFGCR0(pll->id), val); + + /* 4. Reab back to ensure writes completed */ + POSTING_READ(CNL_DPLL_CFGCR0(pll->id)); + + /* 3. Configure DPLL_CFGCR0 */ + /* Avoid touch CFGCR1 if HDMI mode is not enabled */ + if (pll->state.hw_state.cfgcr0 & DPLL_CTRL1_HDMI_MODE(pll->id)) { + val = pll->state.hw_state.cfgcr1; + I915_WRITE(CNL_DPLL_CFGCR1(pll->id), val); + /* 4. Reab back to ensure writes completed */ + POSTING_READ(CNL_DPLL_CFGCR1(pll->id)); + } + + /* + * 5. 
If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching + * Sequence Before Frequency Change + * + * FIXME: (DVFS) is used to adjust the display voltage to match the + * display clock frequencies + */ + + /* 6. Enable DPLL in DPLL_ENABLE. */ + val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val |= PLL_ENABLE; + I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + + /* 7. Wait for PLL lock status in DPLL_ENABLE. */ + if (intel_wait_for_register(dev_priv, + CNL_DPLL_ENABLE(pll->id), + PLL_LOCK, + PLL_LOCK, + 5)) + DRM_ERROR("PLL %d not locked\n", pll->id); + + /* + * 8. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching + * Sequence After Frequency Change + * + * FIXME: (DVFS) is used to adjust the display voltage to match the + * display clock frequencies + */ + + /* + * 9. turn on the clock for the DDI and map the DPLL to the DDI + * Done at intel_ddi_clk_select + */ +} + +static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + uint32_t val; + + /* + * 1. Configure DPCLKA_CFGCR0 to turn off the clock for the DDI. + * Done at intel_ddi_post_disable + */ + + /* + * 2. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching + * Sequence Before Frequency Change + * + * FIXME: (DVFS) is used to adjust the display voltage to match the + * display clock frequencies + */ + + /* 3. Disable DPLL through DPLL_ENABLE. */ + val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val &= ~PLL_ENABLE; + I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + + /* 4. Wait for PLL not locked status in DPLL_ENABLE. */ + if (intel_wait_for_register(dev_priv, + CNL_DPLL_ENABLE(pll->id), + PLL_LOCK, + 0, + 5)) + DRM_ERROR("PLL %d locked\n", pll->id); + + /* + * 5. 
If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching + * Sequence After Frequency Change + * + * FIXME: (DVFS) is used to adjust the display voltage to match the + * display clock frequencies + */ + + /* 6. Disable DPLL power in DPLL_ENABLE. */ + val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val &= ~PLL_POWER_ENABLE; + I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + + /* 7. Wait for DPLL power state disabled in DPLL_ENABLE. */ + if (intel_wait_for_register(dev_priv, + CNL_DPLL_ENABLE(pll->id), + PLL_POWER_STATE, + 0, + 5)) + DRM_ERROR("PLL %d Power not disabled\n", pll->id); +} + +static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + uint32_t val; + bool ret; + + if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + return false; + + ret = false; + + val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + if (!(val & PLL_ENABLE)) + goto out; + + val = I915_READ(CNL_DPLL_CFGCR0(pll->id)); + hw_state->cfgcr0 = val; + + /* avoid reading back stale values if HDMI mode is not enabled */ + if (val & DPLL_CFGCR0_HDMI_MODE) { + hw_state->cfgcr1 = I915_READ(CNL_DPLL_CFGCR1(pll->id)); + } + ret = true; + +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + + return ret; +} + +static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state, + int clock) +{ + uint32_t cfgcr0, cfgcr1; + struct skl_wrpll_params wrpll_params = { 0, }; + + cfgcr0 = DPLL_CFGCR0_HDMI_MODE; + + /* FIXME: Proper wrpll calculation done in a following patch */ + return false; + + cfgcr0 |= DPLL_CFGCR0_DCO_FRACTION(wrpll_params.dco_fraction) | + wrpll_params.dco_integer; + + cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(wrpll_params.qdiv_ratio) | + DPLL_CFGCR1_QDIV_MODE(wrpll_params.qdiv_mode) | + DPLL_CFGCR1_KDIV(wrpll_params.kdiv) | + DPLL_CFGCR1_PDIV(wrpll_params.pdiv) | + wrpll_params.central_freq | + 
DPLL_CFGCR1_CENTRAL_FREQ; + + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + + crtc_state->dpll_hw_state.cfgcr0 = cfgcr0; + crtc_state->dpll_hw_state.cfgcr1 = cfgcr1; + return true; +} + +bool cnl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) +{ + uint32_t cfgcr0; + + cfgcr0 = DPLL_CFGCR0_SSC_ENABLE; + + switch (clock / 2) { + case 81000: + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_810; + break; + case 135000: + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1350; + break; + case 270000: + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_2700; + break; + /* eDP 1.4 rates */ + case 162000: + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1620; + break; + case 108000: + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1080; + break; + case 216000: + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_2160; + break; + case 324000: + /* Some SKUs may require elevated I/O voltage to support this */ + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_3240; + break; + case 405000: + /* Some SKUs may require elevated I/O voltage to support this */ + cfgcr0 |= DPLL_CFGCR0_LINK_RATE_4050; + break; + } + + dpll_hw_state->cfgcr0 = cfgcr0; + return true; +} + +static struct intel_shared_dpll * +cnl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + struct intel_shared_dpll *pll; + int clock = crtc_state->port_clock; + bool bret; + struct intel_dpll_hw_state dpll_hw_state; + + memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); + + if (encoder->type == INTEL_OUTPUT_HDMI) { + bret = cnl_ddi_hdmi_pll_dividers(crtc, crtc_state, clock); + if (!bret) { + DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); + return NULL; + } + } else if (encoder->type == INTEL_OUTPUT_DP || + encoder->type == INTEL_OUTPUT_DP_MST || + encoder->type == INTEL_OUTPUT_EDP) { + bret = cnl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state); + if (!bret) { + DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); + return NULL; + } + crtc_state->dpll_hw_state = dpll_hw_state; + } else { + 
DRM_DEBUG_KMS("Skip DPLL setup for encoder %d\n", + encoder->type); + return NULL; + } + + pll = intel_find_shared_dpll(crtc, crtc_state, + DPLL_ID_SKL_DPLL0, + DPLL_ID_SKL_DPLL2); + if (!pll) { + DRM_DEBUG_KMS("No PLL selected\n"); + return NULL; + } + + intel_reference_shared_dpll(pll, crtc_state); + + return pll; +} + +static const struct intel_shared_dpll_funcs cnl_ddi_pll_funcs = { + .enable = cnl_ddi_pll_enable, + .disable = cnl_ddi_pll_disable, + .get_hw_state = cnl_ddi_pll_get_hw_state, +}; + +static const struct dpll_info cnl_plls[] = { + { "DPLL 0", DPLL_ID_SKL_DPLL0, &cnl_ddi_pll_funcs, 0 }, + { "DPLL 1", DPLL_ID_SKL_DPLL1, &cnl_ddi_pll_funcs, 0 }, + { "DPLL 2", DPLL_ID_SKL_DPLL2, &cnl_ddi_pll_funcs, 0 }, + { NULL, -1, NULL, }, +}; + +static const struct intel_dpll_mgr cnl_pll_mgr = { + .dpll_info = cnl_plls, + .get_dpll = cnl_get_dpll, + .dump_hw_state = skl_dump_hw_state, +}; + /** * intel_shared_dpll_init - Initialize shared DPLLs * @dev: drm device @@ -1980,7 +2274,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_GEN9_BC(dev_priv)) + if (IS_CANNONLAKE(dev_priv)) + dpll_mgr = &cnl_pll_mgr; + else if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; else if (IS_GEN9_LP(dev_priv)) dpll_mgr = &bxt_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index f8d13a947c13..f24ccf443d25 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -128,6 +128,10 @@ struct intel_dpll_hw_state { /* HDMI only, 0 when used for DP */ uint32_t cfgcr1, cfgcr2; + /* cnl */ + uint32_t cfgcr0; + /* CNL also uses cfgcr1 */ + /* bxt */ uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; From 4557c6072724fbb8e339b589e1897b20973b3b69 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:05 -0700 Subject: [PATCH 131/341] drm/i915: Add MMIO helper for 6 ports with different offsets. 
Also new registers can have different mmio offsets per different lane per port. v2: Use _PICK as PORT3 instead of creating a new macro with if per port. v3: Use _PICK directly on MMIO_PORT6. While MMIO_PORT isn't flexible enough let's continue with MMIO_PORT6 as we have MMIO_PORT3. Cc: Manasi Navare Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-8-git-send-email-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9421915cc0f5..52a15ce98e2e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -62,6 +62,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _PLL(pll, a, b) ((a) + (pll)*((b)-(a))) #define _MMIO_PLL(pll, a, b) _MMIO(_PLL(pll, a, b)) +#define _MMIO_PORT6(port, a, b, c, d, e, f) _MMIO(_PICK(port, a, b, c, d, e, f)) +#define _MMIO_PORT6_LN(port, ln, a0, a1, b, c, d, e, f) \ + _MMIO(_PICK(port, a0, b, c, d, e, f) + (ln * (a1 - a0))) #define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) From 04416108ccea55f7536abeb9f81f1879922774eb Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:06 -0700 Subject: [PATCH 132/341] drm/i915/cnl: Add registers related to voltage swing sequences. This are the registers and bits needed for the voltage swing sequence on Cannonlake. v2: Remove CL_DW5 that was wrongly defined. v3: Use (1 << 1) instead of (1<<1) as Paulo suggested Change DW2 swing sel upper and lower macros to do the bit selection instead of definint a table that doesn't match the spec. It is based on a Manasi version of it. Credits-to: Manasi. v4: Let SCALING_MODE_SEL flexible. 
(Manasi) Cc: Paulo Zanoni Cc: Manasi Navare Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-9-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 140 ++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 52a15ce98e2e..d9d54113be10 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1693,6 +1693,146 @@ enum skl_disp_power_wells { #define OCL2_LDOFUSE_PWR_DIS (1 << 6) #define BXT_PORT_CL1CM_DW30(phy) _BXT_PHY((phy), _PORT_CL1CM_DW30_BC) +#define _CNL_PORT_PCS_DW1_GRP_AE 0x162304 +#define _CNL_PORT_PCS_DW1_GRP_B 0x162384 +#define _CNL_PORT_PCS_DW1_GRP_C 0x162B04 +#define _CNL_PORT_PCS_DW1_GRP_D 0x162B84 +#define _CNL_PORT_PCS_DW1_GRP_F 0x162A04 +#define _CNL_PORT_PCS_DW1_LN0_AE 0x162404 +#define _CNL_PORT_PCS_DW1_LN0_B 0x162604 +#define _CNL_PORT_PCS_DW1_LN0_C 0x162C04 +#define _CNL_PORT_PCS_DW1_LN0_D 0x162E04 +#define _CNL_PORT_PCS_DW1_LN0_F 0x162804 +#define CNL_PORT_PCS_DW1_GRP(port) _MMIO_PORT6(port, \ + _CNL_PORT_PCS_DW1_GRP_AE, \ + _CNL_PORT_PCS_DW1_GRP_B, \ + _CNL_PORT_PCS_DW1_GRP_C, \ + _CNL_PORT_PCS_DW1_GRP_D, \ + _CNL_PORT_PCS_DW1_GRP_AE, \ + _CNL_PORT_PCS_DW1_GRP_F) +#define CNL_PORT_PCS_DW1_LN0(port) _MMIO_PORT6(port, \ + _CNL_PORT_PCS_DW1_LN0_AE, \ + _CNL_PORT_PCS_DW1_LN0_B, \ + _CNL_PORT_PCS_DW1_LN0_C, \ + _CNL_PORT_PCS_DW1_LN0_D, \ + _CNL_PORT_PCS_DW1_LN0_AE, \ + _CNL_PORT_PCS_DW1_LN0_F) +#define COMMON_KEEPER_EN (1 << 26) + +#define _CNL_PORT_TX_DW2_GRP_AE 0x162348 +#define _CNL_PORT_TX_DW2_GRP_B 0x1623C8 +#define _CNL_PORT_TX_DW2_GRP_C 0x162B48 +#define _CNL_PORT_TX_DW2_GRP_D 0x162BC8 +#define _CNL_PORT_TX_DW2_GRP_F 0x162A48 +#define _CNL_PORT_TX_DW2_LN0_AE 0x162448 +#define _CNL_PORT_TX_DW2_LN0_B 0x162648 +#define _CNL_PORT_TX_DW2_LN0_C 0x162C48 +#define _CNL_PORT_TX_DW2_LN0_D 0x162E48 +#define _CNL_PORT_TX_DW2_LN0_F 
0x162A48 +#define CNL_PORT_TX_DW2_GRP(port) _MMIO_PORT6(port, \ + _CNL_PORT_TX_DW2_GRP_AE, \ + _CNL_PORT_TX_DW2_GRP_B, \ + _CNL_PORT_TX_DW2_GRP_C, \ + _CNL_PORT_TX_DW2_GRP_D, \ + _CNL_PORT_TX_DW2_GRP_AE, \ + _CNL_PORT_TX_DW2_GRP_F) +#define CNL_PORT_TX_DW2_LN0(port) _MMIO_PORT6(port, \ + _CNL_PORT_TX_DW2_LN0_AE, \ + _CNL_PORT_TX_DW2_LN0_B, \ + _CNL_PORT_TX_DW2_LN0_C, \ + _CNL_PORT_TX_DW2_LN0_D, \ + _CNL_PORT_TX_DW2_LN0_AE, \ + _CNL_PORT_TX_DW2_LN0_F) +#define SWING_SEL_UPPER(x) ((x >> 3) << 15) +#define SWING_SEL_LOWER(x) ((x & 0x7) << 11) +#define RCOMP_SCALAR(x) ((x) << 0) + +#define _CNL_PORT_TX_DW4_GRP_AE 0x162350 +#define _CNL_PORT_TX_DW4_GRP_B 0x1623D0 +#define _CNL_PORT_TX_DW4_GRP_C 0x162B50 +#define _CNL_PORT_TX_DW4_GRP_D 0x162BD0 +#define _CNL_PORT_TX_DW4_GRP_F 0x162A50 +#define _CNL_PORT_TX_DW4_LN0_AE 0x162450 +#define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 +#define _CNL_PORT_TX_DW4_LN0_B 0x162650 +#define _CNL_PORT_TX_DW4_LN0_C 0x162C50 +#define _CNL_PORT_TX_DW4_LN0_D 0x162E50 +#define _CNL_PORT_TX_DW4_LN0_F 0x162850 +#define CNL_PORT_TX_DW4_GRP(port) _MMIO_PORT6(port, \ + _CNL_PORT_TX_DW4_GRP_AE, \ + _CNL_PORT_TX_DW4_GRP_B, \ + _CNL_PORT_TX_DW4_GRP_C, \ + _CNL_PORT_TX_DW4_GRP_D, \ + _CNL_PORT_TX_DW4_GRP_AE, \ + _CNL_PORT_TX_DW4_GRP_F) +#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO_PORT6_LN(port, ln, \ + _CNL_PORT_TX_DW4_LN0_AE, \ + _CNL_PORT_TX_DW4_LN1_AE, \ + _CNL_PORT_TX_DW4_LN0_B, \ + _CNL_PORT_TX_DW4_LN0_C, \ + _CNL_PORT_TX_DW4_LN0_D, \ + _CNL_PORT_TX_DW4_LN0_AE, \ + _CNL_PORT_TX_DW4_LN0_F) +#define LOADGEN_SELECT (1 << 31) +#define POST_CURSOR_1(x) ((x) << 12) +#define POST_CURSOR_2(x) ((x) << 6) +#define CURSOR_COEFF(x) ((x) << 0) + +#define _CNL_PORT_TX_DW5_GRP_AE 0x162354 +#define _CNL_PORT_TX_DW5_GRP_B 0x1623D4 +#define _CNL_PORT_TX_DW5_GRP_C 0x162B54 +#define _CNL_PORT_TX_DW5_GRP_D 0x162BD4 +#define _CNL_PORT_TX_DW5_GRP_F 0x162A54 +#define _CNL_PORT_TX_DW5_LN0_AE 0x162454 +#define _CNL_PORT_TX_DW5_LN0_B 0x162654 +#define _CNL_PORT_TX_DW5_LN0_C 0x162C54 
+#define _CNL_PORT_TX_DW5_LN0_D 0x162ED4 +#define _CNL_PORT_TX_DW5_LN0_F 0x162854 +#define CNL_PORT_TX_DW5_GRP(port) _MMIO_PORT6(port, \ + _CNL_PORT_TX_DW5_GRP_AE, \ + _CNL_PORT_TX_DW5_GRP_B, \ + _CNL_PORT_TX_DW5_GRP_C, \ + _CNL_PORT_TX_DW5_GRP_D, \ + _CNL_PORT_TX_DW5_GRP_AE, \ + _CNL_PORT_TX_DW5_GRP_F) +#define CNL_PORT_TX_DW5_LN0(port) _MMIO_PORT6(port, \ + _CNL_PORT_TX_DW5_LN0_AE, \ + _CNL_PORT_TX_DW5_LN0_B, \ + _CNL_PORT_TX_DW5_LN0_C, \ + _CNL_PORT_TX_DW5_LN0_D, \ + _CNL_PORT_TX_DW5_LN0_AE, \ + _CNL_PORT_TX_DW5_LN0_F) +#define TX_TRAINING_EN (1 << 31) +#define TAP3_DISABLE (1 << 29) +#define SCALING_MODE_SEL(x) ((x) << 18) +#define RTERM_SELECT(x) ((x) << 3) + +#define _CNL_PORT_TX_DW7_GRP_AE 0x16235C +#define _CNL_PORT_TX_DW7_GRP_B 0x1623DC +#define _CNL_PORT_TX_DW7_GRP_C 0x162B5C +#define _CNL_PORT_TX_DW7_GRP_D 0x162BDC +#define _CNL_PORT_TX_DW7_GRP_F 0x162A5C +#define _CNL_PORT_TX_DW7_LN0_AE 0x16245C +#define _CNL_PORT_TX_DW7_LN0_B 0x16265C +#define _CNL_PORT_TX_DW7_LN0_C 0x162C5C +#define _CNL_PORT_TX_DW7_LN0_D 0x162EDC +#define _CNL_PORT_TX_DW7_LN0_F 0x16285C +#define CNL_PORT_TX_DW7_GRP(port) _MMIO_PORT6(port, \ + _CNL_PORT_TX_DW7_GRP_AE, \ + _CNL_PORT_TX_DW7_GRP_B, \ + _CNL_PORT_TX_DW7_GRP_C, \ + _CNL_PORT_TX_DW7_GRP_D, \ + _CNL_PORT_TX_DW7_GRP_AE, \ + _CNL_PORT_TX_DW7_GRP_F) +#define CNL_PORT_TX_DW7_LN0(port) _MMIO_PORT6(port, \ + _CNL_PORT_TX_DW7_LN0_AE, \ + _CNL_PORT_TX_DW7_LN0_B, \ + _CNL_PORT_TX_DW7_LN0_C, \ + _CNL_PORT_TX_DW7_LN0_D, \ + _CNL_PORT_TX_DW7_LN0_AE, \ + _CNL_PORT_TX_DW7_LN0_F) +#define N_SCALAR(x) ((x) << 24) + /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. */ From 83fb7ab404fdcf314ca3a6ef4cd9f6790a0767f4 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:07 -0700 Subject: [PATCH 133/341] drm/i915/cnl: Add DDI Buffer translation tables for Cannonlake. These tables are used on voltage wswing sequence initialization on Cannonlake. 
It is a complete new format now in use by the voltage swing team, not following any other standard in use by any other platform. Also the registers are different as well. So let's redefine the translation table for Cannonlake. The table is huge. So we minimized with the fields that are different or might be different anytime soon. The common values will be hardcoded on the voltage swing sequence. v2: Merge the lower and the upper bits to match the spec table and make review easier. This was possible with the good idea for Manasi with a better way to handle it on the bit macro definition presented on previous patch. Credits-to: Manasi Cc: Manasi Navare Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-10-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 140 +++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 62a623e72fe2..ee75d3ba89ae 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -353,6 +353,146 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ }; +struct cnl_ddi_buf_trans { + u32 dw2_swing_sel; + u32 dw7_n_scalar; + u32 dw4_cursor_coeff; + u32 dw4_post_cursor_2; + u32 dw4_post_cursor_1; +}; + +/* Voltage Swing Programming for VccIO 0.85V for DP */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_0_85V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x5D, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x6A, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ + { 0xB, 0x7A, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ + { 0x6, 0x7C, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ + { 0xA, 0x69, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xB, 0x7A, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ + { 0x6, 0x7C, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ + { 0xB, 0x7D, 0x3C, 0x00, 0x03 }, /* 650 725 0.9 
*/ + { 0x6, 0x7C, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ + { 0x6, 0x7B, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 0.85V for HDMI */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_0_85V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x60, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ + { 0xB, 0x73, 0x36, 0x00, 0x09 }, /* 450 650 3.2 */ + { 0x6, 0x7F, 0x31, 0x00, 0x0E }, /* 450 850 5.5 */ + { 0xB, 0x73, 0x3F, 0x00, 0x00 }, /* 650 650 0.0 */ + { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 650 850 2.3 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 850 850 0.0 */ + { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ +}; + +/* Voltage Swing Programming for VccIO 0.85V for eDP */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_85V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x66, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ + { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ + { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ + { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ + { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ + { 0xA, 0x66, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ + { 0xB, 0x70, 0x3C, 0x00, 0x03 }, /* 460 600 2.3 */ + { 0xC, 0x75, 0x3C, 0x00, 0x03 }, /* 537 700 2.3 */ + { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 0.95V for DP */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_0_95V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x5D, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x6A, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ + { 0xB, 0x7A, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ + { 0x6, 0x7C, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ + { 0xA, 0x69, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xB, 0x7A, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ + { 0x6, 0x7C, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ + { 0xB, 0x7D, 0x3C, 0x00, 0x03 }, /* 650 725 0.9 */ + { 0x6, 0x7C, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ + { 0x6, 0x7B, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ +}; + +/* Voltage 
Swing Programming for VccIO 0.95V for HDMI */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_0_95V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x5C, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ + { 0xB, 0x69, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ + { 0x5, 0x76, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ + { 0xA, 0x5E, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ + { 0xB, 0x69, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ + { 0xB, 0x79, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ + { 0x6, 0x7D, 0x32, 0x00, 0x0D }, /* 600 1000 4.4 */ + { 0x5, 0x76, 0x3F, 0x00, 0x00 }, /* 800 800 0.0 */ + { 0x6, 0x7D, 0x39, 0x00, 0x06 }, /* 800 1000 1.9 */ + { 0x6, 0x7F, 0x39, 0x00, 0x06 }, /* 850 1050 1.8 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 0.95V for eDP */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_95V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x61, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ + { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ + { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ + { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ + { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ + { 0xA, 0x61, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ + { 0xB, 0x68, 0x39, 0x00, 0x06 }, /* 460 600 2.3 */ + { 0xC, 0x6E, 0x39, 0x00, 0x06 }, /* 537 700 2.3 */ + { 0x4, 0x7F, 0x3A, 0x00, 0x05 }, /* 460 600 2.3 */ + { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 1.05V for DP */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_1_05V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x58, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ + { 0xB, 0x64, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ + { 0x5, 0x70, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ + { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 400 1050 8.4 */ + { 0xB, 0x64, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ + { 0x5, 0x73, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ + { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 550 1050 5.6 */ + { 0x5, 0x76, 0x3E, 0x00, 0x01 
}, /* 850 900 0.5 */ + { 0x6, 0x7F, 0x36, 0x00, 0x09 }, /* 750 1050 2.9 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 1.05V for HDMI */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_1_05V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x58, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ + { 0xB, 0x64, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ + { 0x5, 0x70, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ + { 0xA, 0x5B, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ + { 0xB, 0x64, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ + { 0x5, 0x73, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ + { 0x6, 0x7C, 0x32, 0x00, 0x0D }, /* 600 1000 4.4 */ + { 0x5, 0x70, 0x3F, 0x00, 0x00 }, /* 800 800 0.0 */ + { 0x6, 0x7C, 0x39, 0x00, 0x06 }, /* 800 1000 1.9 */ + { 0x6, 0x7F, 0x39, 0x00, 0x06 }, /* 850 1050 1.8 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 1.05V for eDP */ +static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { + /* NT mV Trans mV db */ + { 0xA, 0x5E, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ + { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ + { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ + { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ + { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ + { 0xA, 0x5E, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ + { 0xB, 0x64, 0x39, 0x00, 0x06 }, /* 460 600 2.3 */ + { 0xE, 0x6A, 0x39, 0x00, 0x06 }, /* 537 700 2.3 */ + { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ +}; + enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) { switch (encoder->type) { From cf54ca8bc5674049889d208131cb1b0e15161a2c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:08 -0700 Subject: [PATCH 134/341] drm/i915/cnl: Implement voltage swing sequence. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an important part of the DDI initalization as well as for changing the voltage during DisplayPort link training. This new sequence for Cannonlake is more like Broxton style but still with different registers, different table and different steps. v2: Do not write to DW4_GRP to avoid overwrite individual loadgen. Fix PORT_CL_DW5 SUS Clock Config set. v3: As previous platforms use only eDP table if low voltage was requested. v4: fix Werror:maybe uninitialized (Paulo) v5: Rebase on top of dw2_swing_sel changes on previous patches. v6: Using flexible SCALING_MODE_SEL(x). Cc: Manasi Navare Cc: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-11-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ddi.c | 176 ++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_dp.c | 2 +- 3 files changed, 177 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d9d54113be10..88e4707f571d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1668,6 +1668,7 @@ enum skl_disp_power_wells { #define CNL_PORT_CL1CM_DW5 _MMIO(0x162014) #define CL_POWER_DOWN_ENABLE (1 << 4) +#define SUS_CLOCK_CONFIG (3 << 0) #define _PORT_CL1CM_DW9_A 0x162024 #define _PORT_CL1CM_DW9_BC 0x6C024 diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ee75d3ba89ae..9eb2940fc889 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1720,6 +1720,173 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) DP_TRAIN_VOLTAGE_SWING_MASK; } +static const struct cnl_ddi_buf_trans * +cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, + u32 voltage, int *n_entries) +{ + if (voltage == VOLTAGE_INFO_0_85V) { + 
*n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_85V); + return cnl_ddi_translations_hdmi_0_85V; + } else if (voltage == VOLTAGE_INFO_0_95V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_95V); + return cnl_ddi_translations_hdmi_0_95V; + } else if (voltage == VOLTAGE_INFO_1_05V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); + return cnl_ddi_translations_hdmi_1_05V; + } + return NULL; +} + +static const struct cnl_ddi_buf_trans * +cnl_get_buf_trans_dp(struct drm_i915_private *dev_priv, + u32 voltage, int *n_entries) +{ + if (voltage == VOLTAGE_INFO_0_85V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_85V); + return cnl_ddi_translations_dp_0_85V; + } else if (voltage == VOLTAGE_INFO_0_95V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_95V); + return cnl_ddi_translations_dp_0_95V; + } else if (voltage == VOLTAGE_INFO_1_05V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); + return cnl_ddi_translations_dp_1_05V; + } + return NULL; +} + +static const struct cnl_ddi_buf_trans * +cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, + u32 voltage, int *n_entries) +{ + if (dev_priv->vbt.edp.low_vswing) { + if (voltage == VOLTAGE_INFO_0_85V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_85V); + return cnl_ddi_translations_dp_0_85V; + } else if (voltage == VOLTAGE_INFO_0_95V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_95V); + return cnl_ddi_translations_edp_0_95V; + } else if (voltage == VOLTAGE_INFO_1_05V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); + return cnl_ddi_translations_edp_1_05V; + } + return NULL; + } else { + return cnl_get_buf_trans_dp(dev_priv, voltage, n_entries); + } +} + +static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, + u32 level, enum port port, int type) +{ + const struct cnl_ddi_buf_trans *ddi_translations = NULL; + u32 n_entries, val, voltage; + int ln; + + /* + * Values for each port type are listed in + * voltage swing programming 
tables. + * Vccio voltage found in PORT_COMP_DW3. + */ + voltage = I915_READ(CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; + + if (type == INTEL_OUTPUT_HDMI) { + ddi_translations = cnl_get_buf_trans_hdmi(dev_priv, + voltage, &n_entries); + } else if (type == INTEL_OUTPUT_DP) { + ddi_translations = cnl_get_buf_trans_dp(dev_priv, + voltage, &n_entries); + } else if (type == INTEL_OUTPUT_EDP) { + ddi_translations = cnl_get_buf_trans_edp(dev_priv, + voltage, &n_entries); + } + + if (ddi_translations == NULL) { + MISSING_CASE(voltage); + return; + } + + if (level >= n_entries) { + DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + level = n_entries - 1; + } + + /* Set PORT_TX_DW5 Scaling Mode Sel to 010b. */ + val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); + val |= SCALING_MODE_SEL(2); + I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW2 */ + val = I915_READ(CNL_PORT_TX_DW2_LN0(port)); + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); + /* Rcomp scalar is fixed as 0x98 for every table entry */ + val |= RCOMP_SCALAR(0x98); + I915_WRITE(CNL_PORT_TX_DW2_GRP(port), val); + + /* Program PORT_TX_DW4 */ + /* We cannot write to GRP. 
It would overrite individual loadgen */ + for (ln = 0; ln < 4; ln++) { + val = I915_READ(CNL_PORT_TX_DW4_LN(port, ln)); + val |= POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); + val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); + val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); + I915_WRITE(CNL_PORT_TX_DW4_LN(port, ln), val); + } + + /* Program PORT_TX_DW5 */ + /* All DW5 values are fixed for every table entry */ + val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); + val |= RTERM_SELECT(6); + val |= TAP3_DISABLE; + I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW7 */ + val = I915_READ(CNL_PORT_TX_DW7_LN0(port)); + val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); + I915_WRITE(CNL_PORT_TX_DW7_GRP(port), val); +} + +static void cnl_ddi_vswing_sequence(struct drm_i915_private *dev_priv, + u32 level, enum port port, int type) +{ + u32 val; + + /* + * 1. If port type is eDP or DP, + * set PORT_PCS_DW1 cmnkeeper_enable to 1b, + * else clear to 0b. + */ + val = I915_READ(CNL_PORT_PCS_DW1_LN0(port)); + if (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP) + val |= COMMON_KEEPER_EN; + else + val &= ~COMMON_KEEPER_EN; + I915_WRITE(CNL_PORT_PCS_DW1_GRP(port), val); + + /* 2. Program loadgen select */ + /* + * FIXME: Program PORT_TX_DW4_LN depending on Bit rate and used lanes + */ + + /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ + val = I915_READ(CNL_PORT_CL1CM_DW5); + val |= SUS_CLOCK_CONFIG; + I915_WRITE(CNL_PORT_CL1CM_DW5, val); + + /* 4. Clear training enable to change swing values */ + val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); + val &= ~TX_TRAINING_EN; + I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); + + /* 5. Program swing and de-emphasis */ + cnl_ddi_vswing_program(dev_priv, level, port, type); + + /* 6. 
Set training enable to trigger update */ + val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); + val |= TX_TRAINING_EN; + I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); +} + static uint32_t translate_signal_level(int signal_levels) { int i; @@ -1752,7 +1919,11 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); - + else if (IS_CANNONLAKE(dev_priv)) { + cnl_ddi_vswing_sequence(dev_priv, level, port, encoder->type); + /* DDI_BUF_CTL bits 27:24 are reserved on CNL */ + return 0; + } return DDI_BUF_TRANS_SELECT(level); } @@ -1850,6 +2021,9 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, INTEL_OUTPUT_HDMI); + else if (IS_CANNONLAKE(dev_priv)) + cnl_ddi_vswing_sequence(dev_priv, level, port, + INTEL_OUTPUT_HDMI); intel_hdmi->set_infoframes(drm_encoder, has_hdmi_sink, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d1ee278064b7..7a3a42c95381 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3492,7 +3492,7 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp) if (HAS_DDI(dev_priv)) { signal_levels = ddi_signal_levels(intel_dp); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEN9_LP(dev_priv) || IS_CANNONLAKE(dev_priv)) signal_levels = 0; else mask = DDI_BUF_EMP_MASK; From 0091abc3a621f4acf41e35ea00a4ab4f064c2fb7 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Fri, 9 Jun 2017 15:26:09 -0700 Subject: [PATCH 135/341] drm/i915/cnl: Enable loadgen_select bit for vswing sequence vswing programming sequence step 2 requires the Loadgen_select bit to be set in PORT_TX_DW4 lane reigsters per table defined by Bit rate and lane width. Implemented the change that was marked as FIXME in the driver. v2: (Rodrigo) checkpatch fixes. 
Signed-off-by: Clint Taylor Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-12-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 38 +++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 9eb2940fc889..721c2e2c12a3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1846,10 +1846,24 @@ static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, I915_WRITE(CNL_PORT_TX_DW7_GRP(port), val); } -static void cnl_ddi_vswing_sequence(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + enum port port = intel_ddi_get_encoder_port(encoder); + int type = encoder->type; + int width = 0; + int rate = 0; u32 val; + int ln = 0; + + if ((intel_dp) && (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP)) { + width = intel_dp->lane_count; + rate = intel_dp->link_rate; + } else { + width = 4; + /* Rate is always < than 6GHz for HDMI */ + } /* * 1. If port type is eDP or DP, @@ -1865,8 +1879,21 @@ static void cnl_ddi_vswing_sequence(struct drm_i915_private *dev_priv, /* 2. 
Program loadgen select */ /* - * FIXME: Program PORT_TX_DW4_LN depending on Bit rate and used lanes + * Program PORT_TX_DW4_LN depending on Bit rate and used lanes + * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) + * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) + * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) */ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(CNL_PORT_TX_DW4_LN(port, ln)); + val &= ~LOADGEN_SELECT; + + if (((rate < 600000) && (width == 4) && (ln >= 1)) || + ((rate < 600000) && (width < 4) && ((ln == 1) || (ln == 2)))) { + val |= LOADGEN_SELECT; + } + I915_WRITE(CNL_PORT_TX_DW4_LN(port, ln), val); + } /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ val = I915_READ(CNL_PORT_CL1CM_DW5); @@ -1920,7 +1947,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); else if (IS_CANNONLAKE(dev_priv)) { - cnl_ddi_vswing_sequence(dev_priv, level, port, encoder->type); + cnl_ddi_vswing_sequence(encoder, level); /* DDI_BUF_CTL bits 27:24 are reserved on CNL */ return 0; } @@ -2022,8 +2049,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, bxt_ddi_vswing_sequence(dev_priv, level, port, INTEL_OUTPUT_HDMI); else if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(dev_priv, level, port, - INTEL_OUTPUT_HDMI); + cnl_ddi_vswing_sequence(encoder, level); intel_hdmi->set_infoframes(drm_encoder, has_hdmi_sink, From cebfcead63de6f6b1414c3b58ee554b5fc8e103c Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Fri, 9 Jun 2017 15:26:10 -0700 Subject: [PATCH 136/341] drm/i915/DMC/CNL: Load DMC on CNL This patch loads the DMC on CNL.The firmware version is 1.04. v2: (Rodrigo) Remove MODULE_FIRMWARE. 
Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Signed-off-by: Rodrigo Vivi Reviewed-by: Animesh Manna Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-13-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_csr.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 03b5fe3e3036..506ec32b9e53 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -445,6 +445,7 @@ static const struct intel_device_info intel_cannonlake_info = { .platform = INTEL_CANNONLAKE, .gen = 10, .ddb_size = 1024, + .has_csr = 1, }; /* diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index fb6af0bcdf8f..dedc5dff9fd3 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -37,6 +37,9 @@ #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) +#define I915_CSR_CNL "i915/cnl_dmc_ver1_04.bin" +#define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) + #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) @@ -289,7 +292,9 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, csr->version = css_header->version; - if (IS_GEMINILAKE(dev_priv)) { + if (IS_CANNONLAKE(dev_priv)) { + required_version = CNL_CSR_VERSION_REQUIRED; + } else if (IS_GEMINILAKE(dev_priv)) { required_version = GLK_CSR_VERSION_REQUIRED; } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { required_version = KBL_CSR_VERSION_REQUIRED; @@ -438,7 +443,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (!HAS_CSR(dev_priv)) return; - if (IS_GEMINILAKE(dev_priv)) + if (IS_CANNONLAKE(dev_priv)) + csr->fw_path = I915_CSR_CNL; + else if (IS_GEMINILAKE(dev_priv)) csr->fw_path = I915_CSR_GLK; else if (IS_KABYLAKE(dev_priv) || 
IS_COFFEELAKE(dev_priv)) csr->fw_path = I915_CSR_KBL; From 1a7399aa58e93c65d3b5bf969ca1e69c81fb2535 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:11 -0700 Subject: [PATCH 137/341] drm/i915: Use HAS_CSR instead of gen number on DMC load. Since we have HAS_CSR tied to the platform definition let's use this instead of checking per platform. One less thing to worry when adding support to new platforms. Signed-off-by: Rodrigo Vivi Reviewed-by: Animesh Manna Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-14-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index dedc5dff9fd3..965988f79a55 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -241,7 +241,7 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv) u32 *payload = dev_priv->csr.dmc_payload; uint32_t i, fw_size; - if (!IS_GEN9(dev_priv)) { + if (!HAS_CSR(dev_priv)) { DRM_ERROR("No CSR support available for this platform\n"); return; } From f9569aa1e583d90a4347a213566a2334c93aa176 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:12 -0700 Subject: [PATCH 138/341] drm/i915/cnl: Fix Cannonlake scaler mode programing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Geminilake scalers Cannonlake also don't need and don't have the "high quality" mode programming. 
Cc: Ander Conselvan de Oliveira Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-15-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index d791b3ef89b5..36d4e635e4ce 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -325,7 +325,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, } /* set scaler mode */ - if (IS_GEMINILAKE(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { scaler_state->scalers[*scaler_id].mode = 0; } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* From 4efa16cae0629faa2f5f0e7d089c1b85d946805a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:13 -0700 Subject: [PATCH 139/341] drm/i915/cnl: Enable fifo underrun for Cannonlake. Also in a way that reuse bdw+ for all next platforms. 
Signed-off-by: Rodrigo Vivi Reviewed-by: Mika Kahola Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-16-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_fifo_underrun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 966e255ca053..d484862cc7df 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -262,7 +262,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, ironlake_set_fifo_underrun_reporting(dev, pipe, enable); else if (IS_GEN7(dev_priv)) ivybridge_set_fifo_underrun_reporting(dev, pipe, enable, old); - else if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) + else if (INTEL_GEN(dev_priv) >= 8) broadwell_set_fifo_underrun_reporting(dev, pipe, enable); return old; From ff15947e0f02ceccfffa8f342472765404d161b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:26:14 -0700 Subject: [PATCH 140/341] drm/i915/cnl: LSPCON support is gen9+ There is no platform specific change needed for LSPCON support on Cannonlake. So let's make it gen9+. 
Cc: Shashank Sharma Signed-off-by: Rodrigo Vivi Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-17-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dd22f3d0d9d6..c3ea485cb82a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3002,7 +3002,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_GMCH_DISPLAY(dev_priv) ((dev_priv)->info.has_gmch_display) -#define HAS_LSPCON(dev_priv) (IS_GEN9(dev_priv)) +#define HAS_LSPCON(dev_priv) (INTEL_GEN(dev_priv) >= 9) /* DPF == dynamic parity feature */ #define HAS_L3_DPF(dev_priv) ((dev_priv)->info.has_l3_dpf) From 1fa62e1b76d6fb4e353f6d7e3ec22e6d07e0a489 Mon Sep 17 00:00:00 2001 From: "Kahola, Mika" Date: Fri, 9 Jun 2017 15:26:15 -0700 Subject: [PATCH 141/341] drm/i915/cnl: Enable wrpll computation for CNL Enable wrpll computation for Cannonlake platform to support pll's required for HDMI output. The patch contains the following features - compute Cannonlake port clock programming dividers P, Q, and K. - compute PLL parameters for Cannonlake. These parameters set the values on DPLL registers. - find the register values to program wrpll for Cannonlake. The reference clock can be either 19.2MHz or 24MHz. v2: rebase v3: squash wrpll patches into one (Rodrigo) v4: switch order of getting even dividers (Paulo) update divider register values for PDiv and KDiv (Paulo) update wrpll computation algorithm (Paulo) v5: Remove ref clock division by 1000. (Rodrigo) v6: Rodrigo rebasing on top of latest code. 
Signed-off-by: Kahola, Mika Signed-off-by: Rodrigo Vivi Reviewed-by: Clint Taylor Link: http://patchwork.freedesktop.org/patch/msgid/1497047175-27250-18-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 140 +++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 903c38dc683a..8e669b6254ae 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2126,17 +2126,153 @@ out: return ret; } +static void cnl_wrpll_get_multipliers(unsigned int bestdiv, + unsigned int *pdiv, + unsigned int *qdiv, + unsigned int *kdiv) +{ + /* even dividers */ + if (bestdiv % 2 == 0) { + if (bestdiv == 2) { + *pdiv = 2; + *qdiv = 1; + *kdiv = 1; + } else if (bestdiv % 4 == 0) { + *pdiv = 2; + *qdiv = bestdiv / 4; + *kdiv = 2; + } else if (bestdiv % 6 == 0) { + *pdiv = 3; + *qdiv = bestdiv / 6; + *kdiv = 2; + } else if (bestdiv % 5 == 0) { + *pdiv = 5; + *qdiv = bestdiv / 10; + *kdiv = 2; + } else if (bestdiv % 14 == 0) { + *pdiv = 7; + *qdiv = bestdiv / 14; + *kdiv = 2; + } + } else { + if (bestdiv == 3 || bestdiv == 5 || bestdiv == 7) { + *pdiv = bestdiv; + *qdiv = 1; + *kdiv = 1; + } else { /* 9, 15, 21 */ + *pdiv = bestdiv / 3; + *qdiv = 1; + *kdiv = 3; + } + } +} + +static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, uint32_t dco_freq, + uint32_t ref_freq, uint32_t pdiv, uint32_t qdiv, + uint32_t kdiv) +{ + switch (kdiv) { + case 1: + params->kdiv = 1; + break; + case 2: + params->kdiv = 2; + break; + case 3: + params->kdiv = 4; + break; + default: + WARN(1, "Incorrect KDiv\n"); + } + + switch (pdiv) { + case 2: + params->pdiv = 1; + break; + case 3: + params->pdiv = 2; + break; + case 5: + params->pdiv = 4; + break; + case 7: + params->pdiv = 8; + break; + default: + WARN(1, "Incorrect PDiv\n"); + } + + if (kdiv != 2) + qdiv = 1; + + params->qdiv_ratio = qdiv; + params->qdiv_mode 
= (qdiv == 1) ? 0 : 1; + + params->dco_integer = div_u64(dco_freq, ref_freq); + params->dco_fraction = div_u64((div_u64((uint64_t)dco_freq<<15, (uint64_t)ref_freq) - + ((uint64_t)params->dco_integer<<15)) * 0x8000, 0x8000); +} + +static bool +cnl_ddi_calculate_wrpll(int clock /* in Hz */, + struct drm_i915_private *dev_priv, + struct skl_wrpll_params *wrpll_params) +{ + uint64_t afe_clock = clock * 5 / KHz(1); /* clocks in kHz */ + unsigned int dco_min = 7998 * KHz(1); + unsigned int dco_max = 10000 * KHz(1); + unsigned int dco_mid = (dco_min + dco_max) / 2; + + static const int dividers[] = { 2, 4, 6, 8, 10, 12, 14, 16, + 18, 20, 24, 28, 30, 32, 36, 40, + 42, 44, 48, 50, 52, 54, 56, 60, + 64, 66, 68, 70, 72, 76, 78, 80, + 84, 88, 90, 92, 96, 98, 100, 102, + 3, 5, 7, 9, 15, 21 }; + unsigned int d, dco; + unsigned int dco_centrality = 0; + unsigned int best_dco_centrality = 999999; + unsigned int best_div = 0; + unsigned int best_dco = 0; + unsigned int pdiv = 0, qdiv = 0, kdiv = 0; + + for (d = 0; d < ARRAY_SIZE(dividers); d++) { + dco = afe_clock * dividers[d]; + + if ((dco <= dco_max) && (dco >= dco_min)) { + dco_centrality = abs(dco - dco_mid); + + if (dco_centrality < best_dco_centrality) { + best_dco_centrality = dco_centrality; + best_div = dividers[d]; + best_dco = dco; + } + } + } + + if (best_div == 0) + return false; + + cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); + + cnl_wrpll_params_populate(wrpll_params, best_dco, + dev_priv->cdclk.hw.ref, pdiv, qdiv, kdiv); + + return true; +} + static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, int clock) { + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); uint32_t cfgcr0, cfgcr1; struct skl_wrpll_params wrpll_params = { 0, }; cfgcr0 = DPLL_CFGCR0_HDMI_MODE; - /* FIXME: Proper wrpll calculation done in a following patch */ - return false; + if (!cnl_ddi_calculate_wrpll(clock * 1000, dev_priv, &wrpll_params)) + return false; cfgcr0 |= 
DPLL_CFGCR0_DCO_FRACTION(wrpll_params.dco_fraction) | wrpll_params.dco_integer; From da411a48bdeb648153dc9a485c15c18f3d063eac Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 9 Jun 2017 15:02:50 -0700 Subject: [PATCH 142/341] drm/i915/cfl: Basic DDI plumbing for Coffee Lake. All here is pretty much like Kabylake. Including CFL-U has to use same ddi translation table as KBL-U for now. v2: Include missed IS_COFFEELAKE on edp trans table. (DK) Handle CFL-U with same translation table as KBL-U. (DK and confirmed with HW engineers) v3: Adding missed case for IS_CFL_ULT. (DK). v4: Duh! Now with the real IS_CFL_ULT instead of KBL one. (DK) Also use IS_GEN9_BC when possible. (DK) Cc: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1497045770-21302-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_ddi.c | 13 +++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3ea485cb82a..467786d25348 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2810,6 +2810,8 @@ intel_info(const struct drm_i915_private *dev_priv) (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) #define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0030) +#define IS_CFL_ULT(dev_priv) (IS_COFFEELAKE(dev_priv) && \ + (INTEL_DEVID(dev_priv) & 0x00F0) == 0x00A0) #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 721c2e2c12a3..db8093863f0c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -544,7 +544,7 @@ kbl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) if (IS_KBL_ULX(dev_priv)) { *n_entries = ARRAY_SIZE(kbl_y_ddi_translations_dp); return 
kbl_y_ddi_translations_dp; - } else if (IS_KBL_ULT(dev_priv)) { + } else if (IS_KBL_ULT(dev_priv) || IS_CFL_ULT(dev_priv)) { *n_entries = ARRAY_SIZE(kbl_u_ddi_translations_dp); return kbl_u_ddi_translations_dp; } else { @@ -560,7 +560,8 @@ skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv)) { *n_entries = ARRAY_SIZE(skl_y_ddi_translations_edp); return skl_y_ddi_translations_edp; - } else if (IS_SKL_ULT(dev_priv) || IS_KBL_ULT(dev_priv)) { + } else if (IS_SKL_ULT(dev_priv) || IS_KBL_ULT(dev_priv) || + IS_CFL_ULT(dev_priv)) { *n_entries = ARRAY_SIZE(skl_u_ddi_translations_edp); return skl_u_ddi_translations_edp; } else { @@ -569,7 +570,7 @@ skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } - if (IS_KABYLAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) return kbl_get_buf_trans_dp(dev_priv, n_entries); else return skl_get_buf_trans_dp(dev_priv, n_entries); @@ -625,7 +626,7 @@ static const struct ddi_buf_trans * intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) { - if (IS_KABYLAKE(dev_priv)) { + if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { return kbl_get_buf_trans_dp(dev_priv, n_entries); } else if (IS_SKYLAKE(dev_priv)) { return skl_get_buf_trans_dp(dev_priv, n_entries); @@ -645,7 +646,7 @@ static const struct ddi_buf_trans * intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { - if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { return skl_get_buf_trans_edp(dev_priv, n_entries); } else if (IS_BROADWELL(dev_priv)) { return bdw_get_buf_trans_edp(dev_priv, n_entries); @@ -1618,7 +1619,7 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) if (dp_iboost) { iboost = dp_iboost; } else { - if (IS_KABYLAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) ddi_translations = kbl_get_buf_trans_dp(dev_priv, &n_entries); else 
From 2ad50606f847a902303a5364b7cad64bdd6246f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= Date: Mon, 5 Jun 2017 17:52:39 +0200 Subject: [PATCH 143/341] dm integrity: reject mappings too large for device dm-integrity would successfully create mappings with the number of sectors greater than the provided data sector count. Attempts to read sectors of this mapping that were beyond the provided data sector count would then yield run-time messages of the form "device-mapper: integrity: Too big sector number: ...". Fix this by emitting an error when the requested mapping size is bigger than the provided data sector count. Signed-off-by: Ondrej Mosnacek Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 7910bfe50da4..4ab10cf718c9 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3040,6 +3040,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "The device is too small"; goto bad; } + if (ti->len > ic->provided_data_sectors) { + r = -EINVAL; + ti->error = "Not enough provided sectors for requested mapping size"; + goto bad; + } if (!buffer_sectors) buffer_sectors = 1; From ca8efa1df1d15a1795a2da57f9f6aada6ed6b946 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 6 Jun 2017 16:47:22 +1000 Subject: [PATCH 144/341] KVM: PPC: Book3S HV: Context-switch EBB registers properly This adds code to save the values of three SPRs (special-purpose registers) used by userspace to control event-based branches (EBBs), which are essentially interrupts that get delivered directly to userspace. These registers are loaded up with guest values when entering the guest, and their values are saved when exiting the guest, but we were not saving the host values and restoring them before going back to userspace. 
On POWER8 this would only affect userspace programs which explicitly request the use of EBBs and also use the KVM_RUN ioctl, since the only source of EBBs on POWER8 is the PMU, and there is an explicit enable bit in the PMU registers (and those PMU registers do get properly context-switched between host and guest). On POWER9 there is provision for externally-generated EBBs, and these are not subject to the control in the PMU registers. Since these registers only affect userspace, we can save them when we first come in from userspace and restore them before returning to userspace, rather than saving/restoring the host values on every guest entry/exit. Similarly, we don't need to worry about their values on offline secondary threads since they execute in the context of the idle task, which never executes in userspace. Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 42b7a4fd57d9..400a5992b121 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2907,6 +2907,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; + unsigned long ebb_regs[3] = {}; /* shut up GCC */ if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -2934,6 +2935,13 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_all_to_thread(current); + /* Save userspace EBB register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + ebb_regs[0] = mfspr(SPRN_EBBHR); + ebb_regs[1] = mfspr(SPRN_EBBRR); + ebb_regs[2] = mfspr(SPRN_BESCR); + } + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -2960,6 +2968,13 @@ static int kvmppc_vcpu_run_hv(struct 
kvm_run *run, struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); + /* Restore userspace EBB register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_EBBHR, ebb_regs[0]); + mtspr(SPRN_EBBRR, ebb_regs[1]); + mtspr(SPRN_BESCR, ebb_regs[2]); + } + out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); From 459fa246d8fa4a543ed9a3331f15c8fe1caf9937 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 11 Jun 2017 15:22:10 +1000 Subject: [PATCH 145/341] clocksource: Explicitly include linux/clocksource.h when needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kbuild test robot reported errors in these files when doing an ia64 allmodconfig build. drivers/clocksource/timer-sun5i.c:52:21: error: field 'clksrc' has incomplete type struct clocksource clksrc; ^~~~~~ drivers/clocksource/cadence_ttc_timer.c:92:21: error: field 'cs' has incomplete type struct clocksource cs; ^~ (and many more errors for these files) Cc: Michal Simek Cc: "Sören Brinkmann" Cc: Daniel Lezcano Cc: Thomas Gleixner Cc: Maxime Ripard Cc: Chen-Yu Tsai Reported-by: kbuild test robot Signed-off-by: Stephen Rothwell Acked-by: Michal Simek Signed-off-by: Daniel Lezcano --- drivers/clocksource/cadence_ttc_timer.c | 1 + drivers/clocksource/timer-sun5i.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index 44e5e951583b..8e64b8460f11 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 2e9c830ae1cd..c4656c4d44a6 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include From 
46464411307746e6297a034a9983a22c9dfc5a0c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 18 May 2017 17:28:47 +0200 Subject: [PATCH 146/341] xen/blkback: fix disconnect while I/Os in flight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today disconnecting xen-blkback is broken in case there are still I/Os in flight: xen_blkif_disconnect() will bail out early without releasing all resources in the hope it will be called again when the last request has terminated. This, however, won't happen as xen_blkif_free() won't be called on termination of the last running request: xen_blkif_put() won't decrement the blkif refcnt to 0 as xen_blkif_disconnect() didn't finish before thus some xen_blkif_put() calls in xen_blkif_disconnect() didn't happen. To solve this deadlock xen_blkif_disconnect() and xen_blkif_alloc_rings() shouldn't use xen_blkif_put() and xen_blkif_get() but use some other way to do their accounting of resources. This at once fixes another error in xen_blkif_disconnect(): when it returned early with -EBUSY for another ring than 0 it would call xen_blkif_put() again for already handled rings on a subsequent call. This will lead to inconsistencies in the refcnt handling. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Tested-by: Steven Haigh Acked-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 1 + drivers/block/xen-blkback/xenbus.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index dea61f6ab8cb..638597b17a38 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -281,6 +281,7 @@ struct xen_blkif_ring { wait_queue_head_t wq; atomic_t inflight; + bool active; /* One thread per blkif ring. 
*/ struct task_struct *xenblkd; unsigned int waiting_reqs; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 1f3dfaa54d87..998915174bb8 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif) init_waitqueue_head(&ring->shutdown_wq); ring->blkif = blkif; ring->st_print = jiffies; - xen_blkif_get(blkif); + ring->active = true; } return 0; @@ -249,6 +249,9 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) struct xen_blkif_ring *ring = &blkif->rings[r]; unsigned int i = 0; + if (!ring->active) + continue; + if (ring->xenblkd) { kthread_stop(ring->xenblkd); wake_up(&ring->shutdown_wq); @@ -296,7 +299,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) BUG_ON(ring->free_pages_num != 0); BUG_ON(ring->persistent_gnt_c != 0); WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); - xen_blkif_put(blkif); + ring->active = false; } blkif->nr_ring_pages = 0; /* From 71df1d7ccad1c36f7321d6b3b48f2ea42681c363 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 18 May 2017 17:28:48 +0200 Subject: [PATCH 147/341] xen/blkback: don't free be structure too early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The be structure must not be freed when freeing the blkif structure isn't done. Otherwise a use-after-free of be when unmapping the ring used for communicating with the frontend will occur in case of a late call of xenblk_disconnect() (e.g. due to an I/O still active when trying to disconnect). 
Signed-off-by: Juergen Gross Tested-by: Steven Haigh Acked-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 998915174bb8..4cdf0490983e 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -315,9 +315,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) static void xen_blkif_free(struct xen_blkif *blkif) { - - xen_blkif_disconnect(blkif); + WARN_ON(xen_blkif_disconnect(blkif)); xen_vbd_free(&blkif->vbd); + kfree(blkif->be->mode); + kfree(blkif->be); /* Make sure everything is drained before shutting down */ kmem_cache_free(xen_blkif_cachep, blkif); @@ -514,8 +515,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev) xen_blkif_put(be->blkif); } - kfree(be->mode); - kfree(be); return 0; } From a24fa22ce22ae302b3bf8f7008896d52d5d57b8d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 18 May 2017 17:28:49 +0200 Subject: [PATCH 148/341] xen/blkback: don't use xen_blkif_get() in xen-blkback kthread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to use xen_blkif_get()/xen_blkif_put() in the kthread of xen-blkback. Thread stopping is synchronous and using the blkif reference counting in the kthread will avoid to ever let the reference count drop to zero at the end of an I/O running concurrent to disconnecting and multiple rings. Setting ring->xenblkd to NULL after stopping the kthread isn't needed as the kthread does this already. 
Signed-off-by: Juergen Gross Tested-by: Steven Haigh Acked-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 3 --- drivers/block/xen-blkback/xenbus.c | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 726c32e35db9..6b14c509f3c7 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg) unsigned long timeout; int ret; - xen_blkif_get(blkif); - set_freezable(); while (!kthread_should_stop()) { if (try_to_freeze()) @@ -665,7 +663,6 @@ purge_gnt_list: print_stats(ring); ring->xenblkd = NULL; - xen_blkif_put(blkif); return 0; } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 4cdf0490983e..792da683e70d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -255,7 +255,6 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) if (ring->xenblkd) { kthread_stop(ring->xenblkd); wake_up(&ring->shutdown_wq); - ring->xenblkd = NULL; } /* The above kthread_stop() guarantees that at this point we From 089bc0143f489bd3a4578bdff5f4ca68fb26f341 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 13 Jun 2017 16:28:27 -0400 Subject: [PATCH 149/341] xen-blkback: don't leak stack data via response ring Rather than constructing a local structure instance on the stack, fill the fields directly on the shared ring, just like other backends do. Build on the fact that all response structure flavors are actually identical (the old code did make this assumption too). This is XSA-216. 
Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 23 ++++++++++++----------- drivers/block/xen-blkback/common.h | 25 +++++-------------------- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6b14c509f3c7..0e824091a12f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1433,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, static void make_response(struct xen_blkif_ring *ring, u64 id, unsigned short op, int st) { - struct blkif_response resp; + struct blkif_response *resp; unsigned long flags; union blkif_back_rings *blk_rings; int notify; - resp.id = id; - resp.operation = op; - resp.status = st; - spin_lock_irqsave(&ring->blk_ring_lock, flags); blk_rings = &ring->blk_rings; /* Place on the response ring for the relevant domain. 
*/ switch (ring->blkif->blk_protocol) { case BLKIF_PROTOCOL_NATIVE: - memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), - &resp, sizeof(resp)); + resp = RING_GET_RESPONSE(&blk_rings->native, + blk_rings->native.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_32: - memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), - &resp, sizeof(resp)); + resp = RING_GET_RESPONSE(&blk_rings->x86_32, + blk_rings->x86_32.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_64: - memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), - &resp, sizeof(resp)); + resp = RING_GET_RESPONSE(&blk_rings->x86_64, + blk_rings->x86_64.rsp_prod_pvt); break; default: BUG(); } + + resp->id = id; + resp->operation = op; + resp->status = st; + blk_rings->common.rsp_prod_pvt++; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); spin_unlock_irqrestore(&ring->blk_ring_lock, flags); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 638597b17a38..ecb35fe8ca8d 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues; struct blkif_common_request { char dummy; }; -struct blkif_common_response { - char dummy; -}; + +/* i386 protocol version */ struct blkif_x86_32_request_rw { uint8_t nr_segments; /* number of segments */ @@ -129,14 +128,6 @@ struct blkif_x86_32_request { } u; } __attribute__((__packed__)); -/* i386 protocol version */ -#pragma pack(push, 4) -struct blkif_x86_32_response { - uint64_t id; /* copied from request */ - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? 
*/ -}; -#pragma pack(pop) /* x86_64 protocol version */ struct blkif_x86_64_request_rw { @@ -193,18 +184,12 @@ struct blkif_x86_64_request { } u; } __attribute__((__packed__)); -struct blkif_x86_64_response { - uint64_t __attribute__((__aligned__(8))) id; - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? */ -}; - DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, - struct blkif_common_response); + struct blkif_response); DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, - struct blkif_x86_32_response); + struct blkif_response __packed); DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, - struct blkif_x86_64_response); + struct blkif_response); union blkif_back_rings { struct blkif_back_ring native; From 8bfb3676606454ffec836f56c5dc3e69dfc0956a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Jun 2017 09:17:38 +0200 Subject: [PATCH 150/341] wireless: wext: remove ndo_do_ioctl fallback There are no longer any drivers (in the tree proper, I didn't check all the staging drivers) that take WEXT ioctls through this API, the only remaining ones that even have ndo_do_ioctl are using it only for private ioctls. Therefore, we can remove this call. 
Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 1a4db6790e20..24ba8a99b946 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -957,9 +957,6 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, else if (private) return private(dev, iwr, cmd, info, handler); } - /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); return -EOPNOTSUPP; } From e79b0006c45c9b0b22f3ea54ff6e256b34c1f208 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Wed, 14 Jun 2017 09:51:56 +0530 Subject: [PATCH 151/341] ALSA: hda - Add Coffelake PCI ID Coffelake is another Intel part, so need to add PCI ID for it. Signed-off-by: Megha Dey Signed-off-by: Subhransu S. Prusty Acked-by: Vinod Koul Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 1770f085c2a6..e3c696c46a21 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -371,9 +371,10 @@ enum { #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198) +#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \ - IS_GLK(pci) + IS_GLK(pci) || IS_CFL(pci) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -2378,6 +2379,9 @@ static const struct pci_device_id azx_ids[] = { /* Kabylake-H */ { PCI_DEVICE(0x8086, 0xa2f0), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Coffelake */ + { PCI_DEVICE(0x8086, 0xa348), + 
.driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE}, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, From 4846bf0ca8cb4304dde6140eff33a92b3fe8ef24 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Jun 2017 12:03:46 +0100 Subject: [PATCH 152/341] drm/i915: Encourage our shrinker more when our shmemfs allocations fail Commit 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") made the bold decision to try and avoid the oomkiller by reporting -ENOMEM to userspace if our allocation failed after attempting to free enough buffer objects. In short, it appears we were giving up too easily (even before we start wondering if one pass of reclaim is as strong as we would like). Part of the problem is that if we only shrink just enough pages for our expected allocation, the likelihood of those pages becoming available to us is less than 100%. To counter-act that we ask for twice the number of pages to be made available. Furthermore, we allow the shrinker to pull pages from the active list in later passes. v2: Be a little more cautious in paging out gfx buffers, and leave that to a more balanced approach from shrink_slab(). Important when combined with "drm/i915: Start writeback from the shrinker" as anything shrunk is immediately swapped out and so should be more conservative.
Fixes: 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Daniel Vetter Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170609110350.1767-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 50 +++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index aff449807399..ca61a0be1458 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2337,8 +2337,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment; + gfp_t noreclaim; int ret; - gfp_t gfp; /* Assert that the object is not currently in any GPU domain. As it * wasn't in the GTT, there shouldn't be any way it could have been in @@ -2367,22 +2367,31 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; - gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); - gfp |= __GFP_NORETRY | __GFP_NOWARN; + noreclaim = mapping_gfp_constraint(mapping, + ~(__GFP_IO | __GFP_RECLAIM)); + noreclaim |= __GFP_NORETRY | __GFP_NOWARN; + sg = st->sgl; st->nents = 0; for (i = 0; i < page_count; i++) { - page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (unlikely(IS_ERR(page))) { - i915_gem_shrink(dev_priv, - page_count, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); + const unsigned int shrink[] = { + I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, + 0, + }, *s = shrink; + gfp_t gfp = noreclaim; + + do { page = shmem_read_mapping_page_gfp(mapping, i, gfp); - } - if (unlikely(IS_ERR(page))) { - gfp_t reclaim; + if (likely(!IS_ERR(page))) + break; + + if (!*s) { + ret = PTR_ERR(page); + goto err_sg; + } + + 
i915_gem_shrink(dev_priv, 2 * page_count, *s++); + cond_resched(); /* We've tried hard to allocate the memory by reaping * our own buffer, now let the real VM do its job and @@ -2392,15 +2401,13 @@ rebuild_st: * defer the oom here by reporting the ENOMEM back * to userspace. */ - reclaim = mapping_gfp_mask(mapping); - reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ - - page = shmem_read_mapping_page_gfp(mapping, i, reclaim); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto err_sg; + if (!*s) { + /* reclaim and warn, but no oom */ + gfp = mapping_gfp_mask(mapping); + gfp |= __GFP_NORETRY; } - } + } while (1); + if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { @@ -4285,6 +4292,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) mapping = obj->base.filp->f_mapping; mapping_set_gfp_mask(mapping, mask); + GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); i915_gem_object_init(obj, &i915_gem_object_ops); From eaf41801559a687cc7511c04dc712984765c9dd7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Jun 2017 12:03:47 +0100 Subject: [PATCH 153/341] drm/i915: Remove __GFP_NORETRY from our buffer allocator I tried __GFP_NORETRY in the belief that __GFP_RECLAIM was effective. It struggles with handling reclaim of our dirty buffers and relies on reclaim via kswapd. As a result, a single pass of direct reclaim is unreliable when i915 occupies the majority of available memory, and the only means of effectively waiting on kswapd to make progress is by not setting the __GFP_NORETRY flag and looping. That leaves us with the dilemma of invoking the oomkiller instead of propagating the allocation failure back to userspace where it can be handled more gracefully (one hopes). In the future we may have __GFP_MAYFAIL to allow repeats up until we genuinely run out of memory and the oomkiller would have been invoked. Until then, let the oomkiller wreak havoc.
v2: Stop playing with side-effects of gfp flags and await __GFP_MAYFAIL v3: Update comments that direct reclaim only appears to be ignoring our dirty buffers! Fixes: 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") Testcase: igt/gem_tiled_swapping Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: Michal Hocko Link: http://patchwork.freedesktop.org/patch/msgid/20170609110350.1767-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ca61a0be1458..b22145a876c5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2404,7 +2404,20 @@ rebuild_st: if (!*s) { /* reclaim and warn, but no oom */ gfp = mapping_gfp_mask(mapping); - gfp |= __GFP_NORETRY; + + /* Our bo are always dirty and so we require + * kswapd to reclaim our pages (direct reclaim + * does not effectively begin pageout of our + * buffers on its own). However, direct reclaim + * only waits for kswapd when under allocation + * congestion. So as a result __GFP_RECLAIM is + * unreliable and fails to actually reclaim our + * dirty pages -- unless you try over and over + * again with !__GFP_NORETRY. However, we still + * want to fail this allocation rather than + * trigger the out-of-memory killer and for + * this we want the future __GFP_MAYFAIL. + */ } } while (1); From 0f6ab55d7ab6847e9b799022ed3b5511c756e512 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Jun 2017 12:03:48 +0100 Subject: [PATCH 154/341] drm/i915: Only restrict noreclaim in the early shrink passes In our first pass, we do not want to use reclaim at all as we want to solely reap the i915 buffer caches (its purgeable pages). But we don't mind it initiates IO or pulls via the FS (but it shouldn't anyway as we say no to reclaim!). 
Just drop the GFP_IO constraint for simplicity. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170609110350.1767-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b22145a876c5..31cbe78171a9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2367,8 +2367,7 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; - noreclaim = mapping_gfp_constraint(mapping, - ~(__GFP_IO | __GFP_RECLAIM)); + noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; From 290271de34f6c22ec2337e3293224575459747d6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Jun 2017 12:03:49 +0100 Subject: [PATCH 155/341] drm/i915: Spin for struct_mutex inside shrinker Having resolved whether or not we would deadlock upon a call to mutex_lock(&dev->struct_mutex), we can then spin for the contended struct_mutex if we are not the owner. We cannot afford to simply block and wait for the mutex, as the owner may itself be waiting for the allocator -- i.e. a cyclic deadlock. This should significantly improve the chance of running the shrinker for other processes whilst the GPU is busy. A more balanced approach would be to optimistically spin whilst the mutex owner was on the cpu and there was an opportunity to acquire the mutex for ourselves quickly. However, that requires support from kernel/locking/ and a new mutex_spin_trylock() primitive. 
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170609110350.1767-4-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 58f27369183c..1032f98add11 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -38,16 +38,21 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) { switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) { - case MUTEX_TRYLOCK_FAILED: - return false; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - case MUTEX_TRYLOCK_RECURSIVE: *unlock = false; return true; + + case MUTEX_TRYLOCK_FAILED: + do { + cpu_relax(); + if (mutex_trylock(&dev_priv->drm.struct_mutex)) { + case MUTEX_TRYLOCK_SUCCESS: + *unlock = true; + return true; + } + } while (!need_resched()); + + return false; } BUG(); From 4f39a1f5870104b1670df2c09c831ac281896545 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Jun 2017 09:21:58 +0200 Subject: [PATCH 156/341] wireless: wext: use struct iwreq earlier in the call chain To make it clear that we never use struct ifreq, cast from it directly in the wext entrypoint and use struct iwreq from there on. The next patch will remove the cast again and pass the correct struct from the beginning. Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 24ba8a99b946..12949c8d3e5f 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -914,13 +914,12 @@ int call_commit_handler(struct net_device *dev) * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... 
*/ -static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, +static int wireless_process_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, struct iw_request_info *info, wext_ioctl_func standard, wext_ioctl_func private) { - struct iwreq *iwr = (struct iwreq *) ifr; struct net_device *dev; iw_handler handler; @@ -928,7 +927,7 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, * The copy_to/from_user() of ifr is also dealt with in there */ /* Make sure the device exist */ - if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, iwr->ifr_name)) == NULL) return -ENODEV; /* A bunch of special cases, then the generic case... @@ -974,7 +973,7 @@ static int wext_permission_check(unsigned int cmd) } /* entry point from dev ioctl */ -static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr, +static int wext_ioctl_dispatch(struct net *net, struct iwreq *iwr, unsigned int cmd, struct iw_request_info *info, wext_ioctl_func standard, wext_ioctl_func private) @@ -984,9 +983,9 @@ static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr, if (ret) return ret; - dev_load(net, ifr->ifr_name); + dev_load(net, iwr->ifr_name); rtnl_lock(); - ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private); + ret = wireless_process_ioctl(net, iwr, cmd, info, standard, private); rtnl_unlock(); return ret; @@ -1042,7 +1041,7 @@ int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, struct iw_request_info info = { .cmd = cmd, .flags = 0 }; int ret; - ret = wext_ioctl_dispatch(net, ifr, cmd, &info, + ret = wext_ioctl_dispatch(net, (void *)ifr, cmd, &info, ioctl_standard_call, ioctl_private_call); if (ret >= 0 && @@ -1104,7 +1103,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, info.cmd = cmd; info.flags = IW_REQUEST_FLAG_COMPAT; - ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info, + ret = wext_ioctl_dispatch(net, &iwr, cmd, &info, 
compat_standard_call, compat_private_call); From 68dd02d19c811ca8ea60220a9d73e13b4bdad73a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Jun 2017 09:28:11 +0200 Subject: [PATCH 157/341] dev_ioctl: copy only the smaller struct iwreq for wext Unfortunately, struct iwreq isn't a proper subset of struct ifreq, but is still handled by the same code path. Robert reported that then applications may (randomly) fault if the struct iwreq they pass happens to land within 8 bytes of the end of a mapping (the struct is only 32 bytes, vs. struct ifreq's 40 bytes). To fix this, pull out the code handling wireless extension ioctls and copy only the smaller structure in this case. This bug goes back a long time, I tracked that it was introduced into mainline in 2.1.15, over 20 years ago! This fixes https://bugzilla.kernel.org/show_bug.cgi?id=195869 Reported-by: Robert O'Callahan Signed-off-by: Johannes Berg --- include/net/wext.h | 4 ++-- net/core/dev_ioctl.c | 19 ++++++++++++++++--- net/wireless/wext-core.c | 6 +++--- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/include/net/wext.h b/include/net/wext.h index 345911965dbb..454ff763eeba 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -6,7 +6,7 @@ struct net; #ifdef CONFIG_WEXT_CORE -int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, void __user *arg); int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg); @@ -14,7 +14,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, struct iw_statistics *get_wireless_stats(struct net_device *dev); int call_commit_handler(struct net_device *dev); #else -static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, +static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, void __user *arg) { return -EINVAL; diff --git a/net/core/dev_ioctl.c 
b/net/core/dev_ioctl.c index b94b1d293506..27fad31784a8 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -410,6 +410,22 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCGIFNAME) return dev_ifname(net, (struct ifreq __user *)arg); + /* + * Take care of Wireless Extensions. Unfortunately struct iwreq + * isn't a proper subset of struct ifreq (it's 8 byte shorter) + * so we need to treat it specially, otherwise applications may + * fault if the struct they're passing happens to land at the + * end of a mapped page. + */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + struct iwreq iwr; + + if (copy_from_user(&iwr, arg, sizeof(iwr))) + return -EFAULT; + + return wext_handle_ioctl(net, &iwr, cmd, arg); + } + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -559,9 +575,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) ret = -EFAULT; return ret; } - /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(net, &ifr, cmd, arg); return -ENOTTY; } } diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 12949c8d3e5f..6cdb054484d6 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1035,18 +1035,18 @@ static int ioctl_standard_call(struct net_device * dev, } -int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, void __user *arg) { struct iw_request_info info = { .cmd = cmd, .flags = 0 }; int ret; - ret = wext_ioctl_dispatch(net, (void *)ifr, cmd, &info, + ret = wext_ioctl_dispatch(net, iwr, cmd, &info, ioctl_standard_call, ioctl_private_call); if (ret >= 0 && IW_IS_GET(cmd) && - copy_to_user(arg, ifr, sizeof(struct iwreq))) + copy_to_user(arg, iwr, sizeof(struct iwreq))) return -EFAULT; return ret; From 9ee82d783eef3177928a586c1df3f08d9ced72a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson 
Date: Thu, 18 May 2017 10:46:18 +0100 Subject: [PATCH 158/341] drm/i915: Reinstate reservation_object zapping for batch_pool objects I removed the zapping of the reservation_object->fence array of shared fences prematurely. We don't yet have the code to zap that array when retiring the object, and so currently it remains possible to continually grow the shared array trapping requests when reusing the batch_pool object across many timelines. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170518094638.5469-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 41aa598c4f3b..c93005c2e0fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -114,12 +114,27 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, list_for_each_entry(obj, list, batch_pool_link) { /* The batches are strictly LRU ordered */ if (i915_gem_object_is_active(obj)) { - if (!reservation_object_test_signaled_rcu(obj->resv, - true)) + struct reservation_object *resv = obj->resv; + + if (!reservation_object_test_signaled_rcu(resv, true)) break; i915_gem_retire_requests(pool->engine->i915); GEM_BUG_ON(i915_gem_object_is_active(obj)); + + /* + * The object is now idle, clear the array of shared + * fences before we add a new request. Although, we + * remain on the same engine, we may be on a different + * timeline and so may continually grow the array, + * trapping a reference to all the old fences, rather + * than replace the existing fence. 
+ */ + if (rcu_access_pointer(resv->fence)) { + reservation_object_lock(resv, NULL); + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } } GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, From 2deaeaf102d692cb6f764123b1df7aa118a8e97c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jun 2017 16:20:32 +0200 Subject: [PATCH 159/341] ALSA: pcm: Don't treat NULL chmap as a fatal error The standard PCM chmap helper callbacks treat the NULL info->chmap as a fatal error and spew a kernel warning with a stack trace when CONFIG_SND_DEBUG is on. This was OK originally, since the chmap was supposed to be always static and non-NULL. But, as the recent addition of the Intel LPE audio driver shows, the chmap content may vary dynamically, and it can even be NULL when disconnected. The user still sees the kernel warning unnecessarily. To clear up such confusion, this patch simply removes the snd_BUG_ON() in each place and just returns an error without warning. Cc: # v4.11+ Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 5088d4b8db22..009e6c98754e 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol, struct snd_pcm_substream *substream; const struct snd_pcm_chmap_elem *map; - if (snd_BUG_ON(!info->chmap)) + if (!info->chmap) return -EINVAL; substream = snd_pcm_chmap_substream(info, idx); if (!substream) @@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int __user *dst; int c, count = 0; - if (snd_BUG_ON(!info->chmap)) + if (!info->chmap) return -EINVAL; if (size < 8) return -ENOMEM; From 7fed555c02f754af44f8963890b5ef8e30e97391 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:22:59 +0100 Subject: [PATCH 160/341] drm/i915: expose _SLICE_MASK GETPARM Enables
userspace to determine the maximum number of slices that can be enabled on the device and also know what specific slices can be enabled. This information is required, for example, to be able to analyse some OA counter reports where the counter configuration depends on the HW slice configuration. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ include/uapi/drm/i915_drm.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1f802de7b94b..d503612a7fc1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -374,6 +374,11 @@ static int i915_getparam(struct drm_device *dev, void *data, */ value = 1; break; + case I915_PARAM_SLICE_MASK: + value = INTEL_INFO(dev_priv)->sseu.slice_mask; + if (!value) + return -ENODEV; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f24a80d2d42e..25695c3d9a76 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -418,6 +418,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 +/* Query the mask of slices available for this system */ +#define I915_PARAM_SLICE_MASK 46 + typedef struct drm_i915_getparam { __s32 param; /* From f532023381df49ac00cb2d1e70df607cf534720d Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:00 +0100 Subject: [PATCH 161/341] drm/i915: expose _SUBSLICE_MASK GETPARM Assuming a uniform mask across all slices, this enables userspace to determine the specific sub slices can be enabled. This information is required, for example, to be able to analyse some OA counter reports where the counter configuration depends on the HW sub slice configuration. 
Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ include/uapi/drm/i915_drm.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d503612a7fc1..a534412a5551 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -379,6 +379,11 @@ static int i915_getparam(struct drm_device *dev, void *data, if (!value) return -ENODEV; break; + case I915_PARAM_SUBSLICE_MASK: + value = INTEL_INFO(dev_priv)->sseu.subslice_mask; + if (!value) + return -ENODEV; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 25695c3d9a76..464547d08173 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -421,6 +421,11 @@ typedef struct drm_i915_irq_wait { /* Query the mask of slices available for this system */ #define I915_PARAM_SLICE_MASK 46 +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + typedef struct drm_i915_getparam { __s32 param; /* From 3f488d99858d22f584e1734f317bfcff9dbdf4fd Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 13 Jun 2017 12:23:01 +0100 Subject: [PATCH 162/341] drm/i915/perf: rework mux configurations queries Gen8+ might have mux configurations per slices/subslices. Depending on whether slices/subslices have been fused off, only part of the configuration needs to be applied. This change reworks the mux configurations query mechanism to allow more than one set of registers to be programmed. 
v2: s/n_mux_regs/n_mux_configs/ (Matthew) Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/i915_oa_hsw.c | 215 +++++++++++++++++++---------- drivers/gpu/drm/i915/i915_oa_hsw.h | 4 +- drivers/gpu/drm/i915/i915_perf.c | 7 +- 4 files changed, 151 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 467786d25348..f04449f9b1d5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2417,8 +2417,10 @@ struct drm_i915_private { int metrics_set; - const struct i915_oa_reg *mux_regs; - int mux_regs_len; + const struct i915_oa_reg *mux_regs[1]; + int mux_regs_lens[1]; + int n_mux_configs; + const struct i915_oa_reg *b_counter_regs; int b_counter_regs_len; diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c b/drivers/gpu/drm/i915/i915_oa_hsw.c index 4ddf756add31..8c13e0880e53 100644 --- a/drivers/gpu/drm/i915/i915_oa_hsw.c +++ b/drivers/gpu/drm/i915/i915_oa_hsw.c @@ -1,5 +1,7 @@ /* - * Autogenerated file, DO NOT EDIT manually! + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * * * Copyright (c) 2015 Intel Corporation * @@ -109,12 +111,21 @@ static const struct i915_oa_reg mux_config_render_basic[] = { { _MMIO(0x25428), 0x00042049 }, }; -static const struct i915_oa_reg * +static int get_render_basic_mux_config(struct drm_i915_private *dev_priv, - int *len) + const struct i915_oa_reg **regs, + int *lens) { - *len = ARRAY_SIZE(mux_config_render_basic); - return mux_config_render_basic; + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_basic; + lens[n] = ARRAY_SIZE(mux_config_render_basic); + n++; + + return n; } static const struct i915_oa_reg b_counter_config_compute_basic[] = { @@ -172,12 +183,21 @@ static const struct i915_oa_reg mux_config_compute_basic[] = { { _MMIO(0x25428), 0x00000c03 }, }; -static const struct i915_oa_reg * +static int get_compute_basic_mux_config(struct drm_i915_private *dev_priv, - int *len) + const struct i915_oa_reg **regs, + int *lens) { - *len = ARRAY_SIZE(mux_config_compute_basic); - return mux_config_compute_basic; + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; } static const struct i915_oa_reg b_counter_config_compute_extended[] = { @@ -221,12 +241,21 @@ static const struct i915_oa_reg mux_config_compute_extended[] = { { _MMIO(0x25428), 0x00000000 }, }; -static const struct i915_oa_reg * +static int get_compute_extended_mux_config(struct drm_i915_private *dev_priv, - int *len) + const struct i915_oa_reg **regs, + int *lens) { - *len = ARRAY_SIZE(mux_config_compute_extended); - return mux_config_compute_extended; + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = 
mux_config_compute_extended; + lens[n] = ARRAY_SIZE(mux_config_compute_extended); + n++; + + return n; } static const struct i915_oa_reg b_counter_config_memory_reads[] = { @@ -281,12 +310,21 @@ static const struct i915_oa_reg mux_config_memory_reads[] = { { _MMIO(0x25428), 0x00000000 }, }; -static const struct i915_oa_reg * +static int get_memory_reads_mux_config(struct drm_i915_private *dev_priv, - int *len) + const struct i915_oa_reg **regs, + int *lens) { - *len = ARRAY_SIZE(mux_config_memory_reads); - return mux_config_memory_reads; + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; } static const struct i915_oa_reg b_counter_config_memory_writes[] = { @@ -341,12 +379,21 @@ static const struct i915_oa_reg mux_config_memory_writes[] = { { _MMIO(0x25428), 0x00000000 }, }; -static const struct i915_oa_reg * +static int get_memory_writes_mux_config(struct drm_i915_private *dev_priv, - int *len) + const struct i915_oa_reg **regs, + int *lens) { - *len = ARRAY_SIZE(mux_config_memory_writes); - return mux_config_memory_writes; + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; } static const struct i915_oa_reg b_counter_config_sampler_balance[] = { @@ -401,31 +448,40 @@ static const struct i915_oa_reg mux_config_sampler_balance[] = { { _MMIO(0x25428), 0x0004a54a }, }; -static const struct i915_oa_reg * +static int get_sampler_balance_mux_config(struct drm_i915_private *dev_priv, - int *len) + const struct i915_oa_reg **regs, + int *lens) { - *len = ARRAY_SIZE(mux_config_sampler_balance); - return mux_config_sampler_balance; + int n = 0; + + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler_balance; + lens[n] = ARRAY_SIZE(mux_config_sampler_balance); + n++; + + return n; } int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) { - dev_priv->perf.oa.mux_regs = NULL; - dev_priv->perf.oa.mux_regs_len = 0; + dev_priv->perf.oa.n_mux_configs = 0; dev_priv->perf.oa.b_counter_regs = NULL; dev_priv->perf.oa.b_counter_regs_len = 0; switch (dev_priv->perf.oa.metrics_set) { case METRIC_SET_ID_RENDER_BASIC: - dev_priv->perf.oa.mux_regs = + dev_priv->perf.oa.n_mux_configs = get_render_basic_mux_config(dev_priv, - &dev_priv->perf.oa.mux_regs_len); - if (!dev_priv->perf.oa.mux_regs) { - DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set"); + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); /* EINVAL because *_register_sysfs already checked this - * and so it wouldn't have been advertised so userspace and + * and so it wouldn't have been advertised to userspace and * so shouldn't have been requested */ return -EINVAL; @@ -438,14 +494,15 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) return 0; case METRIC_SET_ID_COMPUTE_BASIC: - dev_priv->perf.oa.mux_regs = + dev_priv->perf.oa.n_mux_configs = get_compute_basic_mux_config(dev_priv, - &dev_priv->perf.oa.mux_regs_len); - if (!dev_priv->perf.oa.mux_regs) { - DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set"); + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); /* EINVAL because *_register_sysfs already checked this - * and so it wouldn't have been advertised so userspace and + * and so it wouldn't have been 
advertised to userspace and * so shouldn't have been requested */ return -EINVAL; @@ -458,14 +515,15 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) return 0; case METRIC_SET_ID_COMPUTE_EXTENDED: - dev_priv->perf.oa.mux_regs = + dev_priv->perf.oa.n_mux_configs = get_compute_extended_mux_config(dev_priv, - &dev_priv->perf.oa.mux_regs_len); - if (!dev_priv->perf.oa.mux_regs) { - DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set"); + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); /* EINVAL because *_register_sysfs already checked this - * and so it wouldn't have been advertised so userspace and + * and so it wouldn't have been advertised to userspace and * so shouldn't have been requested */ return -EINVAL; @@ -478,14 +536,15 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) return 0; case METRIC_SET_ID_MEMORY_READS: - dev_priv->perf.oa.mux_regs = + dev_priv->perf.oa.n_mux_configs = get_memory_reads_mux_config(dev_priv, - &dev_priv->perf.oa.mux_regs_len); - if (!dev_priv->perf.oa.mux_regs) { - DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set"); + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); /* EINVAL because *_register_sysfs already checked this - * and so it wouldn't have been advertised so userspace and + * and so it wouldn't have been advertised to userspace and * so shouldn't have been requested */ return -EINVAL; @@ -498,14 +557,15 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) return 0; case METRIC_SET_ID_MEMORY_WRITES: - dev_priv->perf.oa.mux_regs = + dev_priv->perf.oa.n_mux_configs = get_memory_writes_mux_config(dev_priv, - 
&dev_priv->perf.oa.mux_regs_len); - if (!dev_priv->perf.oa.mux_regs) { - DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set"); + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); /* EINVAL because *_register_sysfs already checked this - * and so it wouldn't have been advertised so userspace and + * and so it wouldn't have been advertised to userspace and * so shouldn't have been requested */ return -EINVAL; @@ -518,14 +578,15 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) return 0; case METRIC_SET_ID_SAMPLER_BALANCE: - dev_priv->perf.oa.mux_regs = + dev_priv->perf.oa.n_mux_configs = get_sampler_balance_mux_config(dev_priv, - &dev_priv->perf.oa.mux_regs_len); - if (!dev_priv->perf.oa.mux_regs) { - DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER_BALANCE\" metric set"); + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER_BALANCE\" metric set\n"); /* EINVAL because *_register_sysfs already checked this - * and so it wouldn't have been advertised so userspace and + * and so it wouldn't have been advertised to userspace and * so shouldn't have been requested */ return -EINVAL; @@ -677,35 +738,36 @@ static struct attribute_group group_sampler_balance = { int i915_perf_register_sysfs_hsw(struct drm_i915_private *dev_priv) { - int mux_len; + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; int ret = 0; - if (get_render_basic_mux_config(dev_priv, &mux_len)) { + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); if (ret) goto error_render_basic; } - if (get_compute_basic_mux_config(dev_priv, 
&mux_len)) { + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); if (ret) goto error_compute_basic; } - if (get_compute_extended_mux_config(dev_priv, &mux_len)) { + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); if (ret) goto error_compute_extended; } - if (get_memory_reads_mux_config(dev_priv, &mux_len)) { + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); if (ret) goto error_memory_reads; } - if (get_memory_writes_mux_config(dev_priv, &mux_len)) { + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); if (ret) goto error_memory_writes; } - if (get_sampler_balance_mux_config(dev_priv, &mux_len)) { + if (get_sampler_balance_mux_config(dev_priv, mux_regs, mux_lens)) { ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler_balance); if (ret) goto error_sampler_balance; @@ -714,19 +776,19 @@ i915_perf_register_sysfs_hsw(struct drm_i915_private *dev_priv) return 0; error_sampler_balance: - if (get_sampler_balance_mux_config(dev_priv, &mux_len)) + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); error_memory_writes: - if (get_sampler_balance_mux_config(dev_priv, &mux_len)) + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); error_memory_reads: - if (get_sampler_balance_mux_config(dev_priv, &mux_len)) + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); error_compute_extended: - if (get_sampler_balance_mux_config(dev_priv, &mux_len)) 
+ if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); error_compute_basic: - if (get_sampler_balance_mux_config(dev_priv, &mux_len)) + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); error_render_basic: return ret; @@ -735,18 +797,19 @@ error_render_basic: void i915_perf_unregister_sysfs_hsw(struct drm_i915_private *dev_priv) { - int mux_len; + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; - if (get_render_basic_mux_config(dev_priv, &mux_len)) + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); - if (get_compute_basic_mux_config(dev_priv, &mux_len)) + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); - if (get_compute_extended_mux_config(dev_priv, &mux_len)) + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); - if (get_memory_reads_mux_config(dev_priv, &mux_len)) + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); - if (get_memory_writes_mux_config(dev_priv, &mux_len)) + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); - if (get_sampler_balance_mux_config(dev_priv, &mux_len)) + if (get_sampler_balance_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_balance); } diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.h b/drivers/gpu/drm/i915/i915_oa_hsw.h index 429a229b5158..6fe7e0690ef3 100644 --- a/drivers/gpu/drm/i915/i915_oa_hsw.h +++ 
b/drivers/gpu/drm/i915/i915_oa_hsw.h @@ -1,5 +1,7 @@ /* - * Autogenerated file, DO NOT EDIT manually! + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * * * Copyright (c) 2015 Intel Corporation * diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 85269bcc8372..7e56b895fd34 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1047,6 +1047,7 @@ static void config_oa_regs(struct drm_i915_private *dev_priv, static int hsw_enable_metric_set(struct drm_i915_private *dev_priv) { int ret = i915_oa_select_metric_set_hsw(dev_priv); + int i; if (ret) return ret; @@ -1068,8 +1069,10 @@ static int hsw_enable_metric_set(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs, - dev_priv->perf.oa.mux_regs_len); + for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) { + config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs[i], + dev_priv->perf.oa.mux_regs_lens[i]); + } /* It apparently takes a fairly long time for a new MUX * configuration to be be applied after these register writes. From 5182f646c7615ede9a9ba3ecd241f6cbe16829dc Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:02 +0100 Subject: [PATCH 163/341] drm/i915/perf: Add 'render basic' Gen8+ OA unit configs Adds a static OA unit, MUX, B Counter + Flex EU configurations for basic render metrics on Broadwell, Cherryview, Skylake and Broxton. 
These are auto generated from an XML description of metric sets, currently maintained in gputop, ref: https://github.com/rib/gputop > gputop-data/oa-*.xml > scripts/i915-perf-kernelgen.py $ make -C gputop-data -f Makefile.xml WHITELIST=RenderBasic v2: add newlines to debug messages + fix comment (Matthew Auld) Signed-off-by: Robert Bragg Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/Makefile | 8 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/i915_oa_bdw.c | 392 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_oa_bdw.h | 40 +++ drivers/gpu/drm/i915/i915_oa_bxt.c | 248 ++++++++++++++++ drivers/gpu/drm/i915/i915_oa_bxt.h | 40 +++ drivers/gpu/drm/i915/i915_oa_chv.c | 238 ++++++++++++++++ drivers/gpu/drm/i915/i915_oa_chv.h | 40 +++ drivers/gpu/drm/i915/i915_oa_sklgt2.c | 238 ++++++++++++++++ drivers/gpu/drm/i915/i915_oa_sklgt2.h | 40 +++ drivers/gpu/drm/i915/i915_oa_sklgt3.c | 249 ++++++++++++++++ drivers/gpu/drm/i915/i915_oa_sklgt3.h | 40 +++ drivers/gpu/drm/i915/i915_oa_sklgt4.c | 260 +++++++++++++++++ drivers/gpu/drm/i915/i915_oa_sklgt4.h | 40 +++ 14 files changed, 1876 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_oa_bdw.c create mode 100644 drivers/gpu/drm/i915/i915_oa_bdw.h create mode 100644 drivers/gpu/drm/i915/i915_oa_bxt.c create mode 100644 drivers/gpu/drm/i915/i915_oa_bxt.h create mode 100644 drivers/gpu/drm/i915/i915_oa_chv.c create mode 100644 drivers/gpu/drm/i915/i915_oa_chv.h create mode 100644 drivers/gpu/drm/i915/i915_oa_sklgt2.c create mode 100644 drivers/gpu/drm/i915/i915_oa_sklgt2.h create mode 100644 drivers/gpu/drm/i915/i915_oa_sklgt3.c create mode 100644 drivers/gpu/drm/i915/i915_oa_sklgt3.h create mode 100644 drivers/gpu/drm/i915/i915_oa_sklgt4.c create mode 100644 drivers/gpu/drm/i915/i915_oa_sklgt4.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 16dccf550412..49a628cdef9e 100644 --- 
a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -129,7 +129,13 @@ i915-y += i915_vgpu.o # perf code i915-y += i915_perf.o \ - i915_oa_hsw.o + i915_oa_hsw.o \ + i915_oa_bdw.o \ + i915_oa_chv.o \ + i915_oa_sklgt2.o \ + i915_oa_sklgt3.o \ + i915_oa_sklgt4.o \ + i915_oa_bxt.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f04449f9b1d5..2faad94c9ef2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2417,12 +2417,14 @@ struct drm_i915_private { int metrics_set; - const struct i915_oa_reg *mux_regs[1]; - int mux_regs_lens[1]; + const struct i915_oa_reg *mux_regs[2]; + int mux_regs_lens[2]; int n_mux_configs; const struct i915_oa_reg *b_counter_regs; int b_counter_regs_len; + const struct i915_oa_reg *flex_regs; + int flex_regs_len; struct { struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_oa_bdw.c b/drivers/gpu/drm/i915/i915_oa_bdw.c new file mode 100644 index 000000000000..9a11c03b4ecb --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_bdw.c @@ -0,0 +1,392 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_bdw.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, +}; + +int i915_oa_n_builtin_metric_sets_bdw = 1; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic_0_slices_0x01[] = { + { _MMIO(0x9888), 0x143f000f }, + { _MMIO(0x9888), 0x14110014 }, + { _MMIO(0x9888), 0x14310014 }, + { _MMIO(0x9888), 0x14bf000f }, + { _MMIO(0x9888), 0x118a0317 }, + { _MMIO(0x9888), 0x13837be0 }, + { _MMIO(0x9888), 0x3b800060 }, + { _MMIO(0x9888), 0x3d800005 }, + { _MMIO(0x9888), 0x005c4000 }, + { _MMIO(0x9888), 0x065c8000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x003d8000 }, + { _MMIO(0x9888), 0x183d0800 }, + { _MMIO(0x9888), 0x0a3f0023 }, + { _MMIO(0x9888), 0x103f0000 }, + { _MMIO(0x9888), 0x00584000 }, + { _MMIO(0x9888), 0x08584000 }, + { _MMIO(0x9888), 0x0a5a4000 }, + { _MMIO(0x9888), 0x005b4000 }, + { _MMIO(0x9888), 0x0e5b8000 }, + { _MMIO(0x9888), 0x185b2400 }, + {
_MMIO(0x9888), 0x0a1d4000 }, + { _MMIO(0x9888), 0x0c1f0800 }, + { _MMIO(0x9888), 0x0e1faa00 }, + { _MMIO(0x9888), 0x00384000 }, + { _MMIO(0x9888), 0x0e384000 }, + { _MMIO(0x9888), 0x16384000 }, + { _MMIO(0x9888), 0x18380001 }, + { _MMIO(0x9888), 0x00392000 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a391000 }, + { _MMIO(0x9888), 0x00104000 }, + { _MMIO(0x9888), 0x08104000 }, + { _MMIO(0x9888), 0x00110030 }, + { _MMIO(0x9888), 0x08110031 }, + { _MMIO(0x9888), 0x10110000 }, + { _MMIO(0x9888), 0x00134000 }, + { _MMIO(0x9888), 0x16130020 }, + { _MMIO(0x9888), 0x06308000 }, + { _MMIO(0x9888), 0x08308000 }, + { _MMIO(0x9888), 0x06311800 }, + { _MMIO(0x9888), 0x08311880 }, + { _MMIO(0x9888), 0x10310000 }, + { _MMIO(0x9888), 0x0e334000 }, + { _MMIO(0x9888), 0x16330080 }, + { _MMIO(0x9888), 0x0abf1180 }, + { _MMIO(0x9888), 0x10bf0000 }, + { _MMIO(0x9888), 0x0ada8000 }, + { _MMIO(0x9888), 0x0a9d8000 }, + { _MMIO(0x9888), 0x109f0002 }, + { _MMIO(0x9888), 0x0ab94000 }, + { _MMIO(0x9888), 0x0d888000 }, + { _MMIO(0x9888), 0x038a0380 }, + { _MMIO(0x9888), 0x058a000e }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8a00a0 }, + { _MMIO(0x9888), 0x078a0000 }, + { _MMIO(0x9888), 0x098a0000 }, + { _MMIO(0x9888), 0x238b2820 }, + { _MMIO(0x9888), 0x258b2550 }, + { _MMIO(0x9888), 0x198c1000 }, + { _MMIO(0x9888), 0x0b8d8000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa0 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x0d831021 }, + { _MMIO(0x9888), 0x0f83572f }, + { _MMIO(0x9888), 0x01835680 }, + { _MMIO(0x9888), 0x0383002c }, + { _MMIO(0x9888), 0x11830000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830001 }, + { _MMIO(0x9888), 0x05830000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 
0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x05844000 }, + { _MMIO(0x9888), 0x1b80c137 }, + { _MMIO(0x9888), 0x1d80c147 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x15804000 }, + { _MMIO(0x9888), 0x4d801110 }, + { _MMIO(0x9888), 0x4f800331 }, + { _MMIO(0x9888), 0x43800802 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45801465 }, + { _MMIO(0x9888), 0x53801111 }, + { _MMIO(0x9888), 0x478014a5 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800ca5 }, + { _MMIO(0x9888), 0x41800003 }, +}; + +static const struct i915_oa_reg mux_config_render_basic_1_slices_0x02[] = { + { _MMIO(0x9888), 0x143f000f }, + { _MMIO(0x9888), 0x14bf000f }, + { _MMIO(0x9888), 0x14910014 }, + { _MMIO(0x9888), 0x14b10014 }, + { _MMIO(0x9888), 0x118a0317 }, + { _MMIO(0x9888), 0x13837be0 }, + { _MMIO(0x9888), 0x3b800060 }, + { _MMIO(0x9888), 0x3d800005 }, + { _MMIO(0x9888), 0x0a3f0023 }, + { _MMIO(0x9888), 0x103f0000 }, + { _MMIO(0x9888), 0x0a5a4000 }, + { _MMIO(0x9888), 0x0a1d4000 }, + { _MMIO(0x9888), 0x0e1f8000 }, + { _MMIO(0x9888), 0x0a391000 }, + { _MMIO(0x9888), 0x00dc4000 }, + { _MMIO(0x9888), 0x06dc8000 }, + { _MMIO(0x9888), 0x08dcc000 }, + { _MMIO(0x9888), 0x00bd8000 }, + { _MMIO(0x9888), 0x18bd0800 }, + { _MMIO(0x9888), 0x0abf1180 }, + { _MMIO(0x9888), 0x10bf0000 }, + { _MMIO(0x9888), 0x00d84000 }, + { _MMIO(0x9888), 0x08d84000 }, + { _MMIO(0x9888), 0x0ada8000 }, + { _MMIO(0x9888), 0x00db4000 }, + { _MMIO(0x9888), 0x0edb8000 }, + { _MMIO(0x9888), 0x18db2400 }, + { _MMIO(0x9888), 0x0a9d8000 }, + { _MMIO(0x9888), 0x0c9f0800 }, + { _MMIO(0x9888), 0x0e9f2a00 }, + { _MMIO(0x9888), 0x109f0002 }, + { _MMIO(0x9888), 0x00b84000 }, + { _MMIO(0x9888), 0x0eb84000 }, + { _MMIO(0x9888), 0x16b84000 }, + { _MMIO(0x9888), 
0x18b80001 }, + { _MMIO(0x9888), 0x00b92000 }, + { _MMIO(0x9888), 0x06b98000 }, + { _MMIO(0x9888), 0x08b9a000 }, + { _MMIO(0x9888), 0x0ab94000 }, + { _MMIO(0x9888), 0x00904000 }, + { _MMIO(0x9888), 0x08904000 }, + { _MMIO(0x9888), 0x00910030 }, + { _MMIO(0x9888), 0x08910031 }, + { _MMIO(0x9888), 0x10910000 }, + { _MMIO(0x9888), 0x00934000 }, + { _MMIO(0x9888), 0x16930020 }, + { _MMIO(0x9888), 0x06b08000 }, + { _MMIO(0x9888), 0x08b08000 }, + { _MMIO(0x9888), 0x06b11800 }, + { _MMIO(0x9888), 0x08b11880 }, + { _MMIO(0x9888), 0x10b10000 }, + { _MMIO(0x9888), 0x0eb34000 }, + { _MMIO(0x9888), 0x16b30080 }, + { _MMIO(0x9888), 0x01888000 }, + { _MMIO(0x9888), 0x0d88b800 }, + { _MMIO(0x9888), 0x038a0380 }, + { _MMIO(0x9888), 0x058a000e }, + { _MMIO(0x9888), 0x1b8a0080 }, + { _MMIO(0x9888), 0x078a0000 }, + { _MMIO(0x9888), 0x098a0000 }, + { _MMIO(0x9888), 0x238b2840 }, + { _MMIO(0x9888), 0x258b26a0 }, + { _MMIO(0x9888), 0x018c4000 }, + { _MMIO(0x9888), 0x0f8c4000 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c1100 }, + { _MMIO(0x9888), 0x018d2000 }, + { _MMIO(0x9888), 0x078d8000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8d8000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa0 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x0d831021 }, + { _MMIO(0x9888), 0x0f83572f }, + { _MMIO(0x9888), 0x01835680 }, + { _MMIO(0x9888), 0x0383002c }, + { _MMIO(0x9888), 0x11830000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830001 }, + { _MMIO(0x9888), 0x05830000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x05844000 }, + { _MMIO(0x9888), 0x1b80c137 }, + { _MMIO(0x9888), 0x1d80c147 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x1180c000 }, + { 
_MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x15804000 }, + { _MMIO(0x9888), 0x4d801550 }, + { _MMIO(0x9888), 0x4f800331 }, + { _MMIO(0x9888), 0x43800802 }, + { _MMIO(0x9888), 0x51800400 }, + { _MMIO(0x9888), 0x458004a1 }, + { _MMIO(0x9888), 0x53805555 }, + { _MMIO(0x9888), 0x47800421 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f801421 }, + { _MMIO(0x9888), 0x41800845 }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 2); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 2); + + if (INTEL_INFO(dev_priv)->sseu.slice_mask & 0x01) { + regs[n] = mux_config_render_basic_0_slices_0x01; + lens[n] = ARRAY_SIZE(mux_config_render_basic_0_slices_0x01); + n++; + } + if (INTEL_INFO(dev_priv)->sseu.slice_mask & 0x02) { + regs[n] = mux_config_render_basic_1_slices_0x02; + lens[n] = ARRAY_SIZE(mux_config_render_basic_1_slices_0x02); + n++; + } + + return n; +} + +int i915_oa_select_metric_set_bdw(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + 
b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "b541bd57-0e0f-4154-b4c0-5858010a2bf7", + .attrs = attrs_render_basic, +}; + +int +i915_perf_register_sysfs_bdw(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + + return 0; + +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_bdw(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +} diff --git a/drivers/gpu/drm/i915/i915_oa_bdw.h b/drivers/gpu/drm/i915/i915_oa_bdw.h new file mode 100644 index 000000000000..6363ff9f64c0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_bdw.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT 
EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_BDW_H__ +#define __I915_OA_BDW_H__ + +extern int i915_oa_n_builtin_metric_sets_bdw; + +extern int i915_oa_select_metric_set_bdw(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_bdw(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_bdw(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_bxt.c b/drivers/gpu/drm/i915/i915_oa_bxt.c new file mode 100644 index 000000000000..345ec1d3faa7 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_bxt.c @@ -0,0 +1,248 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_bxt.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, +}; + +int i915_oa_n_builtin_metric_sets_bxt = 1; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic_0_sku_gte_0x03[] = { + { _MMIO(0x9888), 0x166c00f0 }, + { _MMIO(0x9888), 0x12120280 }, + { _MMIO(0x9888), 0x12320280 }, + { _MMIO(0x9888), 0x11930317 }, + { _MMIO(0x9888), 0x159303df }, + { _MMIO(0x9888), 0x3f900c00 }, + { _MMIO(0x9888), 0x419000a0 }, + { _MMIO(0x9888), 0x002d1000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d5000 }, + { _MMIO(0x9888), 0x0a2d1000 }, + { _MMIO(0x9888), 0x0c2e0800 }, + { _MMIO(0x9888), 0x0e2e5900 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4c8000 }, + { _MMIO(0x9888), 0x0e4c4000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e2000 }, + { _MMIO(0x9888), 0x1c4f0010 }, + { _MMIO(0x9888), 0x0a6c0053 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1a0fcc00 }, + { _MMIO(0x9888), 0x1c0f0002 }, + { _MMIO(0x9888), 0x1c2c0040 }, + { _MMIO(0x9888), 0x00101000 }, + { _MMIO(0x9888), 0x04101000 }, + { _MMIO(0x9888), 0x00114000 }, + { _MMIO(0x9888), 0x08114000 }, + { _MMIO(0x9888), 0x00120020 }, + { _MMIO(0x9888), 0x08120021 }, + { _MMIO(0x9888), 0x00141000 }, + { _MMIO(0x9888), 0x08141000 }, + { _MMIO(0x9888), 0x02308000 }, + { _MMIO(0x9888), 0x04302000 }, + {
_MMIO(0x9888), 0x06318000 }, + { _MMIO(0x9888), 0x08318000 }, + { _MMIO(0x9888), 0x06320800 }, + { _MMIO(0x9888), 0x08320840 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x06344000 }, + { _MMIO(0x9888), 0x08344000 }, + { _MMIO(0x9888), 0x0d931831 }, + { _MMIO(0x9888), 0x0f939f3f }, + { _MMIO(0x9888), 0x01939e80 }, + { _MMIO(0x9888), 0x039303bc }, + { _MMIO(0x9888), 0x0593000e }, + { _MMIO(0x9888), 0x1993002a }, + { _MMIO(0x9888), 0x07930000 }, + { _MMIO(0x9888), 0x09930000 }, + { _MMIO(0x9888), 0x1d900177 }, + { _MMIO(0x9888), 0x1f900187 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x53901110 }, + { _MMIO(0x9888), 0x43900423 }, + { _MMIO(0x9888), 0x55900111 }, + { _MMIO(0x9888), 0x47900c02 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900020 }, + { _MMIO(0x9888), 0x59901111 }, + { _MMIO(0x9888), 0x4b900421 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x45900821 }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + if (dev_priv->drm.pdev->revision >= 0x03) { + regs[n] = mux_config_render_basic_0_sku_gte_0x03; + lens[n] = ARRAY_SIZE(mux_config_render_basic_0_sku_gte_0x03); + n++; + } + + return n; +} + +int i915_oa_select_metric_set_bxt(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + 
dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "22b9519a-e9ba-4c41-8b54-f4f8ca14fa0a", + .attrs = attrs_render_basic, +}; + +int +i915_perf_register_sysfs_bxt(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) 
+ goto error_render_basic; + } + + return 0; + +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_bxt(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +} diff --git a/drivers/gpu/drm/i915/i915_oa_bxt.h b/drivers/gpu/drm/i915/i915_oa_bxt.h new file mode 100644 index 000000000000..6cf7ba746e7e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_bxt.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __I915_OA_BXT_H__ +#define __I915_OA_BXT_H__ + +extern int i915_oa_n_builtin_metric_sets_bxt; + +extern int i915_oa_select_metric_set_bxt(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_bxt(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_bxt(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_chv.c b/drivers/gpu/drm/i915/i915_oa_chv.c new file mode 100644 index 000000000000..b15f6c980d11 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_chv.c @@ -0,0 +1,238 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_chv.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, +}; + +int i915_oa_n_builtin_metric_sets_chv = 1; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic[] = { + { _MMIO(0x9888), 0x59800000 }, + { _MMIO(0x9888), 0x59800001 }, + { _MMIO(0x9888), 0x285a0006 }, + { _MMIO(0x9888), 0x2c110014 }, + { _MMIO(0x9888), 0x2e110000 }, + { _MMIO(0x9888), 0x2c310014 }, + { _MMIO(0x9888), 0x2e310000 }, + { _MMIO(0x9888), 0x2b8303df }, + { _MMIO(0x9888), 0x3580024f }, + { _MMIO(0x9888), 0x00580888 }, + { _MMIO(0x9888), 0x1e5a0015 }, + { _MMIO(0x9888), 0x205a0014 }, + { _MMIO(0x9888), 0x045a0000 }, + { _MMIO(0x9888), 0x025a0000 }, + { _MMIO(0x9888), 0x02180500 }, + { _MMIO(0x9888), 0x00190555 }, + { _MMIO(0x9888), 0x021d0500 }, + { _MMIO(0x9888), 0x021f0a00 }, + { _MMIO(0x9888), 0x00380444 }, + { _MMIO(0x9888), 0x02390500 }, + { _MMIO(0x9888), 0x003a0666 }, + { _MMIO(0x9888), 0x00100111 }, + { _MMIO(0x9888), 0x06110030 }, + { _MMIO(0x9888), 0x0a110031 }, + { _MMIO(0x9888), 0x0e110046 }, + { _MMIO(0x9888), 0x04110000 }, + { _MMIO(0x9888), 0x00110000 }, + { _MMIO(0x9888), 0x00130111 }, + { _MMIO(0x9888), 0x00300444 }, + { _MMIO(0x9888), 0x08310030 }, + { _MMIO(0x9888), 0x0c310031 }, + { _MMIO(0x9888), 0x10310046 }, + { _MMIO(0x9888), 0x04310000 }, + { _MMIO(0x9888), 0x00310000 }, + { _MMIO(0x9888), 0x00330444 }, + { _MMIO(0x9888), 0x038a0a00 }, + { _MMIO(0x9888), 
0x018b0fff }, + { _MMIO(0x9888), 0x038b0a00 }, + { _MMIO(0x9888), 0x01855000 }, + { _MMIO(0x9888), 0x03850055 }, + { _MMIO(0x9888), 0x13830021 }, + { _MMIO(0x9888), 0x15830020 }, + { _MMIO(0x9888), 0x1783002f }, + { _MMIO(0x9888), 0x1983002e }, + { _MMIO(0x9888), 0x1b83002d }, + { _MMIO(0x9888), 0x1d83002c }, + { _MMIO(0x9888), 0x05830000 }, + { _MMIO(0x9888), 0x01840555 }, + { _MMIO(0x9888), 0x03840500 }, + { _MMIO(0x9888), 0x23800074 }, + { _MMIO(0x9888), 0x2580007d }, + { _MMIO(0x9888), 0x05800000 }, + { _MMIO(0x9888), 0x01805000 }, + { _MMIO(0x9888), 0x03800055 }, + { _MMIO(0x9888), 0x01865000 }, + { _MMIO(0x9888), 0x03860055 }, + { _MMIO(0x9888), 0x01875000 }, + { _MMIO(0x9888), 0x03870055 }, + { _MMIO(0x9888), 0x418000aa }, + { _MMIO(0x9888), 0x4380000a }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x4780000a }, + { _MMIO(0x9888), 0x49800000 }, + { _MMIO(0x9888), 0x4b800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x55800000 }, + { _MMIO(0x9888), 0x57800000 }, + { _MMIO(0x9888), 0x59800000 }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_basic; + lens[n] = ARRAY_SIZE(mux_config_render_basic); + n++; + + return n; +} + +int i915_oa_select_metric_set_chv(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + 
dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "9d8a3af5-c02c-4a4a-b947-f1672469e0fb", + .attrs = attrs_render_basic, +}; + +int +i915_perf_register_sysfs_chv(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + + return 0; + +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_chv(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, 
mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +} diff --git a/drivers/gpu/drm/i915/i915_oa_chv.h b/drivers/gpu/drm/i915/i915_oa_chv.h new file mode 100644 index 000000000000..8b8bdc26d726 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_chv.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __I915_OA_CHV_H__ +#define __I915_OA_CHV_H__ + +extern int i915_oa_n_builtin_metric_sets_chv; + +extern int i915_oa_select_metric_set_chv(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_chv(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_chv(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt2.c b/drivers/gpu/drm/i915/i915_oa_sklgt2.c new file mode 100644 index 000000000000..9ab9d21ec335 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_sklgt2.c @@ -0,0 +1,238 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_sklgt2.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, +}; + +int i915_oa_n_builtin_metric_sets_sklgt2 = 1; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic_1_sku_gte_0x02[] = { + { _MMIO(0x9888), 0x166c01e0 }, + { _MMIO(0x9888), 0x12170280 }, + { _MMIO(0x9888), 0x12370280 }, + { _MMIO(0x9888), 0x11930317 }, + { _MMIO(0x9888), 0x159303df }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x1a4e0080 }, + { _MMIO(0x9888), 0x0a6c0053 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x0a1b4000 }, + { _MMIO(0x9888), 0x1c1c0001 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x042f1000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c8400 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0d2000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f6600 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x162c2200 }, + { _MMIO(0x9888), 0x062d8000 }, + { _MMIO(0x9888), 0x082d8000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x08133000 }, + { _MMIO(0x9888), 0x00170020 }, + { _MMIO(0x9888), 0x08170021 }, + { _MMIO(0x9888), 0x10170000 }, + { _MMIO(0x9888), 0x0633c000 }, + { _MMIO(0x9888), 0x0833c000 }, + { _MMIO(0x9888), 0x06370800 }, + { _MMIO(0x9888), 0x08370840 }, + { _MMIO(0x9888), 0x10370000 }, + 
{ _MMIO(0x9888), 0x0d933031 }, + { _MMIO(0x9888), 0x0f933e3f }, + { _MMIO(0x9888), 0x01933d00 }, + { _MMIO(0x9888), 0x0393073c }, + { _MMIO(0x9888), 0x0593000e }, + { _MMIO(0x9888), 0x1d930000 }, + { _MMIO(0x9888), 0x19930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x2f908000 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x15908000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190001f }, + { _MMIO(0x9888), 0x51904400 }, + { _MMIO(0x9888), 0x41900020 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c21 }, + { _MMIO(0x9888), 0x47900061 }, + { _MMIO(0x9888), 0x57904440 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900004 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x53904444 }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + if (dev_priv->drm.pdev->revision >= 0x02) { + regs[n] = mux_config_render_basic_1_sku_gte_0x02; + lens[n] = ARRAY_SIZE(mux_config_render_basic_1_sku_gte_0x02); + n++; + } + + return n; +} + +int i915_oa_select_metric_set_sklgt2(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + 
dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "f519e481-24d2-4d42-87c9-3fdd12c00202", + .attrs = attrs_render_basic, +}; + +int +i915_perf_register_sysfs_sklgt2(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + + return 0; + +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_sklgt2(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if 
(get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +} diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt2.h b/drivers/gpu/drm/i915/i915_oa_sklgt2.h new file mode 100644 index 000000000000..f4397baf3328 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_sklgt2.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __I915_OA_SKLGT2_H__ +#define __I915_OA_SKLGT2_H__ + +extern int i915_oa_n_builtin_metric_sets_sklgt2; + +extern int i915_oa_select_metric_set_sklgt2(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_sklgt2(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_sklgt2(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt3.c b/drivers/gpu/drm/i915/i915_oa_sklgt3.c new file mode 100644 index 000000000000..e32d3b3ad77a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_sklgt3.c @@ -0,0 +1,249 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_sklgt3.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, +}; + +int i915_oa_n_builtin_metric_sets_sklgt3 = 1; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic[] = { + { _MMIO(0x9888), 0x166c01e0 }, + { _MMIO(0x9888), 0x12170280 }, + { _MMIO(0x9888), 0x12370280 }, + { _MMIO(0x9888), 0x16ec01e0 }, + { _MMIO(0x9888), 0x11930317 }, + { _MMIO(0x9888), 0x159303df }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x1a4e0380 }, + { _MMIO(0x9888), 0x0a6c0053 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x0a1b4000 }, + { _MMIO(0x9888), 0x1c1c0001 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x042f1000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c8400 }, + { _MMIO(0x9888), 0x0c4c0002 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f6600 }, + { _MMIO(0x9888), 0x100f0001 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x162ca200 }, + { _MMIO(0x9888), 0x062d8000 }, + { _MMIO(0x9888), 0x082d8000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x08133000 }, + { _MMIO(0x9888), 0x00170020 }, + { _MMIO(0x9888), 0x08170021 }, + { _MMIO(0x9888), 0x10170000 }, + { _MMIO(0x9888), 0x0633c000 }, + { _MMIO(0x9888), 0x0833c000 }, + { 
_MMIO(0x9888), 0x06370800 }, + { _MMIO(0x9888), 0x08370840 }, + { _MMIO(0x9888), 0x10370000 }, + { _MMIO(0x9888), 0x1ace0200 }, + { _MMIO(0x9888), 0x0aec5300 }, + { _MMIO(0x9888), 0x10ec0000 }, + { _MMIO(0x9888), 0x1cec0000 }, + { _MMIO(0x9888), 0x0a9b8000 }, + { _MMIO(0x9888), 0x1c9c0002 }, + { _MMIO(0x9888), 0x0ccc0002 }, + { _MMIO(0x9888), 0x0a8d8000 }, + { _MMIO(0x9888), 0x108f0001 }, + { _MMIO(0x9888), 0x16ac8000 }, + { _MMIO(0x9888), 0x0d933031 }, + { _MMIO(0x9888), 0x0f933e3f }, + { _MMIO(0x9888), 0x01933d00 }, + { _MMIO(0x9888), 0x0393073c }, + { _MMIO(0x9888), 0x0593000e }, + { _MMIO(0x9888), 0x1d930000 }, + { _MMIO(0x9888), 0x19930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x2f908000 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x15908000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190003f }, + { _MMIO(0x9888), 0x51907710 }, + { _MMIO(0x9888), 0x419020a0 }, + { _MMIO(0x9888), 0x55901515 }, + { _MMIO(0x9888), 0x45900529 }, + { _MMIO(0x9888), 0x47901025 }, + { _MMIO(0x9888), 0x57907770 }, + { _MMIO(0x9888), 0x49902100 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900108 }, + { _MMIO(0x9888), 0x59900007 }, + { _MMIO(0x9888), 0x43902108 }, + { _MMIO(0x9888), 0x53907777 }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_basic; + lens[n] = ARRAY_SIZE(mux_config_render_basic); + n++; + + return n; +} + +int i915_oa_select_metric_set_sklgt3(struct drm_i915_private *dev_priv) +{ + 
dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "4616d450-2393-4836-8146-53c5ed84d359", + .attrs = attrs_render_basic, +}; + +int +i915_perf_register_sysfs_sklgt3(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = 
sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + + return 0; + +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_sklgt3(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +} diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt3.h b/drivers/gpu/drm/i915/i915_oa_sklgt3.h new file mode 100644 index 000000000000..c0accb1f9b74 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_sklgt3.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_SKLGT3_H__ +#define __I915_OA_SKLGT3_H__ + +extern int i915_oa_n_builtin_metric_sets_sklgt3; + +extern int i915_oa_select_metric_set_sklgt3(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_sklgt3(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_sklgt3(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt4.c b/drivers/gpu/drm/i915/i915_oa_sklgt4.c new file mode 100644 index 000000000000..ed034f190a6c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_sklgt4.c @@ -0,0 +1,260 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_sklgt4.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, +}; + +int i915_oa_n_builtin_metric_sets_sklgt4 = 1; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic[] = { + { _MMIO(0x9888), 0x166c01e0 }, + { _MMIO(0x9888), 0x12170280 }, + { _MMIO(0x9888), 0x12370280 }, + { _MMIO(0x9888), 0x16ec01e0 }, + { _MMIO(0x9888), 0x176c01e0 }, + { _MMIO(0x9888), 0x11930317 }, + { _MMIO(0x9888), 0x159303df }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x1a4e03b0 }, + { _MMIO(0x9888), 0x0a6c0053 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x0a1b4000 }, + { _MMIO(0x9888), 0x1c1c0001 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x042f1000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4ca400 }, + { _MMIO(0x9888), 0x0c4c0002 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f5600 }, + { _MMIO(0x9888), 0x100f0001 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { 
_MMIO(0x9888), 0x062d8000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x08133000 }, + { _MMIO(0x9888), 0x00170020 }, + { _MMIO(0x9888), 0x08170021 }, + { _MMIO(0x9888), 0x10170000 }, + { _MMIO(0x9888), 0x0633c000 }, + { _MMIO(0x9888), 0x06370800 }, + { _MMIO(0x9888), 0x10370000 }, + { _MMIO(0x9888), 0x1ace0230 }, + { _MMIO(0x9888), 0x0aec5300 }, + { _MMIO(0x9888), 0x10ec0000 }, + { _MMIO(0x9888), 0x1cec0000 }, + { _MMIO(0x9888), 0x0a9b8000 }, + { _MMIO(0x9888), 0x1c9c0002 }, + { _MMIO(0x9888), 0x0acc2000 }, + { _MMIO(0x9888), 0x0ccc0002 }, + { _MMIO(0x9888), 0x088d8000 }, + { _MMIO(0x9888), 0x0a8d8000 }, + { _MMIO(0x9888), 0x0e8f1000 }, + { _MMIO(0x9888), 0x108f0001 }, + { _MMIO(0x9888), 0x16ac8800 }, + { _MMIO(0x9888), 0x1b4e0020 }, + { _MMIO(0x9888), 0x096c5300 }, + { _MMIO(0x9888), 0x116c0000 }, + { _MMIO(0x9888), 0x1d6c0000 }, + { _MMIO(0x9888), 0x091b8000 }, + { _MMIO(0x9888), 0x1b1c8000 }, + { _MMIO(0x9888), 0x0b4c2000 }, + { _MMIO(0x9888), 0x090d8000 }, + { _MMIO(0x9888), 0x0f0f1000 }, + { _MMIO(0x9888), 0x172c0800 }, + { _MMIO(0x9888), 0x0d933031 }, + { _MMIO(0x9888), 0x0f933e3f }, + { _MMIO(0x9888), 0x01933d00 }, + { _MMIO(0x9888), 0x0393073c }, + { _MMIO(0x9888), 0x0593000e }, + { _MMIO(0x9888), 0x1d930000 }, + { _MMIO(0x9888), 0x19930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x2f908000 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x15908000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190003f }, + { _MMIO(0x9888), 0x5190ff30 }, + { _MMIO(0x9888), 0x41900060 }, + { _MMIO(0x9888), 0x55903033 }, + { _MMIO(0x9888), 0x45901421 }, + { _MMIO(0x9888), 0x47900803 }, + { _MMIO(0x9888), 0x5790fff1 }, + { _MMIO(0x9888), 0x49900001 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 
0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x5990000f }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x5390ffff }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_basic; + lens[n] = ARRAY_SIZE(mux_config_render_basic); + n++; + + return n; +} + +int i915_oa_select_metric_set_sklgt4(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = 
show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "bad77c24-cc64-480d-99bf-e7b740713800", + .attrs = attrs_render_basic, +}; + +int +i915_perf_register_sysfs_sklgt4(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + + return 0; + +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_sklgt4(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +} diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt4.h b/drivers/gpu/drm/i915/i915_oa_sklgt4.h new file mode 100644 index 000000000000..1b718f15f62e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_sklgt4.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_SKLGT4_H__ +#define __I915_OA_SKLGT4_H__ + +extern int i915_oa_n_builtin_metric_sets_sklgt4; + +extern int i915_oa_select_metric_set_sklgt4(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_sklgt4(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_sklgt4(struct drm_i915_private *dev_priv); + +#endif From 19f81df2859eb10e92d68991cefa39f826dea013 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:03 +0100 Subject: [PATCH 164/341] drm/i915/perf: Add OA unit support for Gen 8+ Enables access to OA unit metrics for BDW, CHV, SKL and BXT which all share (more-or-less) the same OA unit design. 
Of particular note in comparison to Haswell: some OA unit HW config state has become per-context state and as a consequence it is somewhat more complicated to manage synchronous state changes from the cpu while there's no guarantee of what context (if any) is currently actively running on the gpu. The periodic sampling frequency which can be particularly useful for system-wide analysis (as opposed to command stream synchronised MI_REPORT_PERF_COUNT commands) is perhaps the most surprising state to have become per-context saved and restored (while the OABUFFER destination is still a shared, system-wide resource). This support for gen8+ takes care to consider a number of timing challenges involved in synchronously updating per-context state primarily by programming all config state from the cpu and updating all current and saved contexts synchronously while the OA unit is still disabled. The driver intentionally avoids depending on command streamer programming to update OA state considering the lack of synchronization between the automatic loading of OACTXCONTROL state (that includes the periodic sampling state and enable state) on context restore and the parsing of any general purpose BB the driver can control. I.e. this implementation is careful to avoid the possibility of a context restore temporarily enabling any out-of-date periodic sampling state. In addition to the risk of transiently-out-of-date state being loaded automatically, there are also internal HW latencies involved in the loading of MUX configurations which would be difficult to account for from the command streamer (and we only want to enable the unit once the MUX configuration is complete).
Since the Gen8+ OA unit design no longer supports clock gating the unit off for a single given context (which effectively stopped any progress of counters while any other context was running) and instead supports tagging OA reports with a context ID for filtering on the CPU, it means we can no longer hide the system-wide progress of counters from a non-privileged application only interested in metrics for its own context. Although we could theoretically try and subtract the progress of other contexts before forwarding reports via read() we aren't in a position to filter reports captured via MI_REPORT_PERF_COUNT commands. As a result, for Gen8+, we always require the dev.i915.perf_stream_paranoid to be unset for any access to OA metrics if not root. v5: Drain submitted requests when enabling metric set to ensure no lite-restore erases the context image we just updated (Lionel) v6: In addition to drain, switch to kernel context & update all context in place (Chris) v7: Add missing mutex_unlock() if switching to kernel context fails (Matthew) v8: Simplify OA period/flex-eu-counters programming by using the batchbuffer instead of modifying ctx-image (Lionel) v9: Back to updating the context image (due to erroneous testing, batchbuffer programming the OA unit doesn't actually work) (Lionel) Pin context before updating context image (Chris) Drop MMIO programming now that we switch to a kernel context with right values in initial context image (Chris) v10: Just pin_map the contexts we want to modify or let the configuration happen on first use (Chris) v11: Update kernel context OA config through the batchbuffer rather than on the fly ctx-image update (Lionel) v12: Rework OA context registers update again by switching away from user contexts and reconfiguring the kernel context through the batchbuffer and updating all the other contexts' context image. Also take care to lock slice/subslice configuration when OA is on.
(Lionel) v13: Request rpcs updates on all engine when updating the OA config (Lionel) v14: Drop any kind of rpcs management now that we monitor sseu configuration changes in a later patch (Lionel) Remove usleep after programming the NOA configs on Gen8+, this doesn't seem to be needed (Lionel) v15: Respect coding style for block comments (Chris) v16: Add missing i915_add_request() in case we fail to emit OA configuration (Matthew) Signed-off-by: Robert Bragg Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld \o/ Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.h | 46 +- drivers/gpu/drm/i915/i915_perf.c | 1032 +++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 22 + drivers/gpu/drm/i915/intel_lrc.c | 2 + include/uapi/drm/i915_drm.h | 19 +- 5 files changed, 1027 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2faad94c9ef2..eefb35a5d27d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2018,9 +2018,17 @@ struct i915_oa_ops { void (*init_oa_buffer)(struct drm_i915_private *dev_priv); /** - * @enable_metric_set: Applies any MUX configuration to set up the - * Boolean and Custom (B/C) counters that are part of the counter - * reports being sampled. May apply system constraints such as + * @select_metric_set: The auto generated code that checks whether a + * requested OA config is applicable to the system and if so sets up + * the mux, oa and flex eu register config pointers according to the + * current dev_priv->perf.oa.metrics_set. + */ + int (*select_metric_set)(struct drm_i915_private *dev_priv); + + /** + * @enable_metric_set: Selects and applies any MUX configuration to set + * up the Boolean and Custom (B/C) counters that are part of the + * counter reports being sampled. May apply system constraints such as * disabling EU clock gating as required. 
*/ int (*enable_metric_set)(struct drm_i915_private *dev_priv); @@ -2051,20 +2059,13 @@ struct i915_oa_ops { size_t *offset); /** - * @oa_buffer_check: Check for OA buffer data + update tail + * @oa_hw_tail_read: read the OA tail pointer register * - * This is either called via fops or the poll check hrtimer (atomic - * ctx) without any locks taken. - * - * It's safe to read OA config state here unlocked, assuming that this - * is only called while the stream is enabled, while the global OA - * configuration can't be modified. - * - * Efficiency is more important than avoiding some false positives - * here, which will be handled gracefully - likely resulting in an - * %EAGAIN error for userspace. + * In particular this enables us to share all the fiddly code for + * handling the OA unit tail pointer race that affects multiple + * generations. */ - bool (*oa_buffer_check)(struct drm_i915_private *dev_priv); + u32 (*oa_hw_tail_read)(struct drm_i915_private *dev_priv); }; struct intel_cdclk_state { @@ -2429,6 +2430,7 @@ struct drm_i915_private { struct { struct i915_vma *vma; u8 *vaddr; + u32 last_ctx_id; int format; int format_size; @@ -2498,6 +2500,15 @@ struct drm_i915_private { } oa_buffer; u32 gen7_latched_oastatus1; + u32 ctx_oactxctrl_offset; + u32 ctx_flexeu0_offset; + + /** + * The RPT_ID/reason field for Gen8+ includes a bit + * to determine if the CTX ID in the report is valid + * but the specific bit differs between Gen 8 and 9 + */ + u32 gen8_valid_ctx_bit; struct i915_oa_ops ops; const struct i915_oa_format *oa_formats; @@ -2810,6 +2821,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_KBL_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x590E || \ INTEL_DEVID(dev_priv) == 0x5915 || \ INTEL_DEVID(dev_priv) == 0x591E) +#define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \ + (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0010) #define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) #define IS_SKL_GT4(dev_priv) 
(IS_SKYLAKE(dev_priv) && \ @@ -3554,6 +3567,9 @@ i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void i915_oa_init_reg_state(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + uint32_t *reg_state); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7e56b895fd34..a6af4d7dc4d6 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -196,6 +196,12 @@ #include "i915_drv.h" #include "i915_oa_hsw.h" +#include "i915_oa_bdw.h" +#include "i915_oa_chv.h" +#include "i915_oa_sklgt2.h" +#include "i915_oa_sklgt3.h" +#include "i915_oa_sklgt4.h" +#include "i915_oa_bxt.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -215,7 +221,7 @@ * * Although this can be observed explicitly while copying reports to userspace * by checking for a zeroed report-id field in tail reports, we want to account - * for this earlier, as part of the _oa_buffer_check to avoid lots of redundant + * for this earlier, as part of the oa_buffer_check to avoid lots of redundant * read() attempts. * * In effect we define a tail pointer for reading that lags the real tail @@ -237,7 +243,7 @@ * indicates that an updated tail pointer is needed. 
* * Most of the implementation details for this workaround are in - * gen7_oa_buffer_check_unlocked() and gen7_appand_oa_reports() + * oa_buffer_check_unlocked() and _append_oa_reports() * * Note for posterity: previously the driver used to define an effective tail * pointer that lagged the real pointer by a 'tail margin' measured in bytes @@ -272,6 +278,13 @@ static u32 i915_perf_stream_paranoid = true; #define INVALID_CTX_ID 0xffffffff +/* On Gen8+ automatically triggered OA reports include a 'reason' field... */ +#define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_SHIFT 19 +#define OAREPORT_REASON_TIMER (1<<0) +#define OAREPORT_REASON_CTX_SWITCH (1<<3) +#define OAREPORT_REASON_CLK_RATIO (1<<5) + /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate * @@ -303,6 +316,13 @@ static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A12] = { 0, 64 }, + [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, + [I915_OA_FORMAT_C4_B8] = { 7, 64 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** @@ -332,8 +352,20 @@ struct perf_open_properties { int oa_period_exponent; }; +static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv) +{ + return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; +} + +static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv) +{ + u32 oastatus1 = I915_READ(GEN7_OASTATUS1); + + return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; +} + /** - * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state + * oa_buffer_check_unlocked - check for data and update tail ptr state * @dev_priv: i915 device instance * * This is either called via fops (for blocking reads in user ctx) or the poll @@ -356,12 +388,11 @@ struct perf_open_properties { * * Returns: %true if the OA buffer contains data, else %false */ -static bool 
gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) +static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) { int report_size = dev_priv->perf.oa.oa_buffer.format_size; unsigned long flags; unsigned int aged_idx; - u32 oastatus1; u32 head, hw_tail, aged_tail, aging_tail; u64 now; @@ -381,8 +412,7 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset; aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset; - oastatus1 = I915_READ(GEN7_OASTATUS1); - hw_tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; + hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv); /* The tail pointer increases in 64 byte increments, * not in report_size steps... @@ -404,6 +434,7 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) if (aging_tail != INVALID_TAIL_PTR && ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC)) { + aged_idx ^= 1; dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; @@ -533,6 +564,300 @@ static int append_oa_sample(struct i915_perf_stream *stream, return 0; } +/** + * Copies all buffered OA reports into userspace read() buffer. + * @stream: An i915-perf stream opened for OA metrics + * @buf: destination buffer given by userspace + * @count: the number of bytes userspace wants to read + * @offset: (inout): the current position for writing into @buf + * + * Notably any error condition resulting in a short read (-%ENOSPC or + * -%EFAULT) will be returned even though one or more records may + * have been successfully copied. In this case it's up to the caller + * to decide if the error should be squashed before returning to + * userspace. + * + * Note: reports are consumed from the head, and appended to the + * tail, so the tail chases the head?... If you think that's mad + * and back-to-front you're not alone, but this follows the + * Gen PRM naming convention. 
+ * + * Returns: 0 on success, negative error code on failure. + */ +static int gen8_append_oa_reports(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + int report_size = dev_priv->perf.oa.oa_buffer.format_size; + u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; + u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + u32 mask = (OA_BUFFER_SIZE - 1); + size_t start_offset = *offset; + unsigned long flags; + unsigned int aged_tail_idx; + u32 head, tail; + u32 taken; + int ret = 0; + + if (WARN_ON(!stream->enabled)) + return -EIO; + + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + head = dev_priv->perf.oa.oa_buffer.head; + aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; + tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* + * An invalid tail pointer here means we're still waiting for the poll + * hrtimer callback to give us a pointer + */ + if (tail == INVALID_TAIL_PTR) + return -EAGAIN; + + /* + * NB: oa_buffer.head/tail include the gtt_offset which we don't want + * while indexing relative to oa_buf_base. + */ + head -= gtt_offset; + tail -= gtt_offset; + + /* + * An out of bounds or misaligned head or tail pointer implies a driver + * bug since we validate + align the tail pointers we read from the + * hardware and we are in full control of the head pointer which should + * only be incremented by multiples of the report size (notably also + * all a power of two). 
+ */ + if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || + tail > OA_BUFFER_SIZE || tail % report_size, + "Inconsistent OA buffer pointers: head = %u, tail = %u\n", + head, tail)) + return -EIO; + + + for (/* none */; + (taken = OA_TAKEN(tail, head)); + head = (head + report_size) & mask) { + u8 *report = oa_buf_base + head; + u32 *report32 = (void *)report; + u32 ctx_id; + u32 reason; + + /* + * All the report sizes factor neatly into the buffer + * size so we never expect to see a report split + * between the beginning and end of the buffer. + * + * Given the initial alignment check a misalignment + * here would imply a driver bug that would result + * in an overrun. + */ + if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { + DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); + break; + } + + /* + * The reason field includes flags identifying what + * triggered this specific report (mostly timer + * triggered or e.g. due to a context switch). + * + * This field is never expected to be zero so we can + * check that the report isn't invalid before copying + * it to userspace... + */ + reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & + OAREPORT_REASON_MASK); + if (reason == 0) { + if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + DRM_NOTE("Skipping spurious, invalid OA report\n"); + continue; + } + + /* + * XXX: Just keep the lower 21 bits for now since I'm not + * entirely sure if the HW touches any of the higher bits in + * this field + */ + ctx_id = report32[2] & 0x1fffff; + + /* + * Squash whatever is in the CTX_ID field if it's marked as + * invalid to be sure we avoid false-positive, single-context + * filtering below... + * + * Note: that we don't clear the valid_ctx_bit so userspace can + * understand that the ID has been squashed by the kernel. 
+ */ + if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit)) + ctx_id = report32[2] = INVALID_CTX_ID; + + /* + * NB: For Gen 8 the OA unit no longer supports clock gating + * off for a specific context and the kernel can't securely + * stop the counters from updating as system-wide / global + * values. + * + * Automatic reports now include a context ID so reports can be + * filtered on the cpu but it's not worth trying to + * automatically subtract/hide counter progress for other + * contexts while filtering since we can't stop userspace + * issuing MI_REPORT_PERF_COUNT commands which would still + * provide a side-band view of the real values. + * + * To allow userspace (such as Mesa/GL_INTEL_performance_query) + * to normalize counters for a single filtered context then it + * needs be forwarded bookend context-switch reports so that it + * can track switches in between MI_REPORT_PERF_COUNT commands + * and can itself subtract/ignore the progress of counters + * associated with other contexts. Note that the hardware + * automatically triggers reports when switching to a new + * context which are tagged with the ID of the newly active + * context. To avoid the complexity (and likely fragility) of + * reading ahead while parsing reports to try and minimize + * forwarding redundant context switch reports (i.e. between + * other, unrelated contexts) we simply elect to forward them + * all. + * + * We don't rely solely on the reason field to identify context + * switches since it's not-uncommon for periodic samples to + * identify a switch before any 'context switch' report. + */ + if (!dev_priv->perf.oa.exclusive_stream->ctx || + dev_priv->perf.oa.specific_ctx_id == ctx_id || + (dev_priv->perf.oa.oa_buffer.last_ctx_id == + dev_priv->perf.oa.specific_ctx_id) || + reason & OAREPORT_REASON_CTX_SWITCH) { + + /* + * While filtering for a single context we avoid + * leaking the IDs of other contexts. 
+ */ + if (dev_priv->perf.oa.exclusive_stream->ctx && + dev_priv->perf.oa.specific_ctx_id != ctx_id) { + report32[2] = INVALID_CTX_ID; + } + + ret = append_oa_sample(stream, buf, count, offset, + report); + if (ret) + break; + + dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id; + } + + /* + * The above reason field sanity check is based on + * the assumption that the OA buffer is initially + * zeroed and we reset the field after copying so the + * check is still meaningful once old reports start + * being overwritten. + */ + report32[0] = 0; + } + + if (start_offset != *offset) { + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* + * We removed the gtt_offset for the copy loop above, indexing + * relative to oa_buf_base so put back here... + */ + head += gtt_offset; + + I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); + dev_priv->perf.oa.oa_buffer.head = head; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + } + + return ret; +} + +/** + * gen8_oa_read - copy status records then buffered OA reports + * @stream: An i915-perf stream opened for OA metrics + * @buf: destination buffer given by userspace + * @count: the number of bytes userspace wants to read + * @offset: (inout): the current position for writing into @buf + * + * Checks OA unit status registers and if necessary appends corresponding + * status records for userspace (such as for a buffer full condition) and then + * initiate appending any buffered OA reports. + * + * Updates @offset according to the number of bytes successfully copied into + * the userspace buffer. + * + * NB: some data may be successfully copied to the userspace buffer + * even if an error is returned, and this is reflected in the + * updated @offset. 
+ * + * Returns: zero on success or a negative error code + */ +static int gen8_oa_read(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + u32 oastatus; + int ret; + + if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) + return -EIO; + + oastatus = I915_READ(GEN8_OASTATUS); + + /* + * We treat OABUFFER_OVERFLOW as a significant error: + * + * Although theoretically we could handle this more gracefully + * sometimes, some Gens don't correctly suppress certain + * automatically triggered reports in this condition and so we + * have to assume that old reports are now being trampled + * over. + * + * Considering how we don't currently give userspace control + * over the OA buffer size and always configure a large 16MB + * buffer, then a buffer overflow does anyway likely indicate + * that something has gone quite badly wrong. + */ + if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) { + ret = append_oa_status(stream, buf, count, offset, + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); + if (ret) + return ret; + + DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", + dev_priv->perf.oa.period_exponent); + + dev_priv->perf.oa.ops.oa_disable(dev_priv); + dev_priv->perf.oa.ops.oa_enable(dev_priv); + + /* + * Note: .oa_enable() is expected to re-init the oabuffer and + * reset GEN8_OASTATUS for us + */ + oastatus = I915_READ(GEN8_OASTATUS); + } + + if (oastatus & GEN8_OASTATUS_REPORT_LOST) { + ret = append_oa_status(stream, buf, count, offset, + DRM_I915_PERF_RECORD_OA_REPORT_LOST); + if (ret) + return ret; + I915_WRITE(GEN8_OASTATUS, + oastatus & ~GEN8_OASTATUS_REPORT_LOST); + } + + return gen8_append_oa_reports(stream, buf, count, offset); +} + /** * Copies all buffered OA reports into userspace read() buffer. 
* @stream: An i915-perf stream opened for OA metrics @@ -732,7 +1057,8 @@ static int gen7_oa_read(struct i915_perf_stream *stream, if (ret) return ret; - DRM_DEBUG("OA buffer overflow: force restart\n"); + DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", + dev_priv->perf.oa.period_exponent); dev_priv->perf.oa.ops.oa_disable(dev_priv); dev_priv->perf.oa.ops.oa_enable(dev_priv); @@ -775,7 +1101,7 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) return -EIO; return wait_event_interruptible(dev_priv->perf.oa.poll_wq, - dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)); + oa_buffer_check_unlocked(dev_priv)); } /** @@ -832,30 +1158,38 @@ static int i915_oa_read(struct i915_perf_stream *stream, static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct intel_ring *ring; - int ret; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; + if (i915.enable_execlists) + dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; + else { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_ring *ring; + int ret; - /* As the ID is the gtt offset of the context's vma we pin - * the vma to ensure the ID remains fixed. - * - * NB: implied RCS engine... - */ - ring = engine->context_pin(engine, stream->ctx); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (IS_ERR(ring)) - return PTR_ERR(ring); + ret = i915_mutex_lock_interruptible(&dev_priv->drm); + if (ret) + return ret; - /* Explicitly track the ID (instead of calling i915_ggtt_offset() - * on the fly) considering the difference with gen8+ and - * execlists - */ - dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(stream->ctx->engine[engine->id].state); + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. + * + * NB: implied RCS engine... 
+ */ + ring = engine->context_pin(engine, stream->ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (IS_ERR(ring)) + return PTR_ERR(ring); + + + /* + * Explicitly track the ID (instead of calling + * i915_ggtt_offset() on the fly) considering the difference + * with gen8+ and execlists + */ + dev_priv->perf.oa.specific_ctx_id = + i915_ggtt_offset(stream->ctx->engine[engine->id].state); + } return 0; } @@ -870,14 +1204,19 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - mutex_lock(&dev_priv->drm.struct_mutex); + if (i915.enable_execlists) { + dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + } else { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - engine->context_unpin(engine, stream->ctx); + mutex_lock(&dev_priv->drm.struct_mutex); - mutex_unlock(&dev_priv->drm.struct_mutex); + dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + engine->context_unpin(engine, stream->ctx); + + mutex_unlock(&dev_priv->drm.struct_mutex); + } } static void @@ -901,6 +1240,12 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) BUG_ON(stream != dev_priv->perf.oa.exclusive_stream); + /* + * Unset exclusive_stream first, it might be checked while + * disabling the metric set on gen8+. 
+ */ + dev_priv->perf.oa.exclusive_stream = NULL; + dev_priv->perf.oa.ops.disable_metric_set(dev_priv); free_oa_buffer(dev_priv); @@ -911,8 +1256,6 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); - dev_priv->perf.oa.exclusive_stream = NULL; - if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", dev_priv->perf.oa.spurious_report_rs.missed); @@ -967,6 +1310,65 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) dev_priv->perf.oa.pollin = false; } +static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) +{ + u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + I915_WRITE(GEN8_OASTATUS, 0); + I915_WRITE(GEN8_OAHEADPTR, gtt_offset); + dev_priv->perf.oa.oa_buffer.head = gtt_offset; + + I915_WRITE(GEN8_OABUFFER_UDW, 0); + + /* + * PRM says: + * + * "This MMIO must be set before the OATAILPTR + * register and after the OAHEADPTR register. This is + * to enable proper functionality of the overflow + * bit." + */ + I915_WRITE(GEN8_OABUFFER, gtt_offset | + OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT); + I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); + + /* Mark that we need updated tail pointers to read from... */ + dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + + /* + * Reset state used to recognise context switches, affecting which + * reports we will forward to userspace while filtering for a single + * context. 
+ */ + dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* + * NB: although the OA buffer will initially be allocated + * zeroed via shmfs (and so this memset is redundant when + * first allocating), we may re-init the OA buffer, either + * when re-enabling a stream or in error/reset paths. + * + * The reason we clear the buffer for each re-init is for the + * sanity check in gen8_append_oa_reports() that looks at the + * reason field to make sure it's non-zero which relies on + * the assumption that new reports are being written to zeroed + * memory... + */ + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + + /* + * Maybe make ->pollin per-stream state if we support multiple + * concurrent streams in the future. + */ + dev_priv->perf.oa.pollin = false; +} + static int alloc_oa_buffer(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *bo; @@ -1114,6 +1516,324 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) ~GT_NOA_ENABLE)); } +/* + * NB: It must always remain pointer safe to run this even if the OA unit + * has been disabled. + * + * It's fine to put out-of-date values into these per-context registers + * in the case that the OA unit has been disabled. 
+ */ +static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, + u32 *reg_state) +{ + struct drm_i915_private *dev_priv = ctx->i915; + const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs; + int n_flex_regs = dev_priv->perf.oa.flex_regs_len; + u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; + u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; + /* The MMIO offsets for Flex EU registers aren't contiguous */ + u32 flex_mmio[] = { + i915_mmio_reg_offset(EU_PERF_CNTL0), + i915_mmio_reg_offset(EU_PERF_CNTL1), + i915_mmio_reg_offset(EU_PERF_CNTL2), + i915_mmio_reg_offset(EU_PERF_CNTL3), + i915_mmio_reg_offset(EU_PERF_CNTL4), + i915_mmio_reg_offset(EU_PERF_CNTL5), + i915_mmio_reg_offset(EU_PERF_CNTL6), + }; + int i; + + reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); + reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << + GEN8_OA_TIMER_PERIOD_SHIFT) | + (dev_priv->perf.oa.periodic ? + GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + + for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { + u32 state_offset = ctx_flexeu0 + i * 2; + u32 mmio = flex_mmio[i]; + + /* + * This arbitrary default will select the 'EU FPU0 Pipeline + * Active' event. In the future it's anticipated that there + * will be an explicit 'No Event' we can select, but not yet... + */ + u32 value = 0; + int j; + + for (j = 0; j < n_flex_regs; j++) { + if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) { + value = flex_regs[j].value; + break; + } + } + + reg_state[state_offset] = mmio; + reg_state[state_offset+1] = value; + } +} + +/* + * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This + * is only used by the kernel context. 
+ *
+ * Emits a single MI_LOAD_REGISTER_IMM that programs GEN8_OACTXCONTROL
+ * followed by every Flex EU register listed in flex_mmio[]. A Flex EU
+ * register that the selected metric set does not list in flex_regs is
+ * programmed with 0.
+ *
+ * Returns 0 on success or the error reported by intel_ring_begin().
+ */
+static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
+{
+	struct drm_i915_private *dev_priv = req->i915;
+	const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs;
+	int n_flex_regs = dev_priv->perf.oa.flex_regs_len;
+	/* The MMIO offsets for Flex EU registers aren't contiguous */
+	u32 flex_mmio[] = {
+		i915_mmio_reg_offset(EU_PERF_CNTL0),
+		i915_mmio_reg_offset(EU_PERF_CNTL1),
+		i915_mmio_reg_offset(EU_PERF_CNTL2),
+		i915_mmio_reg_offset(EU_PERF_CNTL3),
+		i915_mmio_reg_offset(EU_PERF_CNTL4),
+		i915_mmio_reg_offset(EU_PERF_CNTL5),
+		i915_mmio_reg_offset(EU_PERF_CNTL6),
+	};
+	u32 *cs;
+	int i;
+
+	/*
+	 * The loop below always writes one (offset, value) pair per
+	 * flex_mmio[] entry, however many entries the metric set supplies,
+	 * so both the ring reservation and the LRI register count must be
+	 * derived from ARRAY_SIZE(flex_mmio) rather than n_flex_regs:
+	 * LRI header + OACTXCONTROL pair + flex pairs + MI_NOOP.
+	 */
+	cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);
+
+	*cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+	*cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+		(dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+		GEN8_OA_COUNTER_RESUME;
+
+	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
+		u32 mmio = flex_mmio[i];
+
+		/*
+		 * This arbitrary default will select the 'EU FPU0 Pipeline
+		 * Active' event. In the future it's anticipated that there
+		 * will be an explicit 'No Event' we can select, but not
+		 * yet...
+		 */
+		u32 value = 0;
+		int j;
+
+		for (j = 0; j < n_flex_regs; j++) {
+			if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) {
+				value = flex_regs[j].value;
+				break;
+			}
+		}
+
+		*cs++ = mmio;
+		*cs++ = value;
+	}
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return 0;
+}
+
+/*
+ * Builds a kernel-context request on the render engine that carries the
+ * current OA configuration (see gen8_emit_oa_config()) and orders it after
+ * the last request of every timeline on that engine.
+ *
+ * Must be called with struct_mutex held. The request is always added, even
+ * when emitting the OA configuration or switching context fails; the error
+ * is returned to the caller.
+ */
+static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine = dev_priv->engine[RCS];
+	struct i915_gem_timeline *timeline;
+	struct drm_i915_gem_request *req;
+	int ret;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	i915_gem_retire_requests(dev_priv);
+
+	req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	ret = gen8_emit_oa_config(req);
+	if (ret) {
+		i915_add_request(req);
+		return ret;
+	}
+
+	/* Queue this switch after all other activity */
+	list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
+		struct drm_i915_gem_request *prev;
+		struct intel_timeline *tl;
+
+		tl = &timeline->engine[engine->id];
+		prev = i915_gem_active_raw(&tl->last_request,
+					   &dev_priv->drm.struct_mutex);
+		if (prev)
+			i915_sw_fence_await_sw_fence_gfp(&req->submit,
+							 &prev->submit,
+							 GFP_KERNEL);
+	}
+
+	ret = i915_switch_context(req);
+	i915_add_request(req);
+
+	return ret;
+}
+
+/*
+ * Manages updating the per-context aspects of the OA stream
+ * configuration across all contexts.
+ *
+ * The awkward consideration here is that OACTXCONTROL controls the
+ * exponent for periodic sampling which is primarily used for system
+ * wide profiling where we'd like a consistent sampling period even in
+ * the face of context switches.
+ *
+ * Our approach of updating the register state context (as opposed to
+ * say using a workaround batch buffer) ensures that the hardware
+ * won't automatically reload an out-of-date timer exponent even
+ * transiently before a WA BB could be parsed.
+ * + * This function needs to: + * - Ensure the currently running context's per-context OA state is + * updated + * - Ensure that all existing contexts will have the correct per-context + * OA state if they are scheduled for use. + * - Ensure any new contexts will be initialized with the correct + * per-context OA state. + * + * Note: it's only the RCS/Render context that has any OA state. + */ +static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, + bool interruptible) +{ + struct i915_gem_context *ctx; + int ret; + unsigned int wait_flags = I915_WAIT_LOCKED; + + if (interruptible) { + ret = i915_mutex_lock_interruptible(&dev_priv->drm); + if (ret) + return ret; + + wait_flags |= I915_WAIT_INTERRUPTIBLE; + } else { + mutex_lock(&dev_priv->drm.struct_mutex); + } + + /* Switch away from any user context. */ + ret = gen8_switch_to_updated_kernel_context(dev_priv); + if (ret) + goto out; + + /* + * The OA register config is setup through the context image. This image + * might be written to by the GPU on context switch (in particular on + * lite-restore). This means we can't safely update a context's image, + * if this context is scheduled/submitted to run on the GPU. + * + * We could emit the OA register config through the batch buffer but + * this might leave small interval of time where the OA unit is + * configured at an invalid sampling period. + * + * So far the best way to work around this issue seems to be draining + * the GPU from any submitted work. + */ + ret = i915_gem_wait_for_idle(dev_priv, wait_flags); + if (ret) + goto out; + + /* Update all contexts now that we've stalled the submission. 
*/ + list_for_each_entry(ctx, &dev_priv->context_list, link) { + struct intel_context *ce = &ctx->engine[RCS]; + u32 *regs; + + /* OA settings will be set upon first use */ + if (!ce->state) + continue; + + regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto out; + } + + ce->state->obj->mm.dirty = true; + regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); + + gen8_update_reg_state_unlocked(ctx, regs); + + i915_gem_object_unpin_map(ce->state->obj); + } + + out: + mutex_unlock(&dev_priv->drm.struct_mutex); + + return ret; +} + +static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) +{ + int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv); + int i; + + if (ret) + return ret; + + /* + * We disable slice/unslice clock ratio change reports on SKL since + * they are too noisy. The HW generates a lot of redundant reports + * where the ratio hasn't really changed causing a lot of redundant + * work to processes and increasing the chances we'll hit buffer + * overruns. + * + * Although we don't currently use the 'disable overrun' OABUFFER + * feature it's worth noting that clock ratio reports have to be + * disabled before considering to use that feature since the HW doesn't + * correctly block these reports. + * + * Currently none of the high-level metrics we have depend on knowing + * this ratio to normalize. + * + * Note: This register is not power context saved and restored, but + * that's OK considering that we disable RC6 while the OA unit is + * enabled. + * + * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to + * be read back from automatically triggered reports, as part of the + * RPT_ID field. 
+ */ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) { + I915_WRITE(GEN8_OA_DEBUG, + _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); + } + + /* + * Update all contexts prior writing the mux configurations as we need + * to make sure all slices/subslices are ON before writing to NOA + * registers. + */ + ret = gen8_configure_all_contexts(dev_priv, true); + if (ret) + return ret; + + I915_WRITE(GDT_CHICKEN_BITS, 0xA0); + for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) { + config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs[i], + dev_priv->perf.oa.mux_regs_lens[i]); + } + I915_WRITE(GDT_CHICKEN_BITS, 0x80); + + config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs, + dev_priv->perf.oa.b_counter_regs_len); + + return 0; +} + +static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) +{ + /* Reset all contexts' slices/subslices configurations. */ + gen8_configure_all_contexts(dev_priv, false); +} + static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->perf.hook_lock); @@ -1158,6 +1878,31 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv) spin_unlock_irqrestore(&dev_priv->perf.hook_lock, flags); } +static void gen8_oa_enable(struct drm_i915_private *dev_priv) +{ + u32 report_format = dev_priv->perf.oa.oa_buffer.format; + + /* + * Reset buf pointers so we don't forward reports from before now. + * + * Think carefully if considering trying to avoid this, since it + * also ensures status flags and the buffer itself are cleared + * in error paths, and we have checks for invalid reports based + * on the assumption that certain fields are written to zeroed + * memory which this helps maintains. 
+ */ + gen8_init_oa_buffer(dev_priv); + + /* + * Note: we don't rely on the hardware to perform single context + * filtering and instead filter on the cpu based on the context-id + * field of reports + */ + I915_WRITE(GEN8_OACONTROL, (report_format << + GEN8_OA_REPORT_FORMAT_SHIFT) | + GEN8_OA_COUNTER_ENABLE); +} + /** * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -1184,6 +1929,11 @@ static void gen7_oa_disable(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_OACONTROL, 0); } +static void gen8_oa_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN8_OACONTROL, 0); +} + /** * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -1362,6 +2112,21 @@ err_oa_buf_alloc: return ret; } +void i915_oa_init_reg_state(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + u32 *reg_state) +{ + struct drm_i915_private *dev_priv = engine->i915; + + if (engine->id != RCS) + return; + + if (!dev_priv->perf.initialized) + return; + + gen8_update_reg_state_unlocked(ctx, reg_state); +} + /** * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation * @stream: An i915 perf stream @@ -1487,7 +2252,7 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) container_of(hrtimer, typeof(*dev_priv), perf.oa.poll_check_timer); - if (dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)) { + if (oa_buffer_check_unlocked(dev_priv)) { dev_priv->perf.oa.pollin = true; wake_up(&dev_priv->perf.oa.poll_wq); } @@ -1776,6 +2541,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, struct i915_gem_context *specific_ctx = NULL; struct i915_perf_stream *stream = NULL; unsigned long f_flags = 0; + bool privileged_op = true; int stream_fd; int ret; @@ -1793,12 +2559,29 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, } } + /* + * On Haswell the OA unit supports 
clock gating off for a specific + * context and in this mode there's no visibility of metrics for the + * rest of the system, which we consider acceptable for a + * non-privileged client. + * + * For Gen8+ the OA unit no longer supports clock gating off for a + * specific context and the kernel can't securely stop the counters + * from updating as system-wide / global values. Even though we can + * filter reports based on the included context ID we can't block + * clients from seeing the raw / global counter values via + * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to + * enable the OA unit by default. + */ + if (IS_HASWELL(dev_priv) && specific_ctx) + privileged_op = false; + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option * to determine if it's ok to access system wide OA counters * without CAP_SYS_ADMIN privileges. */ - if (!specific_ctx && + if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); ret = -EACCES; @@ -2070,9 +2853,6 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, */ void i915_perf_register(struct drm_i915_private *dev_priv) { - if (!IS_HASWELL(dev_priv)) - return; - if (!dev_priv->perf.initialized) return; @@ -2088,11 +2868,38 @@ void i915_perf_register(struct drm_i915_private *dev_priv) if (!dev_priv->perf.metrics_kobj) goto exit; - if (i915_perf_register_sysfs_hsw(dev_priv)) { - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + if (IS_HASWELL(dev_priv)) { + if (i915_perf_register_sysfs_hsw(dev_priv)) + goto sysfs_error; + } else if (IS_BROADWELL(dev_priv)) { + if (i915_perf_register_sysfs_bdw(dev_priv)) + goto sysfs_error; + } else if (IS_CHERRYVIEW(dev_priv)) { + if (i915_perf_register_sysfs_chv(dev_priv)) + goto sysfs_error; + } else if (IS_SKYLAKE(dev_priv)) { + if (IS_SKL_GT2(dev_priv)) { + if 
(i915_perf_register_sysfs_sklgt2(dev_priv)) + goto sysfs_error; + } else if (IS_SKL_GT3(dev_priv)) { + if (i915_perf_register_sysfs_sklgt3(dev_priv)) + goto sysfs_error; + } else if (IS_SKL_GT4(dev_priv)) { + if (i915_perf_register_sysfs_sklgt4(dev_priv)) + goto sysfs_error; + } else + goto sysfs_error; + } else if (IS_BROXTON(dev_priv)) { + if (i915_perf_register_sysfs_bxt(dev_priv)) + goto sysfs_error; } + goto exit; + +sysfs_error: + kobject_put(dev_priv->perf.metrics_kobj); + dev_priv->perf.metrics_kobj = NULL; + exit: mutex_unlock(&dev_priv->perf.lock); } @@ -2108,13 +2915,24 @@ exit: */ void i915_perf_unregister(struct drm_i915_private *dev_priv) { - if (!IS_HASWELL(dev_priv)) - return; - if (!dev_priv->perf.metrics_kobj) return; - i915_perf_unregister_sysfs_hsw(dev_priv); + if (IS_HASWELL(dev_priv)) + i915_perf_unregister_sysfs_hsw(dev_priv); + else if (IS_BROADWELL(dev_priv)) + i915_perf_unregister_sysfs_bdw(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + i915_perf_unregister_sysfs_chv(dev_priv); + else if (IS_SKYLAKE(dev_priv)) { + if (IS_SKL_GT2(dev_priv)) + i915_perf_unregister_sysfs_sklgt2(dev_priv); + else if (IS_SKL_GT3(dev_priv)) + i915_perf_unregister_sysfs_sklgt3(dev_priv); + else if (IS_SKL_GT4(dev_priv)) + i915_perf_unregister_sysfs_sklgt4(dev_priv); + } else if (IS_BROXTON(dev_priv)) + i915_perf_unregister_sysfs_bxt(dev_priv); kobject_put(dev_priv->perf.metrics_kobj); dev_priv->perf.metrics_kobj = NULL; @@ -2173,36 +2991,105 @@ static struct ctl_table dev_root[] = { */ void i915_perf_init(struct drm_i915_private *dev_priv) { - if (!IS_HASWELL(dev_priv)) - return; + dev_priv->perf.oa.n_builtin_sets = 0; - hrtimer_init(&dev_priv->perf.oa.poll_check_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; - init_waitqueue_head(&dev_priv->perf.oa.poll_wq); + if (IS_HASWELL(dev_priv)) { + dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; + dev_priv->perf.oa.ops.enable_metric_set = 
hsw_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; + dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; + dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; + dev_priv->perf.oa.ops.read = gen7_oa_read; + dev_priv->perf.oa.ops.oa_hw_tail_read = + gen7_oa_hw_tail_read; - INIT_LIST_HEAD(&dev_priv->perf.streams); - mutex_init(&dev_priv->perf.lock); - spin_lock_init(&dev_priv->perf.hook_lock); - spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); + dev_priv->perf.oa.oa_formats = hsw_oa_formats; - dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; - dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; - dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; - dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; - dev_priv->perf.oa.ops.read = gen7_oa_read; - dev_priv->perf.oa.ops.oa_buffer_check = - gen7_oa_buffer_check_unlocked; + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_hsw; + } else if (i915.enable_execlists) { + /* Note: that although we could theoretically also support the + * legacy ringbuffer mode on BDW (and earlier iterations of + * this driver, before upstreaming did this) it didn't seem + * worth the complexity to maintain now that BDW+ enable + * execlist mode by default. 
+ */ - dev_priv->perf.oa.oa_formats = hsw_oa_formats; + if (IS_GEN8(dev_priv)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_hsw; + if (IS_BROADWELL(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_bdw; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_bdw; + } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_chv; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_chv; + } + } else if (IS_GEN9(dev_priv)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); - dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + if (IS_SKL_GT2(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_sklgt2; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_sklgt2; + } else if (IS_SKL_GT3(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_sklgt3; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_sklgt3; + } else if (IS_SKL_GT4(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_sklgt4; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_sklgt4; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_bxt; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_bxt; + } + } - dev_priv->perf.initialized = true; + if (dev_priv->perf.oa.n_builtin_sets) { + dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; + dev_priv->perf.oa.ops.enable_metric_set = + gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = + gen8_disable_metric_set; + 
dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; + dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; + dev_priv->perf.oa.ops.read = gen8_oa_read; + dev_priv->perf.oa.ops.oa_hw_tail_read = + gen8_oa_hw_tail_read; + + dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; + } + } + + if (dev_priv->perf.oa.n_builtin_sets) { + hrtimer_init(&dev_priv->perf.oa.poll_check_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; + init_waitqueue_head(&dev_priv->perf.oa.poll_wq); + + INIT_LIST_HEAD(&dev_priv->perf.streams); + mutex_init(&dev_priv->perf.lock); + spin_lock_init(&dev_priv->perf.hook_lock); + spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); + + dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + + dev_priv->perf.initialized = true; + } } /** @@ -2217,5 +3104,6 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) unregister_sysctl_table(dev_priv->perf.sysctl_header); memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); + dev_priv->perf.initialized = false; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 88e4707f571d..bd535f12db18 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -656,6 +656,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_OACTXID _MMIO(0x2364) +#define GEN8_OA_DEBUG _MMIO(0x2B04) +#define GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1<<5) +#define GEN9_OA_DEBUG_INCLUDE_CLK_RATIO (1<<6) +#define GEN9_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1<<2) +#define GEN9_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1<<1) + #define GEN8_OACONTROL _MMIO(0x2B00) #define GEN8_OA_REPORT_FORMAT_A12 (0<<2) #define GEN8_OA_REPORT_FORMAT_A12_B8_C8 (2<<2) @@ -677,6 +683,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN7_OABUFFER_STOP_RESUME_ENABLE (1<<1) #define GEN7_OABUFFER_RESUME (1<<0) +#define GEN8_OABUFFER_UDW _MMIO(0x23b4) #define GEN8_OABUFFER _MMIO(0x2b14) 
#define GEN7_OASTATUS1 _MMIO(0x2364) @@ -695,7 +702,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_OASTATUS_REPORT_LOST (1<<0) #define GEN8_OAHEADPTR _MMIO(0x2B0C) +#define GEN8_OAHEADPTR_MASK 0xffffffc0 #define GEN8_OATAILPTR _MMIO(0x2B10) +#define GEN8_OATAILPTR_MASK 0xffffffc0 #define OABUFFER_SIZE_128K (0<<3) #define OABUFFER_SIZE_256K (1<<3) @@ -708,7 +717,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OA_MEM_SELECT_GGTT (1<<0) +/* + * Flexible, Aggregate EU Counter Registers. + * Note: these aren't contiguous + */ #define EU_PERF_CNTL0 _MMIO(0xe458) +#define EU_PERF_CNTL1 _MMIO(0xe558) +#define EU_PERF_CNTL2 _MMIO(0xe658) +#define EU_PERF_CNTL3 _MMIO(0xe758) +#define EU_PERF_CNTL4 _MMIO(0xe45c) +#define EU_PERF_CNTL5 _MMIO(0xe55c) +#define EU_PERF_CNTL6 _MMIO(0xe65c) #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 @@ -2494,6 +2513,9 @@ enum skl_disp_power_wells { #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10) +#define GEN6_RCS_PWR_FSM _MMIO(0x22ac) +#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) + /* Fuse readout registers for GT */ #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d49dbaa931b5..7404cf2aac28 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1962,6 +1962,8 @@ static void execlists_init_reg_state(u32 *regs, regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, make_rpcs(dev_priv)); + + i915_oa_init_reg_state(engine, ctx, regs); } } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 464547d08173..15bc9f78ba4d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1316,13 +1316,18 @@ struct drm_i915_gem_context_param { }; enum drm_i915_oa_format { - I915_OA_FORMAT_A13 = 
1, - I915_OA_FORMAT_A29, - I915_OA_FORMAT_A13_B8_C8, - I915_OA_FORMAT_B4_C8, - I915_OA_FORMAT_A45_B8_C8, - I915_OA_FORMAT_B4_C8_A16, - I915_OA_FORMAT_C4_B8, + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_MAX /* non-ABI */ }; From fc59921178fd63f1dbe445c2fc86e6ca997a4744 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:04 +0100 Subject: [PATCH 165/341] drm/i915/perf: Add more OA configs for BDW, CHV, SKL + BXT These are auto generated from an XML description of metric sets, currently maintained in gputop, ref: https://github.com/rib/gputop > gputop-data/oa-*.xml > scripts/i915-perf-kernelgen.py $ make -C gputop-data -f Makefile.xml Signed-off-by: Robert Bragg Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_oa_bdw.c | 4986 ++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_oa_bxt.c | 2444 +++++++++++- drivers/gpu/drm/i915/i915_oa_chv.c | 2637 ++++++++++++- drivers/gpu/drm/i915/i915_oa_hsw.c | 48 + drivers/gpu/drm/i915/i915_oa_sklgt2.c | 3243 +++++++++++++++- drivers/gpu/drm/i915/i915_oa_sklgt3.c | 2792 +++++++++++++- drivers/gpu/drm/i915/i915_oa_sklgt4.c | 2835 +++++++++++++- 8 files changed, 18981 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eefb35a5d27d..101b66b3f86a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2418,8 +2418,8 @@ struct drm_i915_private { int metrics_set; - const struct i915_oa_reg *mux_regs[2]; - int mux_regs_lens[2]; + const struct i915_oa_reg *mux_regs[6]; + 
int mux_regs_lens[6]; int n_mux_configs; const struct i915_oa_reg *b_counter_regs; diff --git a/drivers/gpu/drm/i915/i915_oa_bdw.c b/drivers/gpu/drm/i915/i915_oa_bdw.c index 9a11c03b4ecb..d4462c2aaaee 100644 --- a/drivers/gpu/drm/i915/i915_oa_bdw.c +++ b/drivers/gpu/drm/i915/i915_oa_bdw.c @@ -33,9 +33,30 @@ enum metric_set_id { METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_DATA_PORT_READS_COALESCING, + METRIC_SET_ID_DATA_PORT_WRITES_COALESCING, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_L3_2, + METRIC_SET_ID_L3_3, + METRIC_SET_ID_L3_4, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER_1, + METRIC_SET_ID_SAMPLER_2, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_VME_PIPE, + METRIC_SET_ID_TEST_OA, }; -int i915_oa_n_builtin_metric_sets_bdw = 1; +int i915_oa_n_builtin_metric_sets_bdw = 22; static const struct i915_oa_reg b_counter_config_render_basic[] = { { _MMIO(0x2710), 0x00000000 }, @@ -300,6 +321,3751 @@ get_render_basic_mux_config(struct drm_i915_private *dev_priv, return n; } +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic_0_slices_0x01[] = { + { _MMIO(0x9888), 0x105c00e0 }, + { _MMIO(0x9888), 0x105800e0 }, + { _MMIO(0x9888), 
0x103800e0 }, + { _MMIO(0x9888), 0x3580001a }, + { _MMIO(0x9888), 0x3b800060 }, + { _MMIO(0x9888), 0x3d800005 }, + { _MMIO(0x9888), 0x065c2100 }, + { _MMIO(0x9888), 0x0a5c0041 }, + { _MMIO(0x9888), 0x0c5c6600 }, + { _MMIO(0x9888), 0x005c6580 }, + { _MMIO(0x9888), 0x085c8000 }, + { _MMIO(0x9888), 0x0e5c8000 }, + { _MMIO(0x9888), 0x00580042 }, + { _MMIO(0x9888), 0x08582080 }, + { _MMIO(0x9888), 0x0c58004c }, + { _MMIO(0x9888), 0x0e582580 }, + { _MMIO(0x9888), 0x005b4000 }, + { _MMIO(0x9888), 0x185b1000 }, + { _MMIO(0x9888), 0x1a5b0104 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1faa00 }, + { _MMIO(0x9888), 0x101f02aa }, + { _MMIO(0x9888), 0x08380042 }, + { _MMIO(0x9888), 0x0a382080 }, + { _MMIO(0x9888), 0x0e38404c }, + { _MMIO(0x9888), 0x0238404b }, + { _MMIO(0x9888), 0x00384000 }, + { _MMIO(0x9888), 0x16380000 }, + { _MMIO(0x9888), 0x18381145 }, + { _MMIO(0x9888), 0x04380000 }, + { _MMIO(0x9888), 0x0039a000 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x02392000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8aaaa0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x238b02a0 }, + { _MMIO(0x9888), 0x258b5550 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x1f850a80 }, + { _MMIO(0x9888), 0x2185aaa0 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x03844000 }, + { 
_MMIO(0x9888), 0x17808137 }, + { _MMIO(0x9888), 0x1980c147 }, + { _MMIO(0x9888), 0x1b80c0e5 }, + { _MMIO(0x9888), 0x1d80c0e3 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x13804000 }, + { _MMIO(0x9888), 0x15800000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d801000 }, + { _MMIO(0x9888), 0x4f800111 }, + { _MMIO(0x9888), 0x43800062 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800062 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800062 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f801062 }, + { _MMIO(0x9888), 0x41801084 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic_2_slices_0x02[] = { + { _MMIO(0x9888), 0x10dc00e0 }, + { _MMIO(0x9888), 0x10d800e0 }, + { _MMIO(0x9888), 0x10b800e0 }, + { _MMIO(0x9888), 0x3580001a }, + { _MMIO(0x9888), 0x3b800060 }, + { _MMIO(0x9888), 0x3d800005 }, + { _MMIO(0x9888), 0x06dc2100 }, + { _MMIO(0x9888), 0x0adc0041 }, + { _MMIO(0x9888), 0x0cdc6600 }, + { _MMIO(0x9888), 0x00dc6580 }, + { _MMIO(0x9888), 0x08dc8000 }, + { _MMIO(0x9888), 0x0edc8000 }, + { _MMIO(0x9888), 0x00d80042 }, + { _MMIO(0x9888), 0x08d82080 }, + { _MMIO(0x9888), 0x0cd8004c }, + { _MMIO(0x9888), 0x0ed82580 }, + { _MMIO(0x9888), 0x00db4000 }, + { _MMIO(0x9888), 0x18db1000 }, + { _MMIO(0x9888), 0x1adb0104 }, + { _MMIO(0x9888), 0x0c9fa800 }, + { _MMIO(0x9888), 0x0e9faa00 }, + { _MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x08b80042 }, + { _MMIO(0x9888), 0x0ab82080 }, + { _MMIO(0x9888), 0x0eb8404c }, + { _MMIO(0x9888), 0x02b8404b }, + { _MMIO(0x9888), 0x00b84000 }, + { _MMIO(0x9888), 0x16b80000 }, + { _MMIO(0x9888), 0x18b81145 }, + { _MMIO(0x9888), 0x04b80000 }, + { _MMIO(0x9888), 0x00b9a000 }, + { _MMIO(0x9888), 0x06b98000 }, + { _MMIO(0x9888), 0x08b9a000 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x02b92000 }, + { _MMIO(0x9888), 0x01888000 }, + { 
_MMIO(0x9888), 0x0d88f800 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x238b0540 }, + { _MMIO(0x9888), 0x258baaa0 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x018c4000 }, + { _MMIO(0x9888), 0x0f8c4000 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c5500 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x018da000 }, + { _MMIO(0x9888), 0x078d8000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x038d2000 }, + { _MMIO(0x9888), 0x1f850a80 }, + { _MMIO(0x9888), 0x2185aaa0 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x03844000 }, + { _MMIO(0x9888), 0x17808137 }, + { _MMIO(0x9888), 0x1980c147 }, + { _MMIO(0x9888), 0x1b80c0e5 }, + { _MMIO(0x9888), 0x1d80c0e3 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x13804000 }, + { _MMIO(0x9888), 0x15800000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d805000 }, + { _MMIO(0x9888), 0x4f800555 }, + { _MMIO(0x9888), 0x43800062 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800062 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800062 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800062 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int 
*lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 2); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 2); + + if (INTEL_INFO(dev_priv)->sseu.slice_mask & 0x01) { + regs[n] = mux_config_compute_basic_0_slices_0x01; + lens[n] = ARRAY_SIZE(mux_config_compute_basic_0_slices_0x01); + n++; + } + if (INTEL_INFO(dev_priv)->sseu.slice_mask & 0x02) { + regs[n] = mux_config_compute_basic_2_slices_0x02; + lens[n] = ARRAY_SIZE(mux_config_compute_basic_2_slices_0x02); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x0a1e0000 }, + { _MMIO(0x9888), 0x0c1f000f }, + { _MMIO(0x9888), 0x10176800 }, + { _MMIO(0x9888), 0x1191001f }, + { _MMIO(0x9888), 0x0b880320 }, + { _MMIO(0x9888), 0x01890c40 }, + { _MMIO(0x9888), 0x118a1c00 }, + { _MMIO(0x9888), 0x118d7c00 }, + { _MMIO(0x9888), 0x118e0020 }, + { 
_MMIO(0x9888), 0x118f4c00 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x13900001 }, + { _MMIO(0x9888), 0x065c4000 }, + { _MMIO(0x9888), 0x0c3d8000 }, + { _MMIO(0x9888), 0x06584000 }, + { _MMIO(0x9888), 0x0c5b4000 }, + { _MMIO(0x9888), 0x081e0040 }, + { _MMIO(0x9888), 0x0e1e0000 }, + { _MMIO(0x9888), 0x021f5400 }, + { _MMIO(0x9888), 0x001f0000 }, + { _MMIO(0x9888), 0x101f0010 }, + { _MMIO(0x9888), 0x0e1f0080 }, + { _MMIO(0x9888), 0x0c384000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x0c13c000 }, + { _MMIO(0x9888), 0x06164000 }, + { _MMIO(0x9888), 0x06170012 }, + { _MMIO(0x9888), 0x00170000 }, + { _MMIO(0x9888), 0x01910005 }, + { _MMIO(0x9888), 0x07880002 }, + { _MMIO(0x9888), 0x01880c00 }, + { _MMIO(0x9888), 0x0f880000 }, + { _MMIO(0x9888), 0x0d880000 }, + { _MMIO(0x9888), 0x05880000 }, + { _MMIO(0x9888), 0x09890032 }, + { _MMIO(0x9888), 0x078a0800 }, + { _MMIO(0x9888), 0x0f8a0a00 }, + { _MMIO(0x9888), 0x198a4000 }, + { _MMIO(0x9888), 0x1b8a2000 }, + { _MMIO(0x9888), 0x1d8a0000 }, + { _MMIO(0x9888), 0x038a4000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x238b54c0 }, + { _MMIO(0x9888), 0x258baa55 }, + { _MMIO(0x9888), 0x278b0019 }, + { _MMIO(0x9888), 0x198c0100 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x0f8d0015 }, + { _MMIO(0x9888), 0x018d1000 }, + { _MMIO(0x9888), 0x098d8000 }, + { _MMIO(0x9888), 0x0b8df000 }, + { _MMIO(0x9888), 0x0d8d3000 }, + { _MMIO(0x9888), 0x038de000 }, + { _MMIO(0x9888), 0x058d3000 }, + { _MMIO(0x9888), 0x0d8e0004 }, + { _MMIO(0x9888), 0x058e000c }, + { _MMIO(0x9888), 0x098e0000 }, + { _MMIO(0x9888), 0x078e0000 }, + { _MMIO(0x9888), 0x038e0000 }, + { _MMIO(0x9888), 0x0b8f0020 }, + { _MMIO(0x9888), 0x198f0c00 }, + { _MMIO(0x9888), 0x078f8000 }, + { _MMIO(0x9888), 0x098f4000 }, + { _MMIO(0x9888), 0x0b900980 }, + { _MMIO(0x9888), 0x03900d80 }, + { _MMIO(0x9888), 0x01900000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 
0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d801111 }, + { _MMIO(0x9888), 0x3d800800 }, + { _MMIO(0x9888), 0x4f801011 }, + { _MMIO(0x9888), 0x43800443 }, + { _MMIO(0x9888), 0x51801111 }, + { _MMIO(0x9888), 0x45800422 }, + { _MMIO(0x9888), 0x53801111 }, + { _MMIO(0x9888), 0x47800c60 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800422 }, + { _MMIO(0x9888), 0x41800021 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 
0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads[] = { + { _MMIO(0x9888), 0x198b0343 }, + { _MMIO(0x9888), 0x13845800 }, + { _MMIO(0x9888), 0x15840018 }, + { _MMIO(0x9888), 0x3580001a }, + { _MMIO(0x9888), 0x038b6300 }, + { _MMIO(0x9888), 0x058b6b62 }, + { _MMIO(0x9888), 0x078b006a }, + { _MMIO(0x9888), 0x118b0000 }, + { _MMIO(0x9888), 0x238b0000 }, + { _MMIO(0x9888), 0x258b0000 }, + { _MMIO(0x9888), 0x1f85a080 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385000a }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x01840018 }, + { _MMIO(0x9888), 0x07844c80 }, + { _MMIO(0x9888), 0x09840d9a }, + { _MMIO(0x9888), 0x0b840e9c }, + { _MMIO(0x9888), 0x0d840f9e }, + { _MMIO(0x9888), 0x0f840010 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x03848000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x2f8000e5 }, + { _MMIO(0x9888), 
0x138080e3 }, + { _MMIO(0x9888), 0x1580c0e1 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x11804000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f804000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800800 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800842 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800842 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47801042 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800084 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static int +get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { 
_MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes[] = { + { _MMIO(0x9888), 0x198b0343 }, + { _MMIO(0x9888), 0x13845400 }, + { _MMIO(0x9888), 0x3580001a }, + { _MMIO(0x9888), 0x3d800805 }, + { _MMIO(0x9888), 0x038b6300 }, + { _MMIO(0x9888), 0x058b6b62 }, + { _MMIO(0x9888), 0x078b006a }, + { _MMIO(0x9888), 0x118b0000 }, + { _MMIO(0x9888), 0x238b0000 }, + { _MMIO(0x9888), 0x258b0000 }, + { _MMIO(0x9888), 0x1f85a080 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x23850002 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x01840010 }, + { _MMIO(0x9888), 0x07844880 }, + { _MMIO(0x9888), 0x09840992 }, + { _MMIO(0x9888), 0x0b840a94 }, + { _MMIO(0x9888), 0x0d840b96 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x03848000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x2d800147 }, + { _MMIO(0x9888), 0x2f8000e5 }, + { _MMIO(0x9888), 0x138080e3 }, + { _MMIO(0x9888), 0x1580c0e1 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x11804000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f800000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800842 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800842 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47801082 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800084 }, + { _MMIO(0x9888), 0x41800000 }, +}; + 
+static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { _MMIO(0x2778), 0x0007fc6a }, + { _MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { _MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended_0_subslices_0x01[] = { + { _MMIO(0x9888), 0x143d0160 }, + { _MMIO(0x9888), 0x163d2800 }, + { _MMIO(0x9888), 0x183d0120 }, + { _MMIO(0x9888), 0x105800e0 }, + { _MMIO(0x9888), 0x005cc000 }, + { _MMIO(0x9888), 0x065c8000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x0a5cc000 }, + { _MMIO(0x9888), 0x0c5cc000 }, + { _MMIO(0x9888), 0x0e5cc000 }, + { _MMIO(0x9888), 0x025cc000 }, + { _MMIO(0x9888), 0x045cc000 }, + { _MMIO(0x9888), 0x003d0011 }, + { _MMIO(0x9888), 
0x063d0900 }, + { _MMIO(0x9888), 0x083d0a13 }, + { _MMIO(0x9888), 0x0a3d0b15 }, + { _MMIO(0x9888), 0x0c3d2317 }, + { _MMIO(0x9888), 0x043d21b7 }, + { _MMIO(0x9888), 0x103d0000 }, + { _MMIO(0x9888), 0x0e3d0000 }, + { _MMIO(0x9888), 0x1a3d0000 }, + { _MMIO(0x9888), 0x0e5825c1 }, + { _MMIO(0x9888), 0x00586100 }, + { _MMIO(0x9888), 0x0258204c }, + { _MMIO(0x9888), 0x06588000 }, + { _MMIO(0x9888), 0x0858c000 }, + { _MMIO(0x9888), 0x0a58c000 }, + { _MMIO(0x9888), 0x0c58c000 }, + { _MMIO(0x9888), 0x0458c000 }, + { _MMIO(0x9888), 0x005b4000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x185b5400 }, + { _MMIO(0x9888), 0x1a5b0155 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x085b4000 }, + { _MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1faa2a }, + { _MMIO(0x9888), 0x101f02aa }, + { _MMIO(0x9888), 0x00384000 }, + { _MMIO(0x9888), 0x0e384000 }, + { _MMIO(0x9888), 0x16384000 }, + { _MMIO(0x9888), 0x18381555 }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x04384000 }, + { _MMIO(0x9888), 0x06384000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0039a000 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8aaaa0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x238b2aa0 }, + { _MMIO(0x9888), 0x258b5551 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa2 }, + { _MMIO(0x9888), 0x2385002a }, + { 
_MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended_2_subslices_0x02[] = { + { _MMIO(0x9888), 0x105c00e0 }, + { _MMIO(0x9888), 0x145b0160 }, + { _MMIO(0x9888), 0x165b2800 }, + { _MMIO(0x9888), 0x185b0120 }, + { _MMIO(0x9888), 0x0e5c25c1 }, + { _MMIO(0x9888), 0x005c6100 }, + { _MMIO(0x9888), 0x025c204c }, + { _MMIO(0x9888), 0x065c8000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x0a5cc000 }, + { _MMIO(0x9888), 0x0c5cc000 }, + { _MMIO(0x9888), 0x045cc000 }, + { _MMIO(0x9888), 0x005b0011 }, + { _MMIO(0x9888), 0x065b0900 }, + { _MMIO(0x9888), 0x085b0a13 }, + { _MMIO(0x9888), 0x0a5b0b15 }, + { _MMIO(0x9888), 0x0c5b2317 }, + { _MMIO(0x9888), 0x045b21b7 }, + { _MMIO(0x9888), 0x105b0000 }, + { _MMIO(0x9888), 0x0e5b0000 }, 
+ { _MMIO(0x9888), 0x1a5b0000 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1faa2a }, + { _MMIO(0x9888), 0x101f02aa }, + { _MMIO(0x9888), 0x00384000 }, + { _MMIO(0x9888), 0x0e384000 }, + { _MMIO(0x9888), 0x16384000 }, + { _MMIO(0x9888), 0x18381555 }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x04384000 }, + { _MMIO(0x9888), 0x06384000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0039a000 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8aaaa0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x238b2aa0 }, + { _MMIO(0x9888), 0x258b5551 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa2 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 
0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended_4_subslices_0x04[] = { + { _MMIO(0x9888), 0x103800e0 }, + { _MMIO(0x9888), 0x143a0160 }, + { _MMIO(0x9888), 0x163a2800 }, + { _MMIO(0x9888), 0x183a0120 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1faa2a }, + { _MMIO(0x9888), 0x101f02aa }, + { _MMIO(0x9888), 0x0e38a5c1 }, + { _MMIO(0x9888), 0x0038a100 }, + { _MMIO(0x9888), 0x0238204c }, + { _MMIO(0x9888), 0x16388000 }, + { _MMIO(0x9888), 0x183802aa }, + { _MMIO(0x9888), 0x04380000 }, + { _MMIO(0x9888), 0x06380000 }, + { _MMIO(0x9888), 0x08388000 }, + { _MMIO(0x9888), 0x0a388000 }, + { _MMIO(0x9888), 0x0039a000 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x003a0011 }, + { _MMIO(0x9888), 0x063a0900 }, + { _MMIO(0x9888), 0x083a0a13 }, + { _MMIO(0x9888), 0x0a3a0b15 }, + { _MMIO(0x9888), 0x0c3a2317 }, + { _MMIO(0x9888), 0x043a21b7 }, + { _MMIO(0x9888), 0x103a0000 }, + { _MMIO(0x9888), 0x0e3a0000 }, + { _MMIO(0x9888), 0x1a3a0000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8aaaa0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { 
_MMIO(0x9888), 0x238b2aa0 }, + { _MMIO(0x9888), 0x258b5551 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa2 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended_1_subslices_0x08[] = { + { _MMIO(0x9888), 0x14bd0160 }, + { _MMIO(0x9888), 0x16bd2800 }, + { _MMIO(0x9888), 0x18bd0120 }, + { _MMIO(0x9888), 0x10d800e0 }, + { _MMIO(0x9888), 0x00dcc000 }, + { _MMIO(0x9888), 0x06dc8000 }, + { _MMIO(0x9888), 0x08dcc000 }, + { _MMIO(0x9888), 0x0adcc000 }, + { _MMIO(0x9888), 0x0cdcc000 }, + { _MMIO(0x9888), 0x0edcc000 }, + { _MMIO(0x9888), 0x02dcc000 }, + { _MMIO(0x9888), 0x04dcc000 }, + { _MMIO(0x9888), 0x00bd0011 }, + { _MMIO(0x9888), 0x06bd0900 }, 
+ { _MMIO(0x9888), 0x08bd0a13 }, + { _MMIO(0x9888), 0x0abd0b15 }, + { _MMIO(0x9888), 0x0cbd2317 }, + { _MMIO(0x9888), 0x04bd21b7 }, + { _MMIO(0x9888), 0x10bd0000 }, + { _MMIO(0x9888), 0x0ebd0000 }, + { _MMIO(0x9888), 0x1abd0000 }, + { _MMIO(0x9888), 0x0ed825c1 }, + { _MMIO(0x9888), 0x00d86100 }, + { _MMIO(0x9888), 0x02d8204c }, + { _MMIO(0x9888), 0x06d88000 }, + { _MMIO(0x9888), 0x08d8c000 }, + { _MMIO(0x9888), 0x0ad8c000 }, + { _MMIO(0x9888), 0x0cd8c000 }, + { _MMIO(0x9888), 0x04d8c000 }, + { _MMIO(0x9888), 0x00db4000 }, + { _MMIO(0x9888), 0x0edb4000 }, + { _MMIO(0x9888), 0x18db5400 }, + { _MMIO(0x9888), 0x1adb0155 }, + { _MMIO(0x9888), 0x02db4000 }, + { _MMIO(0x9888), 0x04db4000 }, + { _MMIO(0x9888), 0x06db4000 }, + { _MMIO(0x9888), 0x08db4000 }, + { _MMIO(0x9888), 0x0adb4000 }, + { _MMIO(0x9888), 0x0c9fa800 }, + { _MMIO(0x9888), 0x0e9faa2a }, + { _MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x00b84000 }, + { _MMIO(0x9888), 0x0eb84000 }, + { _MMIO(0x9888), 0x16b84000 }, + { _MMIO(0x9888), 0x18b81555 }, + { _MMIO(0x9888), 0x02b84000 }, + { _MMIO(0x9888), 0x04b84000 }, + { _MMIO(0x9888), 0x06b84000 }, + { _MMIO(0x9888), 0x08b84000 }, + { _MMIO(0x9888), 0x0ab84000 }, + { _MMIO(0x9888), 0x00b9a000 }, + { _MMIO(0x9888), 0x06b98000 }, + { _MMIO(0x9888), 0x08b9a000 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x04b9a000 }, + { _MMIO(0x9888), 0x01888000 }, + { _MMIO(0x9888), 0x0d88f800 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { _MMIO(0x9888), 0x238b5540 }, + { _MMIO(0x9888), 0x258baaa2 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x018c4000 }, + { _MMIO(0x9888), 0x0f8c4000 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c5500 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 
0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x018da000 }, + { _MMIO(0x9888), 0x078d8000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa2 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended_3_subslices_0x10[] = { + { _MMIO(0x9888), 0x10dc00e0 }, + { _MMIO(0x9888), 0x14db0160 }, + { _MMIO(0x9888), 0x16db2800 }, + { _MMIO(0x9888), 0x18db0120 }, + { 
_MMIO(0x9888), 0x0edc25c1 }, + { _MMIO(0x9888), 0x00dc6100 }, + { _MMIO(0x9888), 0x02dc204c }, + { _MMIO(0x9888), 0x06dc8000 }, + { _MMIO(0x9888), 0x08dcc000 }, + { _MMIO(0x9888), 0x0adcc000 }, + { _MMIO(0x9888), 0x0cdcc000 }, + { _MMIO(0x9888), 0x04dcc000 }, + { _MMIO(0x9888), 0x00db0011 }, + { _MMIO(0x9888), 0x06db0900 }, + { _MMIO(0x9888), 0x08db0a13 }, + { _MMIO(0x9888), 0x0adb0b15 }, + { _MMIO(0x9888), 0x0cdb2317 }, + { _MMIO(0x9888), 0x04db21b7 }, + { _MMIO(0x9888), 0x10db0000 }, + { _MMIO(0x9888), 0x0edb0000 }, + { _MMIO(0x9888), 0x1adb0000 }, + { _MMIO(0x9888), 0x0c9fa800 }, + { _MMIO(0x9888), 0x0e9faa2a }, + { _MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x00b84000 }, + { _MMIO(0x9888), 0x0eb84000 }, + { _MMIO(0x9888), 0x16b84000 }, + { _MMIO(0x9888), 0x18b81555 }, + { _MMIO(0x9888), 0x02b84000 }, + { _MMIO(0x9888), 0x04b84000 }, + { _MMIO(0x9888), 0x06b84000 }, + { _MMIO(0x9888), 0x08b84000 }, + { _MMIO(0x9888), 0x0ab84000 }, + { _MMIO(0x9888), 0x00b9a000 }, + { _MMIO(0x9888), 0x06b98000 }, + { _MMIO(0x9888), 0x08b9a000 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x04b9a000 }, + { _MMIO(0x9888), 0x01888000 }, + { _MMIO(0x9888), 0x0d88f800 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { _MMIO(0x9888), 0x238b5540 }, + { _MMIO(0x9888), 0x258baaa2 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x018c4000 }, + { _MMIO(0x9888), 0x0f8c4000 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c5500 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x018da000 }, + { _MMIO(0x9888), 0x078d8000 }, + { _MMIO(0x9888), 
0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa2 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended_5_subslices_0x20[] = { + { _MMIO(0x9888), 0x10b800e0 }, + { _MMIO(0x9888), 0x14ba0160 }, + { _MMIO(0x9888), 0x16ba2800 }, + { _MMIO(0x9888), 0x18ba0120 }, + { _MMIO(0x9888), 0x0c9fa800 }, + { _MMIO(0x9888), 0x0e9faa2a }, + { _MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x0eb8a5c1 }, + { _MMIO(0x9888), 0x00b8a100 }, + { _MMIO(0x9888), 0x02b8204c }, + { _MMIO(0x9888), 0x16b88000 }, + { 
_MMIO(0x9888), 0x18b802aa }, + { _MMIO(0x9888), 0x04b80000 }, + { _MMIO(0x9888), 0x06b80000 }, + { _MMIO(0x9888), 0x08b88000 }, + { _MMIO(0x9888), 0x0ab88000 }, + { _MMIO(0x9888), 0x00b9a000 }, + { _MMIO(0x9888), 0x06b98000 }, + { _MMIO(0x9888), 0x08b9a000 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x04b9a000 }, + { _MMIO(0x9888), 0x00ba0011 }, + { _MMIO(0x9888), 0x06ba0900 }, + { _MMIO(0x9888), 0x08ba0a13 }, + { _MMIO(0x9888), 0x0aba0b15 }, + { _MMIO(0x9888), 0x0cba2317 }, + { _MMIO(0x9888), 0x04ba21b7 }, + { _MMIO(0x9888), 0x10ba0000 }, + { _MMIO(0x9888), 0x0eba0000 }, + { _MMIO(0x9888), 0x1aba0000 }, + { _MMIO(0x9888), 0x01888000 }, + { _MMIO(0x9888), 0x0d88f800 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { _MMIO(0x9888), 0x238b5540 }, + { _MMIO(0x9888), 0x258baaa2 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x018c4000 }, + { _MMIO(0x9888), 0x0f8c4000 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c5500 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x018da000 }, + { _MMIO(0x9888), 0x078d8000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa2 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 
0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800000 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 6); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 6); + + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x01) { + regs[n] = mux_config_compute_extended_0_subslices_0x01; + lens[n] = ARRAY_SIZE(mux_config_compute_extended_0_subslices_0x01); + n++; + } + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x08) { + regs[n] = mux_config_compute_extended_1_subslices_0x08; + lens[n] = ARRAY_SIZE(mux_config_compute_extended_1_subslices_0x08); + n++; + } + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x02) { + regs[n] = mux_config_compute_extended_2_subslices_0x02; + lens[n] = ARRAY_SIZE(mux_config_compute_extended_2_subslices_0x02); + n++; + } + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x10) { + regs[n] = 
mux_config_compute_extended_3_subslices_0x10; + lens[n] = ARRAY_SIZE(mux_config_compute_extended_3_subslices_0x10); + n++; + } + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x04) { + regs[n] = mux_config_compute_extended_4_subslices_0x04; + lens[n] = ARRAY_SIZE(mux_config_compute_extended_4_subslices_0x04); + n++; + } + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x20) { + regs[n] = mux_config_compute_extended_5_subslices_0x20; + lens[n] = ARRAY_SIZE(mux_config_compute_extended_5_subslices_0x20); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { _MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { _MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x143f00b3 }, + { _MMIO(0x9888), 0x14bf00b3 }, + { _MMIO(0x9888), 0x138303c0 }, + { _MMIO(0x9888), 0x3b800060 }, + { _MMIO(0x9888), 0x3d800805 }, + { _MMIO(0x9888), 0x003f0029 }, + { _MMIO(0x9888), 0x063f1400 }, + { _MMIO(0x9888), 0x083f1225 }, + { _MMIO(0x9888), 0x0e3f1327 }, + { _MMIO(0x9888), 0x103f0000 }, + { _MMIO(0x9888), 0x005a4000 }, + { _MMIO(0x9888), 0x065a8000 }, + { _MMIO(0x9888), 0x085ac000 }, + { _MMIO(0x9888), 0x0e5ac000 }, + { _MMIO(0x9888), 0x001d4000 }, + { _MMIO(0x9888), 0x061d8000 }, + { _MMIO(0x9888), 0x081dc000 }, + { 
_MMIO(0x9888), 0x0e1dc000 }, + { _MMIO(0x9888), 0x0c1f0800 }, + { _MMIO(0x9888), 0x0e1f2a00 }, + { _MMIO(0x9888), 0x101f0280 }, + { _MMIO(0x9888), 0x00391000 }, + { _MMIO(0x9888), 0x06394000 }, + { _MMIO(0x9888), 0x08395000 }, + { _MMIO(0x9888), 0x0e395000 }, + { _MMIO(0x9888), 0x0abf1429 }, + { _MMIO(0x9888), 0x0cbf1225 }, + { _MMIO(0x9888), 0x00bf1380 }, + { _MMIO(0x9888), 0x02bf0026 }, + { _MMIO(0x9888), 0x10bf0000 }, + { _MMIO(0x9888), 0x0adac000 }, + { _MMIO(0x9888), 0x0cdac000 }, + { _MMIO(0x9888), 0x00da8000 }, + { _MMIO(0x9888), 0x02da4000 }, + { _MMIO(0x9888), 0x0a9dc000 }, + { _MMIO(0x9888), 0x0c9dc000 }, + { _MMIO(0x9888), 0x009d8000 }, + { _MMIO(0x9888), 0x029d4000 }, + { _MMIO(0x9888), 0x0e9f8000 }, + { _MMIO(0x9888), 0x109f002a }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0ab95000 }, + { _MMIO(0x9888), 0x0cb95000 }, + { _MMIO(0x9888), 0x00b94000 }, + { _MMIO(0x9888), 0x02b91000 }, + { _MMIO(0x9888), 0x0d88c000 }, + { _MMIO(0x9888), 0x0f880003 }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8a8020 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x238b0520 }, + { _MMIO(0x9888), 0x258ba950 }, + { _MMIO(0x9888), 0x278b0016 }, + { _MMIO(0x9888), 0x198c5400 }, + { _MMIO(0x9888), 0x1b8c0001 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038d2000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaa0 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x03835180 }, + { _MMIO(0x9888), 0x05834022 }, + { _MMIO(0x9888), 0x11830000 }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x07830000 }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 
0x0184c000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x05844000 }, + { _MMIO(0x9888), 0x1b80c137 }, + { _MMIO(0x9888), 0x1d80c147 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x15804000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d801000 }, + { _MMIO(0x9888), 0x4f800111 }, + { _MMIO(0x9888), 0x43800842 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800840 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800800 }, + { _MMIO(0x9888), 0x418014a2 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_data_port_reads_coalescing[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0xba98ba98 }, + { _MMIO(0x2748), 0xba98ba98 }, + { _MMIO(0x2744), 0x00003377 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fff2 }, + { _MMIO(0x2774), 0x00007ff0 }, + { _MMIO(0x2778), 0x0007ffe2 }, + { _MMIO(0x277c), 0x00007ff0 }, + { _MMIO(0x2780), 0x0007ffc2 }, + { _MMIO(0x2784), 0x00007ff0 }, + { _MMIO(0x2788), 0x0007ff82 }, + { _MMIO(0x278c), 0x00007ff0 }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000bfef }, + { _MMIO(0x2798), 
0x0007fffa }, + { _MMIO(0x279c), 0x0000bfdf }, + { _MMIO(0x27a0), 0x0007fffa }, + { _MMIO(0x27a4), 0x0000bfbf }, + { _MMIO(0x27a8), 0x0007fffa }, + { _MMIO(0x27ac), 0x0000bf7f }, +}; + +static const struct i915_oa_reg flex_eu_config_data_port_reads_coalescing[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_data_port_reads_coalescing_0_subslices_0x01[] = { + { _MMIO(0x9888), 0x103d0005 }, + { _MMIO(0x9888), 0x163d240b }, + { _MMIO(0x9888), 0x1058022f }, + { _MMIO(0x9888), 0x185b5520 }, + { _MMIO(0x9888), 0x198b0003 }, + { _MMIO(0x9888), 0x005cc000 }, + { _MMIO(0x9888), 0x065cc000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x0a5cc000 }, + { _MMIO(0x9888), 0x0c5cc000 }, + { _MMIO(0x9888), 0x0e5cc000 }, + { _MMIO(0x9888), 0x025c4000 }, + { _MMIO(0x9888), 0x045c8000 }, + { _MMIO(0x9888), 0x003d0000 }, + { _MMIO(0x9888), 0x063d00b0 }, + { _MMIO(0x9888), 0x083d0182 }, + { _MMIO(0x9888), 0x0a3d10a0 }, + { _MMIO(0x9888), 0x0c3d11a2 }, + { _MMIO(0x9888), 0x0e3d0000 }, + { _MMIO(0x9888), 0x183d0000 }, + { _MMIO(0x9888), 0x1a3d0000 }, + { _MMIO(0x9888), 0x0e582242 }, + { _MMIO(0x9888), 0x00586700 }, + { _MMIO(0x9888), 0x0258004f }, + { _MMIO(0x9888), 0x0658c000 }, + { _MMIO(0x9888), 0x0858c000 }, + { _MMIO(0x9888), 0x0a58c000 }, + { _MMIO(0x9888), 0x0c58c000 }, + { _MMIO(0x9888), 0x045b6300 }, + { _MMIO(0x9888), 0x105b0000 }, + { _MMIO(0x9888), 0x005b4000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x1a5b0155 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x0a5b0000 }, + { _MMIO(0x9888), 0x0c5b4000 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1faaa0 }, + { _MMIO(0x9888), 0x101f02aa }, + { _MMIO(0x9888), 0x00384000 }, + { _MMIO(0x9888), 0x0e384000 }, + { _MMIO(0x9888), 0x16384000 }, + { 
_MMIO(0x9888), 0x18381555 }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x04384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0c384000 }, + { _MMIO(0x9888), 0x0039a000 }, + { _MMIO(0x9888), 0x0639a000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x02392000 }, + { _MMIO(0x9888), 0x04398000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8aaaa0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x038b6300 }, + { _MMIO(0x9888), 0x058b0062 }, + { _MMIO(0x9888), 0x118b0000 }, + { _MMIO(0x9888), 0x238b02a0 }, + { _MMIO(0x9888), 0x258b5555 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d801000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 
0x4f800001 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800041 }, +}; + +static int +get_data_port_reads_coalescing_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x01) { + regs[n] = mux_config_data_port_reads_coalescing_0_subslices_0x01; + lens[n] = ARRAY_SIZE(mux_config_data_port_reads_coalescing_0_subslices_0x01); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_data_port_writes_coalescing[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0xba98ba98 }, + { _MMIO(0x2748), 0xba98ba98 }, + { _MMIO(0x2744), 0x00003377 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ff72 }, + { _MMIO(0x2774), 0x0000bfd0 }, + { _MMIO(0x2778), 0x0007ff62 }, + { _MMIO(0x277c), 0x0000bfd0 }, + { _MMIO(0x2780), 0x0007ff42 }, + { _MMIO(0x2784), 0x0000bfd0 }, + { _MMIO(0x2788), 0x0007ff02 }, + { _MMIO(0x278c), 0x0000bfd0 }, + { _MMIO(0x2790), 0x0005fff2 }, + { _MMIO(0x2794), 0x0000bfd0 }, + { _MMIO(0x2798), 0x0005ffe2 }, + { _MMIO(0x279c), 0x0000bfd0 }, + { _MMIO(0x27a0), 0x0005ffc2 }, + { _MMIO(0x27a4), 0x0000bfd0 }, + { _MMIO(0x27a8), 0x0005ff82 }, + { _MMIO(0x27ac), 0x0000bfd0 }, +}; + +static const struct i915_oa_reg flex_eu_config_data_port_writes_coalescing[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + 
{ _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_data_port_writes_coalescing_0_subslices_0x01[] = { + { _MMIO(0x9888), 0x103d0005 }, + { _MMIO(0x9888), 0x143d0120 }, + { _MMIO(0x9888), 0x163d2400 }, + { _MMIO(0x9888), 0x1058022f }, + { _MMIO(0x9888), 0x105b0000 }, + { _MMIO(0x9888), 0x198b0003 }, + { _MMIO(0x9888), 0x005cc000 }, + { _MMIO(0x9888), 0x065cc000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x0a5cc000 }, + { _MMIO(0x9888), 0x0e5cc000 }, + { _MMIO(0x9888), 0x025c4000 }, + { _MMIO(0x9888), 0x045c8000 }, + { _MMIO(0x9888), 0x003d0000 }, + { _MMIO(0x9888), 0x063d0094 }, + { _MMIO(0x9888), 0x083d0182 }, + { _MMIO(0x9888), 0x0a3d1814 }, + { _MMIO(0x9888), 0x0e3d0000 }, + { _MMIO(0x9888), 0x183d0000 }, + { _MMIO(0x9888), 0x1a3d0000 }, + { _MMIO(0x9888), 0x0c3d0000 }, + { _MMIO(0x9888), 0x0e582242 }, + { _MMIO(0x9888), 0x00586700 }, + { _MMIO(0x9888), 0x0258004f }, + { _MMIO(0x9888), 0x0658c000 }, + { _MMIO(0x9888), 0x0858c000 }, + { _MMIO(0x9888), 0x0a58c000 }, + { _MMIO(0x9888), 0x045b6a80 }, + { _MMIO(0x9888), 0x005b4000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x185b5400 }, + { _MMIO(0x9888), 0x1a5b0141 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x0a5b0000 }, + { _MMIO(0x9888), 0x0c5b4000 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1faaa0 }, + { _MMIO(0x9888), 0x101f0282 }, + { _MMIO(0x9888), 0x00384000 }, + { _MMIO(0x9888), 0x0e384000 }, + { _MMIO(0x9888), 0x16384000 }, + { _MMIO(0x9888), 0x18381415 }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x04384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0c384000 }, + { _MMIO(0x9888), 0x0039a000 }, + { _MMIO(0x9888), 0x0639a000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x02392000 }, + { _MMIO(0x9888), 0x04398000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 
0x1b8a82a0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x038b6300 }, + { _MMIO(0x9888), 0x058b0062 }, + { _MMIO(0x9888), 0x118b0000 }, + { _MMIO(0x9888), 0x238b02a0 }, + { _MMIO(0x9888), 0x258b1555 }, + { _MMIO(0x9888), 0x278b0014 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x21852aaa }, + { _MMIO(0x9888), 0x23850028 }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830141 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0xd24), 0x00000000 }, + { _MMIO(0x9888), 0x4d801000 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f800001 }, + { _MMIO(0x9888), 0x43800000 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800420 }, + { _MMIO(0x9888), 0x3f800421 }, + { _MMIO(0x9888), 0x41800041 }, +}; + +static int +get_data_port_writes_coalescing_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + if 
(INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x01) { + regs[n] = mux_config_data_port_writes_coalescing_0_subslices_0x01; + lens[n] = ARRAY_SIZE(mux_config_data_port_writes_coalescing_0_subslices_0x01); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fff7 }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x105c0232 }, + { _MMIO(0x9888), 0x10580232 }, + { _MMIO(0x9888), 0x10380232 }, + { _MMIO(0x9888), 0x10dc0232 }, + { _MMIO(0x9888), 0x10d80232 }, + { _MMIO(0x9888), 0x10b80232 }, + { _MMIO(0x9888), 0x118e4400 }, + { _MMIO(0x9888), 0x025c6080 }, + { _MMIO(0x9888), 0x045c004b }, + { _MMIO(0x9888), 0x005c8000 }, + { _MMIO(0x9888), 0x00582080 }, + { _MMIO(0x9888), 0x0258004b }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00aa }, + { _MMIO(0x9888), 0x04386080 }, + { _MMIO(0x9888), 0x0638404b }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a380000 }, + { _MMIO(0x9888), 0x0c380000 }, + { _MMIO(0x9888), 0x00398000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x0cdc25c1 }, + { _MMIO(0x9888), 0x0adcc000 }, + { _MMIO(0x9888), 0x0ad825c1 }, + { _MMIO(0x9888), 0x18db4000 }, + { _MMIO(0x9888), 0x1adb0001 }, + { _MMIO(0x9888), 0x0e9f8000 }, + { 
_MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x0eb825c1 }, + { _MMIO(0x9888), 0x18b80154 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x0d88c000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x258baa05 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x198c5400 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x098dc000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x098e05c0 }, + { _MMIO(0x9888), 0x058e0000 }, + { _MMIO(0x9888), 0x198f0020 }, + { _MMIO(0x9888), 0x2185aa0a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x19835000 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x19808000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x51800040 }, + { _MMIO(0x9888), 0x43800400 }, + { _MMIO(0x9888), 0x45800800 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800c62 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 
0x4d800000 }, + { _MMIO(0x9888), 0x3f801042 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x418014a4 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x10bf03da }, + { _MMIO(0x9888), 0x14bf0001 }, + { _MMIO(0x9888), 0x12980340 }, + { _MMIO(0x9888), 0x12990340 }, + { _MMIO(0x9888), 0x0cbf1187 }, + { _MMIO(0x9888), 0x0ebf1205 }, + { _MMIO(0x9888), 0x00bf0500 }, + { _MMIO(0x9888), 0x02bf042b }, + { _MMIO(0x9888), 0x04bf002c }, + { _MMIO(0x9888), 0x0cdac000 }, + { _MMIO(0x9888), 0x0edac000 
}, + { _MMIO(0x9888), 0x00da8000 }, + { _MMIO(0x9888), 0x02dac000 }, + { _MMIO(0x9888), 0x04da4000 }, + { _MMIO(0x9888), 0x04983400 }, + { _MMIO(0x9888), 0x10980000 }, + { _MMIO(0x9888), 0x06990034 }, + { _MMIO(0x9888), 0x10990000 }, + { _MMIO(0x9888), 0x0c9dc000 }, + { _MMIO(0x9888), 0x0e9dc000 }, + { _MMIO(0x9888), 0x009d8000 }, + { _MMIO(0x9888), 0x029dc000 }, + { _MMIO(0x9888), 0x049d4000 }, + { _MMIO(0x9888), 0x109f02a8 }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0e9f00ba }, + { _MMIO(0x9888), 0x0cb88000 }, + { _MMIO(0x9888), 0x0cb95000 }, + { _MMIO(0x9888), 0x0eb95000 }, + { _MMIO(0x9888), 0x00b94000 }, + { _MMIO(0x9888), 0x02b95000 }, + { _MMIO(0x9888), 0x04b91000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x0cba4000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x258b800a }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b5500 }, + { _MMIO(0x9888), 0x198c4000 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { 
_MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x47800000 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800060 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 
}, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_2[] = { + { _MMIO(0x9888), 0x103f03da }, + { _MMIO(0x9888), 0x143f0001 }, + { _MMIO(0x9888), 0x12180340 }, + { _MMIO(0x9888), 0x12190340 }, + { _MMIO(0x9888), 0x0c3f1187 }, + { _MMIO(0x9888), 0x0e3f1205 }, + { _MMIO(0x9888), 0x003f0500 }, + { _MMIO(0x9888), 0x023f042b }, + { _MMIO(0x9888), 0x043f002c }, + { _MMIO(0x9888), 0x0c5ac000 }, + { _MMIO(0x9888), 0x0e5ac000 }, + { _MMIO(0x9888), 0x005a8000 }, + { _MMIO(0x9888), 0x025ac000 }, + { _MMIO(0x9888), 0x045a4000 }, + { _MMIO(0x9888), 0x04183400 }, + { _MMIO(0x9888), 0x10180000 }, + { _MMIO(0x9888), 0x06190034 }, + { _MMIO(0x9888), 0x10190000 }, + { _MMIO(0x9888), 0x0c1dc000 }, + { _MMIO(0x9888), 0x0e1dc000 }, + { _MMIO(0x9888), 0x001d8000 }, + { _MMIO(0x9888), 0x021dc000 }, + { _MMIO(0x9888), 0x041d4000 }, + { _MMIO(0x9888), 0x101f02a8 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00ba }, + { _MMIO(0x9888), 0x0c388000 }, + { _MMIO(0x9888), 0x0c395000 }, + { _MMIO(0x9888), 0x0e395000 }, + { _MMIO(0x9888), 0x00394000 }, + { _MMIO(0x9888), 0x02395000 }, + { _MMIO(0x9888), 0x04391000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x0c3a4000 }, + { _MMIO(0x9888), 0x1b8aa800 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x258b4005 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 
0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x47800000 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800060 }, +}; + +static int +get_l3_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_2; + lens[n] = ARRAY_SIZE(mux_config_l3_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_3[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_3[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { 
_MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_3[] = { + { _MMIO(0x9888), 0x121b0340 }, + { _MMIO(0x9888), 0x103f0274 }, + { _MMIO(0x9888), 0x123f0000 }, + { _MMIO(0x9888), 0x129b0340 }, + { _MMIO(0x9888), 0x10bf0274 }, + { _MMIO(0x9888), 0x12bf0000 }, + { _MMIO(0x9888), 0x041b3400 }, + { _MMIO(0x9888), 0x101b0000 }, + { _MMIO(0x9888), 0x045c8000 }, + { _MMIO(0x9888), 0x0a3d4000 }, + { _MMIO(0x9888), 0x003f0080 }, + { _MMIO(0x9888), 0x023f0793 }, + { _MMIO(0x9888), 0x043f0014 }, + { _MMIO(0x9888), 0x04588000 }, + { _MMIO(0x9888), 0x005a8000 }, + { _MMIO(0x9888), 0x025ac000 }, + { _MMIO(0x9888), 0x045a4000 }, + { _MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x001d8000 }, + { _MMIO(0x9888), 0x021dc000 }, + { _MMIO(0x9888), 0x041d4000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f002a }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x00394000 }, + { _MMIO(0x9888), 0x02395000 }, + { _MMIO(0x9888), 0x04399000 }, + { _MMIO(0x9888), 0x069b0034 }, + { _MMIO(0x9888), 0x109b0000 }, + { _MMIO(0x9888), 0x06dc4000 }, + { _MMIO(0x9888), 0x0cbd4000 }, + { _MMIO(0x9888), 0x0cbf0981 }, + { _MMIO(0x9888), 0x0ebf0a0f }, + { _MMIO(0x9888), 0x06d84000 }, + { _MMIO(0x9888), 0x0cdac000 }, + { _MMIO(0x9888), 0x0edac000 }, + { _MMIO(0x9888), 0x0cdb4000 }, + { _MMIO(0x9888), 0x0c9dc000 }, + { _MMIO(0x9888), 0x0e9dc000 }, + { _MMIO(0x9888), 0x109f02a8 }, + { _MMIO(0x9888), 0x0e9f0080 }, + { _MMIO(0x9888), 0x0cb84000 }, + { _MMIO(0x9888), 0x0cb95000 }, + { _MMIO(0x9888), 0x0eb95000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x258b8009 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x198c4000 }, + { _MMIO(0x9888), 
0x1b8c0015 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800c00 }, + { _MMIO(0x9888), 0x47800c63 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f8014a5 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800045 }, +}; + +static int +get_l3_3_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_3; + lens[n] = ARRAY_SIZE(mux_config_l3_3); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_4[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { 
_MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_4[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_4[] = { + { _MMIO(0x9888), 0x121a0340 }, + { _MMIO(0x9888), 0x103f0017 }, + { _MMIO(0x9888), 0x123f0020 }, + { _MMIO(0x9888), 0x129a0340 }, + { _MMIO(0x9888), 0x10bf0017 }, + { _MMIO(0x9888), 0x12bf0020 }, + { _MMIO(0x9888), 0x041a3400 }, + { _MMIO(0x9888), 0x101a0000 }, + { _MMIO(0x9888), 0x043b8000 }, + { _MMIO(0x9888), 0x0a3e0010 }, + { _MMIO(0x9888), 0x003f0200 }, + { _MMIO(0x9888), 0x023f0113 }, + { _MMIO(0x9888), 0x043f0014 }, + { _MMIO(0x9888), 0x02592000 }, + { _MMIO(0x9888), 0x005a8000 }, + { _MMIO(0x9888), 0x025ac000 }, + { _MMIO(0x9888), 0x045a4000 }, + { _MMIO(0x9888), 0x0a1c8000 }, + { _MMIO(0x9888), 0x001d8000 }, + { _MMIO(0x9888), 0x021dc000 }, + { _MMIO(0x9888), 0x041d4000 }, + { _MMIO(0x9888), 0x0a1e8000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f001a }, + { _MMIO(0x9888), 0x00394000 }, + { _MMIO(0x9888), 0x02395000 }, + { _MMIO(0x9888), 0x04391000 }, + { _MMIO(0x9888), 0x069a0034 }, + { _MMIO(0x9888), 0x109a0000 }, + { _MMIO(0x9888), 0x06bb4000 }, + { _MMIO(0x9888), 0x0abe0040 }, + { _MMIO(0x9888), 0x0cbf0984 }, + { _MMIO(0x9888), 0x0ebf0a02 }, + { _MMIO(0x9888), 0x02d94000 }, + { _MMIO(0x9888), 0x0cdac000 }, + { _MMIO(0x9888), 0x0edac000 }, + { _MMIO(0x9888), 0x0c9c0400 }, + { _MMIO(0x9888), 0x0c9dc000 }, + { _MMIO(0x9888), 0x0e9dc000 }, + { _MMIO(0x9888), 0x0c9e0400 
}, + { _MMIO(0x9888), 0x109f02a8 }, + { _MMIO(0x9888), 0x0e9f0040 }, + { _MMIO(0x9888), 0x0cb95000 }, + { _MMIO(0x9888), 0x0eb95000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x258b8009 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x198c4000 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800800 }, + { _MMIO(0x9888), 0x47800842 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f801084 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800044 }, +}; + +static int +get_l3_4_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_4; + lens[n] = 
ARRAY_SIZE(mux_config_l3_4); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00006000 }, + { _MMIO(0x2774), 0x0000f3ff }, + { _MMIO(0x2778), 0x00001800 }, + { _MMIO(0x277c), 0x0000fcff }, + { _MMIO(0x2780), 0x00000600 }, + { _MMIO(0x2784), 0x0000ff3f }, + { _MMIO(0x2788), 0x00000180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000060 }, + { _MMIO(0x2794), 0x0000fff3 }, + { _MMIO(0x2798), 0x00000018 }, + { _MMIO(0x279c), 0x0000fffc }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x143b000e }, + { _MMIO(0x9888), 0x043c55c0 }, + { _MMIO(0x9888), 0x0a1e0280 }, + { _MMIO(0x9888), 0x0c1e0408 }, + { _MMIO(0x9888), 0x10390000 }, + { _MMIO(0x9888), 0x12397a1f }, + { _MMIO(0x9888), 0x14bb000e }, + { _MMIO(0x9888), 0x04bc5000 }, + { _MMIO(0x9888), 0x0a9e0296 }, + { _MMIO(0x9888), 0x0c9e0008 }, + { _MMIO(0x9888), 0x10b90000 }, + { _MMIO(0x9888), 0x12b97a1f }, + { _MMIO(0x9888), 0x063b0042 }, + { _MMIO(0x9888), 0x103b0000 }, + { _MMIO(0x9888), 0x083c0000 }, + { _MMIO(0x9888), 0x0a3e0040 }, + { _MMIO(0x9888), 0x043f8000 }, + { _MMIO(0x9888), 0x02594000 }, + { _MMIO(0x9888), 0x045a8000 }, + { _MMIO(0x9888), 0x0c1c0400 }, + { _MMIO(0x9888), 0x041d8000 }, + { _MMIO(0x9888), 0x081e02c0 }, + { _MMIO(0x9888), 0x0e1e0000 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1f0260 }, + { _MMIO(0x9888), 0x101f0014 }, + 
{ _MMIO(0x9888), 0x003905e0 }, + { _MMIO(0x9888), 0x06390bc0 }, + { _MMIO(0x9888), 0x02390018 }, + { _MMIO(0x9888), 0x04394000 }, + { _MMIO(0x9888), 0x04bb0042 }, + { _MMIO(0x9888), 0x10bb0000 }, + { _MMIO(0x9888), 0x02bc05c0 }, + { _MMIO(0x9888), 0x08bc0000 }, + { _MMIO(0x9888), 0x0abe0004 }, + { _MMIO(0x9888), 0x02bf8000 }, + { _MMIO(0x9888), 0x02d91000 }, + { _MMIO(0x9888), 0x02da8000 }, + { _MMIO(0x9888), 0x089c8000 }, + { _MMIO(0x9888), 0x029d8000 }, + { _MMIO(0x9888), 0x089e8000 }, + { _MMIO(0x9888), 0x0e9e0000 }, + { _MMIO(0x9888), 0x0e9fa806 }, + { _MMIO(0x9888), 0x109f0142 }, + { _MMIO(0x9888), 0x08b90617 }, + { _MMIO(0x9888), 0x0ab90be0 }, + { _MMIO(0x9888), 0x02b94000 }, + { _MMIO(0x9888), 0x0d88f000 }, + { _MMIO(0x9888), 0x0f88000c }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x1b8a2800 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x238b52a0 }, + { _MMIO(0x9888), 0x258b6a95 }, + { _MMIO(0x9888), 0x278b0029 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c1500 }, + { _MMIO(0x9888), 0x1b8c0014 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x038d8000 }, + { _MMIO(0x9888), 0x058d2000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 
0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x4d800444 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f804000 }, + { _MMIO(0x9888), 0x43801080 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800084 }, + { _MMIO(0x9888), 0x53800044 }, + { _MMIO(0x9888), 0x47801080 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800000 }, + { _MMIO(0x9888), 0x41800840 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 
0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler_1[] = { + { _MMIO(0x9888), 0x18921400 }, + { _MMIO(0x9888), 0x149500ab }, + { _MMIO(0x9888), 0x18b21400 }, + { _MMIO(0x9888), 0x14b500ab }, + { _MMIO(0x9888), 0x18d21400 }, + { _MMIO(0x9888), 0x14d500ab }, + { _MMIO(0x9888), 0x0cdc8000 }, + { _MMIO(0x9888), 0x0edc4000 }, + { _MMIO(0x9888), 0x02dcc000 }, + { _MMIO(0x9888), 0x04dcc000 }, + { _MMIO(0x9888), 0x1abd00a0 }, + { _MMIO(0x9888), 0x0abd8000 }, + { _MMIO(0x9888), 0x0cd88000 }, + { _MMIO(0x9888), 0x0ed84000 }, + { _MMIO(0x9888), 0x04d88000 }, + { _MMIO(0x9888), 0x1adb0050 }, + { _MMIO(0x9888), 0x04db8000 }, + { _MMIO(0x9888), 0x06db8000 }, + { _MMIO(0x9888), 0x08db8000 }, + { _MMIO(0x9888), 0x0adb4000 }, + { _MMIO(0x9888), 0x109f02a0 }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0e9f00aa }, + { _MMIO(0x9888), 0x18b82500 }, + { _MMIO(0x9888), 0x02b88000 }, + { _MMIO(0x9888), 0x04b84000 }, + { _MMIO(0x9888), 0x06b84000 }, + { _MMIO(0x9888), 0x08b84000 }, + { _MMIO(0x9888), 0x0ab84000 }, + { _MMIO(0x9888), 0x0cb88000 }, + { _MMIO(0x9888), 0x0cb98000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x00b98000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x04b9a000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x1aba0200 }, + { _MMIO(0x9888), 0x02ba8000 }, + { _MMIO(0x9888), 0x0cba8000 }, + { _MMIO(0x9888), 0x04908000 }, + { _MMIO(0x9888), 0x04918000 }, + { _MMIO(0x9888), 0x04927300 }, + { _MMIO(0x9888), 0x10920000 }, + { _MMIO(0x9888), 0x1893000a }, + { _MMIO(0x9888), 0x0a934000 }, + { _MMIO(0x9888), 0x0a946000 }, + { _MMIO(0x9888), 0x0c959000 }, + { _MMIO(0x9888), 0x0e950098 }, + { _MMIO(0x9888), 0x10950000 }, + { _MMIO(0x9888), 0x04b04000 }, + { _MMIO(0x9888), 0x04b14000 }, + { _MMIO(0x9888), 0x04b20073 }, + { _MMIO(0x9888), 0x10b20000 }, + { _MMIO(0x9888), 0x04b38000 }, + { _MMIO(0x9888), 0x06b38000 }, + { _MMIO(0x9888), 0x08b34000 }, + { _MMIO(0x9888), 0x04b4c000 }, + { 
_MMIO(0x9888), 0x02b59890 }, + { _MMIO(0x9888), 0x10b50000 }, + { _MMIO(0x9888), 0x06d04000 }, + { _MMIO(0x9888), 0x06d14000 }, + { _MMIO(0x9888), 0x06d20073 }, + { _MMIO(0x9888), 0x10d20000 }, + { _MMIO(0x9888), 0x18d30020 }, + { _MMIO(0x9888), 0x02d38000 }, + { _MMIO(0x9888), 0x0cd34000 }, + { _MMIO(0x9888), 0x0ad48000 }, + { _MMIO(0x9888), 0x04d42000 }, + { _MMIO(0x9888), 0x0ed59000 }, + { _MMIO(0x9888), 0x00d59800 }, + { _MMIO(0x9888), 0x10d50000 }, + { _MMIO(0x9888), 0x0f88000e }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b5500 }, + { _MMIO(0x9888), 0x258b000a }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8d8000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x2185000a }, + { _MMIO(0x9888), 0x1b830150 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d848000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d808000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 
0x47801021 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800c64 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800c02 }, +}; + +static int +get_sampler_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler_1; + lens[n] = ARRAY_SIZE(mux_config_sampler_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler_2[] = { + { _MMIO(0x9888), 0x18121400 }, + { _MMIO(0x9888), 0x141500ab }, + { _MMIO(0x9888), 0x18321400 }, + { _MMIO(0x9888), 0x143500ab }, + { _MMIO(0x9888), 0x18521400 }, + { _MMIO(0x9888), 0x145500ab }, + { _MMIO(0x9888), 0x0c5c8000 }, + { _MMIO(0x9888), 0x0e5c4000 }, + { _MMIO(0x9888), 0x025cc000 }, + { _MMIO(0x9888), 0x045cc000 }, + { _MMIO(0x9888), 0x1a3d00a0 }, + { _MMIO(0x9888), 0x0a3d8000 }, + { _MMIO(0x9888), 0x0c588000 }, + { _MMIO(0x9888), 0x0e584000 }, + { _MMIO(0x9888), 0x04588000 }, + { _MMIO(0x9888), 0x1a5b0050 }, + { _MMIO(0x9888), 0x045b8000 }, + { _MMIO(0x9888), 
0x065b8000 }, + { _MMIO(0x9888), 0x085b8000 }, + { _MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x101f02a0 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00aa }, + { _MMIO(0x9888), 0x18382500 }, + { _MMIO(0x9888), 0x02388000 }, + { _MMIO(0x9888), 0x04384000 }, + { _MMIO(0x9888), 0x06384000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0c388000 }, + { _MMIO(0x9888), 0x0c398000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x00398000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x1a3a0200 }, + { _MMIO(0x9888), 0x023a8000 }, + { _MMIO(0x9888), 0x0c3a8000 }, + { _MMIO(0x9888), 0x04108000 }, + { _MMIO(0x9888), 0x04118000 }, + { _MMIO(0x9888), 0x04127300 }, + { _MMIO(0x9888), 0x10120000 }, + { _MMIO(0x9888), 0x1813000a }, + { _MMIO(0x9888), 0x0a134000 }, + { _MMIO(0x9888), 0x0a146000 }, + { _MMIO(0x9888), 0x0c159000 }, + { _MMIO(0x9888), 0x0e150098 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x04304000 }, + { _MMIO(0x9888), 0x04314000 }, + { _MMIO(0x9888), 0x04320073 }, + { _MMIO(0x9888), 0x10320000 }, + { _MMIO(0x9888), 0x04338000 }, + { _MMIO(0x9888), 0x06338000 }, + { _MMIO(0x9888), 0x08334000 }, + { _MMIO(0x9888), 0x0434c000 }, + { _MMIO(0x9888), 0x02359890 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x06504000 }, + { _MMIO(0x9888), 0x06514000 }, + { _MMIO(0x9888), 0x06520073 }, + { _MMIO(0x9888), 0x10520000 }, + { _MMIO(0x9888), 0x18530020 }, + { _MMIO(0x9888), 0x02538000 }, + { _MMIO(0x9888), 0x0c534000 }, + { _MMIO(0x9888), 0x0a548000 }, + { _MMIO(0x9888), 0x04542000 }, + { _MMIO(0x9888), 0x0e559000 }, + { _MMIO(0x9888), 0x00559800 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x1b8aa000 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { 
_MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x258b0005 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x2185000a }, + { _MMIO(0x9888), 0x1b830150 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d848000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d808000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47801021 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800c64 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800c02 }, +}; + +static int +get_sampler_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler_2; + lens[n] = ARRAY_SIZE(mux_config_sampler_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x0000fe7f }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000ffbf }, + { _MMIO(0x2788), 0x00000000 }, + { 
_MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fff7 }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fff9 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x16154d60 }, + { _MMIO(0x9888), 0x16352e60 }, + { _MMIO(0x9888), 0x16554d60 }, + { _MMIO(0x9888), 0x16950000 }, + { _MMIO(0x9888), 0x16b50000 }, + { _MMIO(0x9888), 0x16d50000 }, + { _MMIO(0x9888), 0x005c8000 }, + { _MMIO(0x9888), 0x045cc000 }, + { _MMIO(0x9888), 0x065c4000 }, + { _MMIO(0x9888), 0x083d8000 }, + { _MMIO(0x9888), 0x0a3d8000 }, + { _MMIO(0x9888), 0x0458c000 }, + { _MMIO(0x9888), 0x025b8000 }, + { _MMIO(0x9888), 0x085b4000 }, + { _MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x0c5b8000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00aa }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x04388000 }, + { _MMIO(0x9888), 0x06388000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0c384000 }, + { _MMIO(0x9888), 0x00398000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x043a8000 }, + { _MMIO(0x9888), 0x063a8000 }, + { _MMIO(0x9888), 0x08138000 }, + { _MMIO(0x9888), 0x0a138000 }, + { _MMIO(0x9888), 0x06143000 }, + { _MMIO(0x9888), 0x0415cfc7 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x02338000 }, + { _MMIO(0x9888), 0x0c338000 }, + { _MMIO(0x9888), 0x04342000 }, + { _MMIO(0x9888), 0x06344000 }, + { _MMIO(0x9888), 0x0035c700 }, + { _MMIO(0x9888), 0x063500cf }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x04538000 }, + { _MMIO(0x9888), 0x06538000 }, + { _MMIO(0x9888), 
0x0454c000 }, + { _MMIO(0x9888), 0x0255cfc7 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x06dc8000 }, + { _MMIO(0x9888), 0x08dc4000 }, + { _MMIO(0x9888), 0x0cdcc000 }, + { _MMIO(0x9888), 0x0edcc000 }, + { _MMIO(0x9888), 0x1abd00a8 }, + { _MMIO(0x9888), 0x0cd8c000 }, + { _MMIO(0x9888), 0x0ed84000 }, + { _MMIO(0x9888), 0x0edb8000 }, + { _MMIO(0x9888), 0x18db0800 }, + { _MMIO(0x9888), 0x1adb0254 }, + { _MMIO(0x9888), 0x0e9faa00 }, + { _MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x0eb84000 }, + { _MMIO(0x9888), 0x16b84000 }, + { _MMIO(0x9888), 0x18b8156a }, + { _MMIO(0x9888), 0x06b98000 }, + { _MMIO(0x9888), 0x08b9a000 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x18baa000 }, + { _MMIO(0x9888), 0x1aba0002 }, + { _MMIO(0x9888), 0x16934000 }, + { _MMIO(0x9888), 0x1893000a }, + { _MMIO(0x9888), 0x0a947000 }, + { _MMIO(0x9888), 0x0c95c5c1 }, + { _MMIO(0x9888), 0x0e9500c3 }, + { _MMIO(0x9888), 0x10950000 }, + { _MMIO(0x9888), 0x0eb38000 }, + { _MMIO(0x9888), 0x16b30040 }, + { _MMIO(0x9888), 0x18b30020 }, + { _MMIO(0x9888), 0x06b48000 }, + { _MMIO(0x9888), 0x08b41000 }, + { _MMIO(0x9888), 0x0ab48000 }, + { _MMIO(0x9888), 0x06b5c500 }, + { _MMIO(0x9888), 0x08b500c3 }, + { _MMIO(0x9888), 0x0eb5c100 }, + { _MMIO(0x9888), 0x10b50000 }, + { _MMIO(0x9888), 0x16d31500 }, + { _MMIO(0x9888), 0x08d4e000 }, + { _MMIO(0x9888), 0x08d5c100 }, + { _MMIO(0x9888), 0x0ad5c3c5 }, + { _MMIO(0x9888), 0x10d50000 }, + { _MMIO(0x9888), 0x0d88f800 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x258baaa5 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x0f8c4000 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c5500 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { 
_MMIO(0x9888), 0x078d8000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800c42 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800063 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800800 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f8014a4 }, + { _MMIO(0x9888), 0x41801042 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 
0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x0000fe7f }, + { _MMIO(0x2780), 0x00000000 }, + { _MMIO(0x2784), 0x0000ff9f }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000ffe7 }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fffb }, + { _MMIO(0x2798), 0x00000002 }, + { _MMIO(0x279c), 0x0000fffd }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x16150000 }, + { _MMIO(0x9888), 0x16350000 }, + { _MMIO(0x9888), 0x16550000 }, + { _MMIO(0x9888), 0x16952e60 }, + { _MMIO(0x9888), 0x16b54d60 }, + { _MMIO(0x9888), 0x16d52e60 }, + { _MMIO(0x9888), 0x065c8000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x0a5cc000 }, + { _MMIO(0x9888), 0x0c5c4000 }, + { _MMIO(0x9888), 0x0e3d8000 }, + { _MMIO(0x9888), 0x183da000 }, + { _MMIO(0x9888), 0x06588000 }, + { _MMIO(0x9888), 0x08588000 }, + { _MMIO(0x9888), 0x0a584000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x185b5800 }, + { _MMIO(0x9888), 0x1a5b000a }, + { _MMIO(0x9888), 0x0e1faa00 }, + { _MMIO(0x9888), 0x101f02aa }, + { _MMIO(0x9888), 0x0e384000 }, + { _MMIO(0x9888), 0x16384000 }, + { _MMIO(0x9888), 0x18382a55 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x1a3a02a0 }, + { _MMIO(0x9888), 0x0e138000 }, + { _MMIO(0x9888), 0x16130500 }, + { _MMIO(0x9888), 0x06148000 }, + { _MMIO(0x9888), 0x08146000 }, + { _MMIO(0x9888), 0x0615c100 }, + { _MMIO(0x9888), 0x0815c500 }, + { 
_MMIO(0x9888), 0x0a1500c3 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x16335040 }, + { _MMIO(0x9888), 0x08349000 }, + { _MMIO(0x9888), 0x0a341000 }, + { _MMIO(0x9888), 0x083500c1 }, + { _MMIO(0x9888), 0x0a35c500 }, + { _MMIO(0x9888), 0x0c3500c3 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x1853002a }, + { _MMIO(0x9888), 0x0a54e000 }, + { _MMIO(0x9888), 0x0c55c500 }, + { _MMIO(0x9888), 0x0e55c1c3 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x00dc8000 }, + { _MMIO(0x9888), 0x02dcc000 }, + { _MMIO(0x9888), 0x04dc4000 }, + { _MMIO(0x9888), 0x04bd8000 }, + { _MMIO(0x9888), 0x06bd8000 }, + { _MMIO(0x9888), 0x02d8c000 }, + { _MMIO(0x9888), 0x02db8000 }, + { _MMIO(0x9888), 0x04db4000 }, + { _MMIO(0x9888), 0x06db4000 }, + { _MMIO(0x9888), 0x08db8000 }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0e9f00aa }, + { _MMIO(0x9888), 0x02b84000 }, + { _MMIO(0x9888), 0x04b84000 }, + { _MMIO(0x9888), 0x06b84000 }, + { _MMIO(0x9888), 0x08b84000 }, + { _MMIO(0x9888), 0x0ab88000 }, + { _MMIO(0x9888), 0x0cb88000 }, + { _MMIO(0x9888), 0x00b98000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x04b9a000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x0aba8000 }, + { _MMIO(0x9888), 0x0cba8000 }, + { _MMIO(0x9888), 0x04938000 }, + { _MMIO(0x9888), 0x06938000 }, + { _MMIO(0x9888), 0x0494c000 }, + { _MMIO(0x9888), 0x0295cfc7 }, + { _MMIO(0x9888), 0x10950000 }, + { _MMIO(0x9888), 0x02b38000 }, + { _MMIO(0x9888), 0x08b38000 }, + { _MMIO(0x9888), 0x04b42000 }, + { _MMIO(0x9888), 0x06b41000 }, + { _MMIO(0x9888), 0x00b5c700 }, + { _MMIO(0x9888), 0x04b500cf }, + { _MMIO(0x9888), 0x10b50000 }, + { _MMIO(0x9888), 0x0ad38000 }, + { _MMIO(0x9888), 0x0cd38000 }, + { _MMIO(0x9888), 0x06d46000 }, + { _MMIO(0x9888), 0x04d5c700 }, + { _MMIO(0x9888), 0x06d500cf }, + { _MMIO(0x9888), 0x10d50000 }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 
0x0b888000 }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8aaaa0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x258b555a }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x238b5500 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800882 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45801082 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x478014a5 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800002 }, + { _MMIO(0x9888), 0x41800c62 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg 
**regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { + { _MMIO(0xe458), 0x00001000 }, + { _MMIO(0xe558), 0x00003002 }, + { _MMIO(0xe658), 0x00005004 }, + { _MMIO(0xe758), 0x00011010 }, + { _MMIO(0xe45c), 0x00050012 }, + { _MMIO(0xe55c), 0x00052051 }, + { _MMIO(0xe65c), 0x00000008 }, +}; + +static const struct i915_oa_reg mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x161503e0 }, + { _MMIO(0x9888), 0x163503e0 }, + { _MMIO(0x9888), 0x165503e0 }, + { _MMIO(0x9888), 0x169503e0 }, + { _MMIO(0x9888), 0x16b503e0 }, + { _MMIO(0x9888), 0x16d503e0 }, + { _MMIO(0x9888), 0x045cc000 }, + { _MMIO(0x9888), 0x083d8000 }, + { _MMIO(0x9888), 0x04584000 }, + { _MMIO(0x9888), 0x085b4000 }, + { _MMIO(0x9888), 0x0a5b8000 }, + { _MMIO(0x9888), 0x0e1f00a8 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0c388000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x0c3a8000 }, + { _MMIO(0x9888), 0x08138000 }, + { _MMIO(0x9888), 0x06141000 }, + { _MMIO(0x9888), 0x041500c3 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x0a338000 }, + { _MMIO(0x9888), 0x06342000 }, + { _MMIO(0x9888), 0x0435c300 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x0c538000 }, + { _MMIO(0x9888), 0x06544000 }, + { _MMIO(0x9888), 0x065500c3 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x00dc8000 }, + { _MMIO(0x9888), 0x02dc4000 }, + { _MMIO(0x9888), 0x02bd8000 }, + { 
_MMIO(0x9888), 0x00d88000 }, + { _MMIO(0x9888), 0x02db4000 }, + { _MMIO(0x9888), 0x04db8000 }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0e9f0002 }, + { _MMIO(0x9888), 0x02b84000 }, + { _MMIO(0x9888), 0x04b84000 }, + { _MMIO(0x9888), 0x06b88000 }, + { _MMIO(0x9888), 0x00b98000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x06ba8000 }, + { _MMIO(0x9888), 0x02938000 }, + { _MMIO(0x9888), 0x04942000 }, + { _MMIO(0x9888), 0x0095c300 }, + { _MMIO(0x9888), 0x10950000 }, + { _MMIO(0x9888), 0x04b38000 }, + { _MMIO(0x9888), 0x04b44000 }, + { _MMIO(0x9888), 0x02b500c3 }, + { _MMIO(0x9888), 0x10b50000 }, + { _MMIO(0x9888), 0x06d38000 }, + { _MMIO(0x9888), 0x04d48000 }, + { _MMIO(0x9888), 0x02d5c300 }, + { _MMIO(0x9888), 0x10d50000 }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x238b3500 }, + { _MMIO(0x9888), 0x258b0005 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x2185000a }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800c40 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41801482 }, + { _MMIO(0x9888), 0x31800000 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + 
const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_vme_pipe[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00100030 }, + { _MMIO(0x2774), 0x0000fff9 }, + { _MMIO(0x2778), 0x00000002 }, + { _MMIO(0x277c), 0x0000fffc }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000fff3 }, + { _MMIO(0x2788), 0x00100180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00000002 }, + { _MMIO(0x279c), 0x0000ff3f }, +}; + +static const struct i915_oa_reg flex_eu_config_vme_pipe[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00008003 }, +}; + +static const struct i915_oa_reg mux_config_vme_pipe[] = { + { _MMIO(0x9888), 0x14100812 }, + { _MMIO(0x9888), 0x14125800 }, + { _MMIO(0x9888), 0x161200c0 }, + { _MMIO(0x9888), 0x14300812 }, + { _MMIO(0x9888), 0x14325800 }, + { _MMIO(0x9888), 0x163200c0 }, + { _MMIO(0x9888), 0x005c4000 }, + { _MMIO(0x9888), 0x065c8000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x0a5cc000 }, + { _MMIO(0x9888), 0x0c5cc000 }, + { _MMIO(0x9888), 0x003d8000 }, + { _MMIO(0x9888), 0x0e3d8000 }, + { _MMIO(0x9888), 0x183d2800 }, + { _MMIO(0x9888), 0x00584000 }, + { _MMIO(0x9888), 0x06588000 }, + { _MMIO(0x9888), 0x0858c000 }, + { _MMIO(0x9888), 0x005b4000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x185b9400 }, + { _MMIO(0x9888), 0x1a5b002a }, + { _MMIO(0x9888), 0x0c1f0800 }, + { _MMIO(0x9888), 0x0e1faa00 }, + { _MMIO(0x9888), 0x101f002a }, + { _MMIO(0x9888), 0x00384000 }, + { _MMIO(0x9888), 0x0e384000 }, + { 
_MMIO(0x9888), 0x16384000 }, + { _MMIO(0x9888), 0x18380155 }, + { _MMIO(0x9888), 0x00392000 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x00100047 }, + { _MMIO(0x9888), 0x06101a80 }, + { _MMIO(0x9888), 0x10100000 }, + { _MMIO(0x9888), 0x0810c000 }, + { _MMIO(0x9888), 0x0811c000 }, + { _MMIO(0x9888), 0x08126151 }, + { _MMIO(0x9888), 0x10120000 }, + { _MMIO(0x9888), 0x00134000 }, + { _MMIO(0x9888), 0x0e134000 }, + { _MMIO(0x9888), 0x161300a0 }, + { _MMIO(0x9888), 0x0a301ac7 }, + { _MMIO(0x9888), 0x10300000 }, + { _MMIO(0x9888), 0x0c30c000 }, + { _MMIO(0x9888), 0x0c31c000 }, + { _MMIO(0x9888), 0x0c326151 }, + { _MMIO(0x9888), 0x10320000 }, + { _MMIO(0x9888), 0x16332a00 }, + { _MMIO(0x9888), 0x18330001 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8a2aa0 }, + { _MMIO(0x9888), 0x238b0020 }, + { _MMIO(0x9888), 0x258b5550 }, + { _MMIO(0x9888), 0x278b0001 }, + { _MMIO(0x9888), 0x1f850080 }, + { _MMIO(0x9888), 0x2185aaa0 }, + { _MMIO(0x9888), 0x23850002 }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830015 }, + { _MMIO(0x9888), 0x01844000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x11804000 }, + { _MMIO(0x9888), 0x17808000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3d800800 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800002 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800884 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800002 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, +}; + +static int +get_vme_pipe_mux_config(struct 
drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_vme_pipe; + lens[n] = ARRAY_SIZE(mux_config_vme_pipe); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x198b0000 }, + { _MMIO(0x9888), 0x078b0066 }, + { _MMIO(0x9888), 0x118b0000 }, + { _MMIO(0x9888), 0x258b0000 }, + { _MMIO(0x9888), 0x21850008 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800000 }, + { _MMIO(0x9888), 0x31800000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + 
return n; +} + int i915_oa_select_metric_set_bdw(struct drm_i915_private *dev_priv) { dev_priv->perf.oa.n_mux_configs = 0; @@ -334,6 +4100,552 @@ int i915_oa_select_metric_set_bdw(struct drm_i915_private *dev_priv) dev_priv->perf.oa.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case 
METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it 
wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_DATA_PORT_READS_COALESCING: + dev_priv->perf.oa.n_mux_configs = + get_data_port_reads_coalescing_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"DATA_PORT_READS_COALESCING\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_data_port_reads_coalescing; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_data_port_reads_coalescing); + + 
dev_priv->perf.oa.flex_regs = + flex_eu_config_data_port_reads_coalescing; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_data_port_reads_coalescing); + + return 0; + case METRIC_SET_ID_DATA_PORT_WRITES_COALESCING: + dev_priv->perf.oa.n_mux_configs = + get_data_port_writes_coalescing_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"DATA_PORT_WRITES_COALESCING\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_data_port_writes_coalescing; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_data_port_writes_coalescing); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_data_port_writes_coalescing; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_data_port_writes_coalescing); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + 
dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_L3_2: + dev_priv->perf.oa.n_mux_configs = + get_l3_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_2); + + return 0; + case METRIC_SET_ID_L3_3: + dev_priv->perf.oa.n_mux_configs = + get_l3_3_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_3\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_3; + dev_priv->perf.oa.b_counter_regs_len = + 
ARRAY_SIZE(b_counter_config_l3_3); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_3; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_3); + + return 0; + case METRIC_SET_ID_L3_4: + dev_priv->perf.oa.n_mux_configs = + get_l3_4_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_4\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_4; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_4); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_4; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_4); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER_1: + dev_priv->perf.oa.n_mux_configs = + get_sampler_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + 
dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler_1); + + return 0; + case METRIC_SET_ID_SAMPLER_2: + dev_priv->perf.oa.n_mux_configs = + get_sampler_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler_2); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len 
= + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_VME_PIPE: + dev_priv->perf.oa.n_mux_configs = + get_vme_pipe_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable 
MUX config for \"VME_PIPE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_vme_pipe; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_vme_pipe); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_vme_pipe; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_vme_pipe); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + return 0; default: return -ENODEV; @@ -362,6 +4674,468 @@ static struct attribute_group group_render_basic = { .attrs = attrs_render_basic, }; +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "35fbc9b2-a891-40a6-a38d-022bb7057552", + .attrs = attrs_compute_basic, 
+}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "233d0544-fff7-4281-8291-e02f222aff72", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_memory_reads_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static struct attribute_group group_memory_reads = { + .name = "2b255d48-2117-4fef-a8f7-f151e1d25a2c", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "f7fd3220-b466-4a4d-9f98-b0caf3f2394c", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute 
dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "e99ccaca-821c-4df9-97a7-96bdb7204e43", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, +}; + +static struct attribute_group group_compute_l3_cache = { + .name = "27a364dc-8225-4ecb-b607-d6f1925598d9", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_data_port_reads_coalescing_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_DATA_PORT_READS_COALESCING); +} + +static struct device_attribute dev_attr_data_port_reads_coalescing_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_data_port_reads_coalescing_id, + .store = NULL, +}; + +static struct attribute *attrs_data_port_reads_coalescing[] = { + &dev_attr_data_port_reads_coalescing_id.attr, + NULL, +}; + +static struct attribute_group group_data_port_reads_coalescing = { + .name = "857fc630-2f09-4804-85f1-084adfadd5ab", + .attrs = attrs_data_port_reads_coalescing, +}; + +static ssize_t +show_data_port_writes_coalescing_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_DATA_PORT_WRITES_COALESCING); +} + +static struct device_attribute dev_attr_data_port_writes_coalescing_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = 
show_data_port_writes_coalescing_id, + .store = NULL, +}; + +static struct attribute *attrs_data_port_writes_coalescing[] = { + &dev_attr_data_port_writes_coalescing_id.attr, + NULL, +}; + +static struct attribute_group group_data_port_writes_coalescing = { + .name = "343ebc99-4a55-414c-8c17-d8e259cf5e20", + .attrs = attrs_data_port_writes_coalescing, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "7bdafd88-a4fa-4ed5-bc09-1a977aa5be3e", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "9385ebb2-f34f-4aa5-aec5-7e9cbbea0f0b", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_l3_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_2); +} + +static struct device_attribute dev_attr_l3_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_2_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_2[] = { + &dev_attr_l3_2_id.attr, + NULL, +}; + +static struct attribute_group group_l3_2 = { + .name = "446ae59b-ff2e-41c9-b49e-0184a54bf00a", + .attrs = attrs_l3_2, +}; + +static ssize_t +show_l3_3_id(struct device *kdev, struct device_attribute *attr, 
char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_3); +} + +static struct device_attribute dev_attr_l3_3_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_3_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_3[] = { + &dev_attr_l3_3_id.attr, + NULL, +}; + +static struct attribute_group group_l3_3 = { + .name = "84a7956f-1ea4-4d0d-837f-e39a0376e38c", + .attrs = attrs_l3_3, +}; + +static ssize_t +show_l3_4_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_4); +} + +static struct device_attribute dev_attr_l3_4_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_4_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_4[] = { + &dev_attr_l3_4_id.attr, + NULL, +}; + +static struct attribute_group group_l3_4 = { + .name = "92b493d9-df18-4bed-be06-5cac6f2a6f5f", + .attrs = attrs_l3_4, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "14345c35-cc46-40d0-bb04-6ed1fbb43679", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER_1); +} + +static struct device_attribute dev_attr_sampler_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_1_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler_1[] = { + 
&dev_attr_sampler_1_id.attr, + NULL, +}; + +static struct attribute_group group_sampler_1 = { + .name = "f0c6ba37-d3d3-4211-91b5-226730312a54", + .attrs = attrs_sampler_1, +}; + +static ssize_t +show_sampler_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER_2); +} + +static struct device_attribute dev_attr_sampler_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_2_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler_2[] = { + &dev_attr_sampler_2_id.attr, + NULL, +}; + +static struct attribute_group group_sampler_2 = { + .name = "30bf3702-48cf-4bca-b412-7cf50bb2f564", + .attrs = attrs_sampler_2, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_1 = { + .name = "238bec85-df05-44f3-b905-d166712f2451", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "24bf02cd-8693-4583-981c-c4165b33da01", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + +static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", 
.mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "8fb61ba2-2fbb-454c-a136-2dec5a8a595e", + .attrs = attrs_compute_extra, +}; + +static ssize_t +show_vme_pipe_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_VME_PIPE); +} + +static struct device_attribute dev_attr_vme_pipe_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_vme_pipe_id, + .store = NULL, +}; + +static struct attribute *attrs_vme_pipe[] = { + &dev_attr_vme_pipe_id.attr, + NULL, +}; + +static struct attribute_group group_vme_pipe = { + .name = "e1743ca0-7fc8-410b-a066-de7bbb9280b7", + .attrs = attrs_vme_pipe, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "d6de6f55-e526-4f79-a6a6-d7315c09044e", + .attrs = attrs_test_oa, +}; + int i915_perf_register_sysfs_bdw(struct drm_i915_private *dev_priv) { @@ -374,9 +5148,177 @@ i915_perf_register_sysfs_bdw(struct drm_i915_private *dev_priv) if (ret) goto error_render_basic; } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if 
(get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_data_port_reads_coalescing_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_data_port_reads_coalescing); + if (ret) + goto error_data_port_reads_coalescing; + } + if (get_data_port_writes_coalescing_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_data_port_writes_coalescing); + if (ret) + goto error_data_port_writes_coalescing; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (ret) + goto error_l3_2; + } + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (ret) + goto error_l3_3; + } + if (get_l3_4_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = 
sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_4); + if (ret) + goto error_l3_4; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler_1); + if (ret) + goto error_sampler_1; + } + if (get_sampler_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler_2); + if (ret) + goto error_sampler_2; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (ret) + goto error_vme_pipe; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } return 0; +error_test_oa: + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); +error_vme_pipe: + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, 
&group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_2); +error_sampler_2: + if (get_sampler_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_1); +error_sampler_1: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_4_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_4); +error_l3_4: + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); +error_l3_3: + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); +error_l3_2: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_data_port_writes_coalescing_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_data_port_writes_coalescing); +error_data_port_writes_coalescing: + if (get_data_port_reads_coalescing_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_data_port_reads_coalescing); +error_data_port_reads_coalescing: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) 
+ sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); error_render_basic: return ret; } @@ -389,4 +5331,46 @@ i915_perf_unregister_sysfs_bdw(struct drm_i915_private *dev_priv) if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, 
&group_compute_l3_cache); + if (get_data_port_reads_coalescing_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_data_port_reads_coalescing); + if (get_data_port_writes_coalescing_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_data_port_writes_coalescing); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (get_l3_4_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_4); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_1); + if (get_sampler_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_2); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); } diff --git a/drivers/gpu/drm/i915/i915_oa_bxt.c b/drivers/gpu/drm/i915/i915_oa_bxt.c index 345ec1d3faa7..93864d8f32dd 100644 --- a/drivers/gpu/drm/i915/i915_oa_bxt.c +++ b/drivers/gpu/drm/i915/i915_oa_bxt.c @@ -33,9 +33,23 @@ enum metric_set_id { METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_TEST_OA, }; -int i915_oa_n_builtin_metric_sets_bxt = 1; +int i915_oa_n_builtin_metric_sets_bxt = 15; static const struct i915_oa_reg b_counter_config_render_basic[] = { { _MMIO(0x2710), 0x00000000 }, @@ -156,6 +170,1622 @@ get_render_basic_mux_config(struct drm_i915_private *dev_priv, return n; } +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x39900340 }, + { _MMIO(0x9888), 0x3f900c00 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x002d5000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d4000 }, + { _MMIO(0x9888), 0x0a2d1000 }, + { _MMIO(0x9888), 0x0c2d5000 }, + { 
_MMIO(0x9888), 0x0e2d4000 }, + { _MMIO(0x9888), 0x0c2e1400 }, + { _MMIO(0x9888), 0x0e2e5100 }, + { _MMIO(0x9888), 0x102e0114 }, + { _MMIO(0x9888), 0x044cc000 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4c8000 }, + { _MMIO(0x9888), 0x0e4c4000 }, + { _MMIO(0x9888), 0x104c8000 }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x004ea000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e2000 }, + { _MMIO(0x9888), 0x0c4ea000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x004f6b42 }, + { _MMIO(0x9888), 0x064f6200 }, + { _MMIO(0x9888), 0x084f4100 }, + { _MMIO(0x9888), 0x0a4f0061 }, + { _MMIO(0x9888), 0x0c4f6c4c }, + { _MMIO(0x9888), 0x0e4f4b00 }, + { _MMIO(0x9888), 0x1a4f0000 }, + { _MMIO(0x9888), 0x1c4f0000 }, + { _MMIO(0x9888), 0x180f5000 }, + { _MMIO(0x9888), 0x1a0f8800 }, + { _MMIO(0x9888), 0x1c0f08a2 }, + { _MMIO(0x9888), 0x182c4000 }, + { _MMIO(0x9888), 0x1c2c1451 }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x1a2c0010 }, + { _MMIO(0x9888), 0x01938000 }, + { _MMIO(0x9888), 0x0f938000 }, + { _MMIO(0x9888), 0x19938a28 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x19900177 }, + { _MMIO(0x9888), 0x1b900178 }, + { _MMIO(0x9888), 0x1d900125 }, + { _MMIO(0x9888), 0x1f900123 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x53901000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x55900111 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 
0x45900000 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x0c2e001f }, + { _MMIO(0x9888), 0x0a2f0000 }, + { _MMIO(0x9888), 0x10186800 }, + { _MMIO(0x9888), 0x11810019 }, + { _MMIO(0x9888), 0x15810013 }, + { _MMIO(0x9888), 0x13820020 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x17840000 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x21860000 }, + { _MMIO(0x9888), 0x178703e0 }, + { _MMIO(0x9888), 0x0c2d8000 }, + { _MMIO(0x9888), 0x042d4000 }, + { 
_MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x022e5400 }, + { _MMIO(0x9888), 0x002e0000 }, + { _MMIO(0x9888), 0x0e2e0080 }, + { _MMIO(0x9888), 0x082f0040 }, + { _MMIO(0x9888), 0x002f0000 }, + { _MMIO(0x9888), 0x06143000 }, + { _MMIO(0x9888), 0x06174000 }, + { _MMIO(0x9888), 0x06180012 }, + { _MMIO(0x9888), 0x00180000 }, + { _MMIO(0x9888), 0x0d804000 }, + { _MMIO(0x9888), 0x0f804000 }, + { _MMIO(0x9888), 0x05804000 }, + { _MMIO(0x9888), 0x09810200 }, + { _MMIO(0x9888), 0x0b810030 }, + { _MMIO(0x9888), 0x03810003 }, + { _MMIO(0x9888), 0x21819140 }, + { _MMIO(0x9888), 0x23819050 }, + { _MMIO(0x9888), 0x25810018 }, + { _MMIO(0x9888), 0x0b820980 }, + { _MMIO(0x9888), 0x03820d80 }, + { _MMIO(0x9888), 0x11820000 }, + { _MMIO(0x9888), 0x0182c000 }, + { _MMIO(0x9888), 0x07828000 }, + { _MMIO(0x9888), 0x09824000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x0d830004 }, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x0f831000 }, + { _MMIO(0x9888), 0x01848072 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x09844000 }, + { _MMIO(0x9888), 0x0f848000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x09860092 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x01869100 }, + { _MMIO(0x9888), 0x0f870065 }, + { _MMIO(0x9888), 0x01870000 }, + { _MMIO(0x9888), 0x19930800 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x1b952000 }, + { _MMIO(0x9888), 0x1d955055 }, + { _MMIO(0x9888), 0x1f951455 }, + { _MMIO(0x9888), 0x0992a000 }, + { _MMIO(0x9888), 0x0f928000 }, + { _MMIO(0x9888), 0x1192a800 }, + { _MMIO(0x9888), 0x1392028a }, + { _MMIO(0x9888), 0x0b92a000 }, + { _MMIO(0x9888), 0x0d922000 }, + { _MMIO(0x9888), 0x13908000 }, + { _MMIO(0x9888), 0x21908000 }, + { _MMIO(0x9888), 0x23908000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27908000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 
0x2f908000 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x15908000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900c01 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900863 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900061 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 0x45900c22 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 
0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads[] = { + { _MMIO(0x9888), 0x19800343 }, + { _MMIO(0x9888), 0x39900340 }, + { _MMIO(0x9888), 0x3f901000 }, + { _MMIO(0x9888), 0x41900003 }, + { _MMIO(0x9888), 0x03803180 }, + { _MMIO(0x9888), 0x058035e2 }, + { _MMIO(0x9888), 0x0780006a }, + { _MMIO(0x9888), 0x11800000 }, + { _MMIO(0x9888), 0x2181a000 }, + { _MMIO(0x9888), 0x2381000a }, + { _MMIO(0x9888), 0x1d950550 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d92a000 }, + { _MMIO(0x9888), 0x0f922000 }, + { _MMIO(0x9888), 0x13900170 }, + { _MMIO(0x9888), 0x21900171 }, + { _MMIO(0x9888), 0x23900172 }, + { _MMIO(0x9888), 0x25900173 }, + { _MMIO(0x9888), 0x27900174 }, + { _MMIO(0x9888), 0x29900175 }, + { _MMIO(0x9888), 0x2b900176 }, + { _MMIO(0x9888), 0x2d900177 }, + { _MMIO(0x9888), 0x2f90017f }, + { _MMIO(0x9888), 0x31900125 }, + { _MMIO(0x9888), 0x15900123 }, + { _MMIO(0x9888), 0x17900121 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43901084 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47901080 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49901084 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b901084 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900004 }, + { _MMIO(0x9888), 0x45900000 }, +}; + +static int 
+get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes[] = { + { _MMIO(0x9888), 0x19800343 }, + { _MMIO(0x9888), 0x39900340 }, + { _MMIO(0x9888), 0x3f900000 }, + { _MMIO(0x9888), 0x41900080 
}, + { _MMIO(0x9888), 0x03803180 }, + { _MMIO(0x9888), 0x058035e2 }, + { _MMIO(0x9888), 0x0780006a }, + { _MMIO(0x9888), 0x11800000 }, + { _MMIO(0x9888), 0x2181a000 }, + { _MMIO(0x9888), 0x2381000a }, + { _MMIO(0x9888), 0x1d950550 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d92a000 }, + { _MMIO(0x9888), 0x0f922000 }, + { _MMIO(0x9888), 0x13900180 }, + { _MMIO(0x9888), 0x21900181 }, + { _MMIO(0x9888), 0x23900182 }, + { _MMIO(0x9888), 0x25900183 }, + { _MMIO(0x9888), 0x27900184 }, + { _MMIO(0x9888), 0x29900185 }, + { _MMIO(0x9888), 0x2b900186 }, + { _MMIO(0x9888), 0x2d900187 }, + { _MMIO(0x9888), 0x2f900170 }, + { _MMIO(0x9888), 0x31900125 }, + { _MMIO(0x9888), 0x15900123 }, + { _MMIO(0x9888), 0x17900121 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43901084 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47901080 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49901084 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b901084 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900004 }, + { _MMIO(0x9888), 0x45900000 }, +}; + +static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { 
_MMIO(0x2778), 0x0007fc6a }, + { _MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { _MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x141c0160 }, + { _MMIO(0x9888), 0x161c0015 }, + { _MMIO(0x9888), 0x181c0120 }, + { _MMIO(0x9888), 0x002d5000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d5000 }, + { _MMIO(0x9888), 0x0a2d5000 }, + { _MMIO(0x9888), 0x0c2d5000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x0c2e5400 }, + { _MMIO(0x9888), 0x0e2e5515 }, + { _MMIO(0x9888), 0x102e0155 }, + { _MMIO(0x9888), 0x044cc000 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4cc000 }, + { _MMIO(0x9888), 0x0e4cc000 }, + { _MMIO(0x9888), 0x104c8000 }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x004ea000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084ea000 }, + { _MMIO(0x9888), 0x0a4ea000 }, + { _MMIO(0x9888), 0x0c4ea000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x0e4f4b41 }, + { _MMIO(0x9888), 0x004f4200 }, + { 
_MMIO(0x9888), 0x024f404c }, + { _MMIO(0x9888), 0x1c4f0000 }, + { _MMIO(0x9888), 0x1a4f0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x061b8000 }, + { _MMIO(0x9888), 0x081bc000 }, + { _MMIO(0x9888), 0x0a1bc000 }, + { _MMIO(0x9888), 0x0c1bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x001c0031 }, + { _MMIO(0x9888), 0x061c1900 }, + { _MMIO(0x9888), 0x081c1a33 }, + { _MMIO(0x9888), 0x0a1c1b35 }, + { _MMIO(0x9888), 0x0c1c3337 }, + { _MMIO(0x9888), 0x041c31c7 }, + { _MMIO(0x9888), 0x180f5000 }, + { _MMIO(0x9888), 0x1a0fa8aa }, + { _MMIO(0x9888), 0x1c0f0aaa }, + { _MMIO(0x9888), 0x182c8000 }, + { _MMIO(0x9888), 0x1c2c6aaa }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x1a2c2950 }, + { _MMIO(0x9888), 0x01938000 }, + { _MMIO(0x9888), 0x0f938000 }, + { _MMIO(0x9888), 0x1993aaaa }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29904000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900420 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900400 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x45900001 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + 
int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extended; + lens[n] = ARRAY_SIZE(mux_config_compute_extended); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { _MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { _MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x166c03b0 }, + { _MMIO(0x9888), 0x1593001e }, + { _MMIO(0x9888), 0x3f900c00 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x002d1000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d5000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x0c2e0400 }, + { _MMIO(0x9888), 0x0e2e1500 }, + { _MMIO(0x9888), 0x102e0140 }, + { _MMIO(0x9888), 0x044c4000 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4cc000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x004e2000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084ea000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x1a4f4001 }, + { _MMIO(0x9888), 0x1c4f5005 }, + { _MMIO(0x9888), 0x006c0051 }, + { _MMIO(0x9888), 0x066c5000 }, + { _MMIO(0x9888), 0x086c5c5d }, + { 
_MMIO(0x9888), 0x0e6c5e5f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x146c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x180f1000 }, + { _MMIO(0x9888), 0x1a0fa800 }, + { _MMIO(0x9888), 0x1c0f0a00 }, + { _MMIO(0x9888), 0x182c4000 }, + { _MMIO(0x9888), 0x1c2c4015 }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x03931980 }, + { _MMIO(0x9888), 0x05930032 }, + { _MMIO(0x9888), 0x11930000 }, + { _MMIO(0x9888), 0x01938000 }, + { _MMIO(0x9888), 0x0f938000 }, + { _MMIO(0x9888), 0x1993a00a }, + { _MMIO(0x9888), 0x07930000 }, + { _MMIO(0x9888), 0x09930000 }, + { _MMIO(0x9888), 0x1d900177 }, + { _MMIO(0x9888), 0x1f900178 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x53901000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x55900111 }, + { _MMIO(0x9888), 0x47900001 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 0x45900400 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 
0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x104f0232 }, + { _MMIO(0x9888), 0x124f4640 }, + { _MMIO(0x9888), 0x11834400 }, + { _MMIO(0x9888), 0x022d4000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0e2e0055 }, + { _MMIO(0x9888), 0x064c8000 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x024f6100 }, + { _MMIO(0x9888), 0x044f416b }, + { _MMIO(0x9888), 0x064f004b }, + { _MMIO(0x9888), 0x1a4f0000 }, + { _MMIO(0x9888), 0x1a0f02a8 }, + { _MMIO(0x9888), 0x1a2c5500 }, + { _MMIO(0x9888), 0x0f808000 }, + { _MMIO(0x9888), 0x25810020 }, + { _MMIO(0x9888), 0x0f8305c0 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x1f951000 }, + { _MMIO(0x9888), 0x13920200 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4d900003 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1_0_sku_gte_0x03[] = { + { _MMIO(0x9888), 0x12643400 }, + { _MMIO(0x9888), 0x12653400 }, + { _MMIO(0x9888), 0x106c6800 }, + { _MMIO(0x9888), 0x126c001e }, + { _MMIO(0x9888), 0x166c0010 }, + { _MMIO(0x9888), 0x0c2d5000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x102e0154 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e0055 }, + { _MMIO(0x9888), 0x104c8000 }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { 
_MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x0c4ea000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x1c4f5500 }, + { _MMIO(0x9888), 0x1a4f1554 }, + { _MMIO(0x9888), 0x0a640024 }, + { _MMIO(0x9888), 0x10640000 }, + { _MMIO(0x9888), 0x04640000 }, + { _MMIO(0x9888), 0x0c650024 }, + { _MMIO(0x9888), 0x10650000 }, + { _MMIO(0x9888), 0x06650000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1a6c0900 }, + { _MMIO(0x9888), 0x1c0f0aa0 }, + { _MMIO(0x9888), 0x180f4000 }, + { _MMIO(0x9888), 0x1a0f02aa }, + { _MMIO(0x9888), 0x1c2c5400 }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x1a2c5550 }, + { _MMIO(0x9888), 0x1993aa00 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900421 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900420 }, + { _MMIO(0x9888), 0x45900021 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, +}; + +static const struct i915_oa_reg mux_config_l3_1_0_sku_lt_0x03[] = { + { 
_MMIO(0x9888), 0x14640340 }, + { _MMIO(0x9888), 0x14650340 }, + { _MMIO(0x9888), 0x106c6800 }, + { _MMIO(0x9888), 0x126c001e }, + { _MMIO(0x9888), 0x166c0010 }, + { _MMIO(0x9888), 0x0c2d5000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x102e0154 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e0055 }, + { _MMIO(0x9888), 0x104c8000 }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x0c4ea000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x1c4f5500 }, + { _MMIO(0x9888), 0x1a4f1554 }, + { _MMIO(0x9888), 0x04642400 }, + { _MMIO(0x9888), 0x22640000 }, + { _MMIO(0x9888), 0x1a640000 }, + { _MMIO(0x9888), 0x06650024 }, + { _MMIO(0x9888), 0x22650000 }, + { _MMIO(0x9888), 0x1c650000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1a6c0900 }, + { _MMIO(0x9888), 0x1c0f0aa0 }, + { _MMIO(0x9888), 0x180f4000 }, + { _MMIO(0x9888), 0x1a0f02aa }, + { _MMIO(0x9888), 0x1c2c5400 }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x1a2c5550 }, + { _MMIO(0x9888), 0x1993aa00 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 
0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900421 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900420 }, + { _MMIO(0x9888), 0x45900021 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 2); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 2); + + if (dev_priv->drm.pdev->revision >= 0x03) { + regs[n] = mux_config_l3_1_0_sku_gte_0x03; + lens[n] = ARRAY_SIZE(mux_config_l3_1_0_sku_gte_0x03); + n++; + } + if (dev_priv->drm.pdev->revision < 0x03) { + regs[n] = mux_config_l3_1_0_sku_lt_0x03; + lens[n] = ARRAY_SIZE(mux_config_l3_1_0_sku_lt_0x03); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000efff }, + { _MMIO(0x2778), 0x00006000 }, + { _MMIO(0x277c), 0x0000f3ff }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x102d7800 }, + { _MMIO(0x9888), 0x122d79e0 }, + { 
_MMIO(0x9888), 0x0c2f0004 }, + { _MMIO(0x9888), 0x100e3800 }, + { _MMIO(0x9888), 0x180f0005 }, + { _MMIO(0x9888), 0x002d0940 }, + { _MMIO(0x9888), 0x022d802f }, + { _MMIO(0x9888), 0x042d4013 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0e2e0050 }, + { _MMIO(0x9888), 0x022f0010 }, + { _MMIO(0x9888), 0x002f0000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x040e0480 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x060f0027 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x1a0f0040 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x439014a0 }, + { _MMIO(0x9888), 0x459000a4 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 
0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler[] = { + { _MMIO(0x9888), 0x121300a0 }, + { _MMIO(0x9888), 0x141600ab }, + { _MMIO(0x9888), 0x123300a0 }, + { _MMIO(0x9888), 0x143600ab }, + { _MMIO(0x9888), 0x125300a0 }, + { _MMIO(0x9888), 0x145600ab }, + { _MMIO(0x9888), 0x0c2d4000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x102e01a0 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e0065 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x084c4000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x044e2000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x1c0f0800 }, + { _MMIO(0x9888), 0x180f4000 }, + { _MMIO(0x9888), 0x1a0f023f }, + { _MMIO(0x9888), 0x1e2c0003 }, + { _MMIO(0x9888), 0x1a2cc030 }, + { _MMIO(0x9888), 0x04132180 }, + { _MMIO(0x9888), 0x02130000 }, + { _MMIO(0x9888), 0x0c148000 }, + { _MMIO(0x9888), 0x0e142000 }, + { _MMIO(0x9888), 0x04148000 }, + { _MMIO(0x9888), 0x1e150140 }, + { _MMIO(0x9888), 0x1c150040 }, + { _MMIO(0x9888), 0x0c163000 }, + { _MMIO(0x9888), 0x0e160068 }, + { _MMIO(0x9888), 0x10160000 }, + { _MMIO(0x9888), 0x18160000 }, + { _MMIO(0x9888), 0x0a164000 }, + { _MMIO(0x9888), 0x04330043 }, + { _MMIO(0x9888), 0x02330000 }, + { _MMIO(0x9888), 0x0234a000 }, + { 
_MMIO(0x9888), 0x04342000 }, + { _MMIO(0x9888), 0x1c350015 }, + { _MMIO(0x9888), 0x02363460 }, + { _MMIO(0x9888), 0x10360000 }, + { _MMIO(0x9888), 0x04360000 }, + { _MMIO(0x9888), 0x06360000 }, + { _MMIO(0x9888), 0x08364000 }, + { _MMIO(0x9888), 0x06530043 }, + { _MMIO(0x9888), 0x02530000 }, + { _MMIO(0x9888), 0x0e548000 }, + { _MMIO(0x9888), 0x00548000 }, + { _MMIO(0x9888), 0x06542000 }, + { _MMIO(0x9888), 0x1e550400 }, + { _MMIO(0x9888), 0x1a552000 }, + { _MMIO(0x9888), 0x1c550100 }, + { _MMIO(0x9888), 0x0e563000 }, + { _MMIO(0x9888), 0x00563400 }, + { _MMIO(0x9888), 0x10560000 }, + { _MMIO(0x9888), 0x18560000 }, + { _MMIO(0x9888), 0x02560000 }, + { _MMIO(0x9888), 0x0c564000 }, + { _MMIO(0x9888), 0x1993a800 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b9014a0 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900820 }, + { _MMIO(0x9888), 0x45901022 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, +}; + +static int +get_sampler_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler; + lens[n] = ARRAY_SIZE(mux_config_sampler); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 
0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x00007fff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x00009fff }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000efff }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000f3ff }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fdff }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x141a0000 }, + { _MMIO(0x9888), 0x143a0000 }, + { _MMIO(0x9888), 0x145a0000 }, + { _MMIO(0x9888), 0x0c2d4000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x102e0150 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e006a }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064c4000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024e2000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x1c0f0bc0 }, + { _MMIO(0x9888), 0x180f4000 }, + { _MMIO(0x9888), 0x1a0f0302 }, + { _MMIO(0x9888), 0x1e2c0003 }, + { _MMIO(0x9888), 0x1a2c00f0 }, + { _MMIO(0x9888), 0x021a3080 }, + { _MMIO(0x9888), 0x041a31e5 }, + { _MMIO(0x9888), 0x02148000 }, + { _MMIO(0x9888), 0x0414a000 }, + { 
_MMIO(0x9888), 0x1c150054 }, + { _MMIO(0x9888), 0x06168000 }, + { _MMIO(0x9888), 0x08168000 }, + { _MMIO(0x9888), 0x0a168000 }, + { _MMIO(0x9888), 0x0c3a3280 }, + { _MMIO(0x9888), 0x0e3a0063 }, + { _MMIO(0x9888), 0x063a0061 }, + { _MMIO(0x9888), 0x023a0000 }, + { _MMIO(0x9888), 0x0c348000 }, + { _MMIO(0x9888), 0x0e342000 }, + { _MMIO(0x9888), 0x06342000 }, + { _MMIO(0x9888), 0x1e350140 }, + { _MMIO(0x9888), 0x1c350100 }, + { _MMIO(0x9888), 0x18360028 }, + { _MMIO(0x9888), 0x0c368000 }, + { _MMIO(0x9888), 0x0e5a3080 }, + { _MMIO(0x9888), 0x005a3280 }, + { _MMIO(0x9888), 0x025a0063 }, + { _MMIO(0x9888), 0x0e548000 }, + { _MMIO(0x9888), 0x00548000 }, + { _MMIO(0x9888), 0x02542000 }, + { _MMIO(0x9888), 0x1e550400 }, + { _MMIO(0x9888), 0x1a552000 }, + { _MMIO(0x9888), 0x1c550001 }, + { _MMIO(0x9888), 0x18560080 }, + { _MMIO(0x9888), 0x02568000 }, + { _MMIO(0x9888), 0x04568000 }, + { _MMIO(0x9888), 0x1993a800 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900420 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x45901084 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900001 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x141a026b }, + { _MMIO(0x9888), 0x143a0173 }, + { _MMIO(0x9888), 0x145a026b }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e0069 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x180f6000 }, + { _MMIO(0x9888), 0x1a0f030a }, + { _MMIO(0x9888), 0x1a2c03c0 }, + { _MMIO(0x9888), 0x041a37e7 }, + { _MMIO(0x9888), 0x021a0000 }, + { _MMIO(0x9888), 0x0414a000 }, + { _MMIO(0x9888), 0x1c150050 }, + { _MMIO(0x9888), 0x08168000 }, + { _MMIO(0x9888), 0x0a168000 }, + { _MMIO(0x9888), 0x003a3380 }, + { _MMIO(0x9888), 0x063a006f }, + { _MMIO(0x9888), 0x023a0000 }, + { _MMIO(0x9888), 0x00348000 }, + { _MMIO(0x9888), 0x06342000 }, + { _MMIO(0x9888), 0x1a352000 }, + { _MMIO(0x9888), 0x1c350100 }, + { _MMIO(0x9888), 0x02368000 }, + { _MMIO(0x9888), 0x0c368000 }, + { _MMIO(0x9888), 0x025a37e7 }, + { _MMIO(0x9888), 0x0254a000 }, + { _MMIO(0x9888), 0x1c550005 }, + { _MMIO(0x9888), 0x04568000 
}, + { _MMIO(0x9888), 0x06568000 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900020 }, + { _MMIO(0x9888), 0x45901080 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { + { _MMIO(0xe458), 0x00001000 }, + { _MMIO(0xe558), 0x00003002 }, + { _MMIO(0xe658), 0x00005004 }, + { _MMIO(0xe758), 0x00011010 }, + { _MMIO(0xe45c), 0x00050012 }, + { _MMIO(0xe55c), 0x00052051 }, + { _MMIO(0xe65c), 0x00000008 }, +}; + +static const struct i915_oa_reg mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x141a001f }, + { _MMIO(0x9888), 0x143a001f }, + { _MMIO(0x9888), 0x145a001f }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0e2e0094 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x1a0f00e0 }, + { _MMIO(0x9888), 0x1a2c0c00 
}, + { _MMIO(0x9888), 0x061a0063 }, + { _MMIO(0x9888), 0x021a0000 }, + { _MMIO(0x9888), 0x06142000 }, + { _MMIO(0x9888), 0x1c150100 }, + { _MMIO(0x9888), 0x0c168000 }, + { _MMIO(0x9888), 0x043a3180 }, + { _MMIO(0x9888), 0x023a0000 }, + { _MMIO(0x9888), 0x04348000 }, + { _MMIO(0x9888), 0x1c350040 }, + { _MMIO(0x9888), 0x0a368000 }, + { _MMIO(0x9888), 0x045a0063 }, + { _MMIO(0x9888), 0x025a0000 }, + { _MMIO(0x9888), 0x04542000 }, + { _MMIO(0x9888), 0x1c550010 }, + { _MMIO(0x9888), 0x08568000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x47900004 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 
0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x19800000 }, + { _MMIO(0x9888), 0x07800063 }, + { _MMIO(0x9888), 0x11800000 }, + { _MMIO(0x9888), 0x23810008 }, + { _MMIO(0x9888), 0x1d950400 }, + { _MMIO(0x9888), 0x0f922000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + int i915_oa_select_metric_set_bxt(struct drm_i915_private *dev_priv) { dev_priv->perf.oa.n_mux_configs = 0; @@ -190,6 +1820,370 @@ int i915_oa_select_metric_set_bxt(struct drm_i915_private *dev_priv) dev_priv->perf.oa.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + 
+ return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because 
*_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + 
dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config 
for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER: + dev_priv->perf.oa.n_mux_configs = + get_sampler_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + 
+ dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + 
/* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + return 0; default: return -ENODEV; @@ -218,6 +2212,314 @@ static struct attribute_group group_render_basic = { .attrs = attrs_render_basic, }; +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "012d72cf-82a9-4d25-8ddf-74076fd30797", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "ce416533-e49e-4211-80af-ec513590a914", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_memory_reads_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct 
device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static struct attribute_group group_memory_reads = { + .name = "398e2452-18d7-42d0-b241-e4d0a9148ada", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "d324a0d6-7269-4847-a5c2-6f71ddc7fed5", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "caf3596a-7bb1-4dec-b3b3-2a080d283b49", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, 
+}; + +static struct attribute_group group_compute_l3_cache = { + .name = "49b956e2-d5b9-47e0-9d8a-cee5e8cec527", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "f64ef50a-bdba-4b35-8f09-203c13d8ee5a", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "00ad5a41-7eab-4f7a-9103-49d411c67219", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "46dc44ca-491c-4cc1-a951-e7b3e62bf02b", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ 
+ return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER); +} + +static struct device_attribute dev_attr_sampler_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler[] = { + &dev_attr_sampler_id.attr, + NULL, +}; + +static struct attribute_group group_sampler = { + .name = "8364e2a8-af63-40af-b0d5-42969a255654", + .attrs = attrs_sampler, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_1 = { + .name = "175c8092-cb25-4d1e-8dc7-b4fdd39e2d92", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "d260f03f-b34d-4b49-a44e-436819117332", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + +static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "fa6ecf21-2cb8-4d0b-9308-6e4a7b4ca87a", + .attrs = 
attrs_compute_extra, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "5ee72f5c-092f-421e-8b70-225f7c3e9612", + .attrs = attrs_test_oa, +}; + int i915_perf_register_sysfs_bxt(struct drm_i915_private *dev_priv) { @@ -230,9 +2532,121 @@ i915_perf_register_sysfs_bxt(struct drm_i915_private *dev_priv) if (ret) goto error_render_basic; } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = 
sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (ret) + goto error_sampler; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } return 0; +error_test_oa: + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); +error_sampler: + if 
(get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); error_render_basic: return ret; } @@ -245,4 +2659,32 @@ i915_perf_unregister_sysfs_bxt(struct drm_i915_private *dev_priv) if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, 
&group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); } diff --git a/drivers/gpu/drm/i915/i915_oa_chv.c b/drivers/gpu/drm/i915/i915_oa_chv.c index b15f6c980d11..aa6bece7e75f 100644 --- a/drivers/gpu/drm/i915/i915_oa_chv.c +++ 
b/drivers/gpu/drm/i915/i915_oa_chv.c @@ -33,9 +33,22 @@ enum metric_set_id { METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_L3_2, + METRIC_SET_ID_L3_3, + METRIC_SET_ID_L3_4, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER_1, + METRIC_SET_ID_SAMPLER_2, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_TEST_OA, }; -int i915_oa_n_builtin_metric_sets_chv = 1; +int i915_oa_n_builtin_metric_sets_chv = 14; static const struct i915_oa_reg b_counter_config_render_basic[] = { { _MMIO(0x2740), 0x00000000 }, @@ -146,6 +159,1874 @@ get_render_basic_mux_config(struct drm_i915_private *dev_priv, return n; } +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic[] = { + { _MMIO(0x9888), 0x59800000 }, + { _MMIO(0x9888), 0x59800001 }, + { _MMIO(0x9888), 0x2e5800e0 }, + { _MMIO(0x9888), 0x2e3800e0 }, + { _MMIO(0x9888), 0x3580024f }, + { _MMIO(0x9888), 0x3d800140 }, + { _MMIO(0x9888), 0x08580042 }, + { _MMIO(0x9888), 0x0c580040 }, + { _MMIO(0x9888), 0x1058004c }, + { _MMIO(0x9888), 0x1458004b }, + { _MMIO(0x9888), 0x04580000 }, + { _MMIO(0x9888), 0x00580000 }, + { _MMIO(0x9888), 0x00195555 }, + { _MMIO(0x9888), 0x06380042 }, + { _MMIO(0x9888), 0x0a380040 }, + { _MMIO(0x9888), 0x0e38004c }, + { _MMIO(0x9888), 0x1238004b }, + { _MMIO(0x9888), 0x04380000 }, + { _MMIO(0x9888), 0x00384444 }, + { _MMIO(0x9888), 0x003a5555 }, + { 
_MMIO(0x9888), 0x018bffff }, + { _MMIO(0x9888), 0x01845555 }, + { _MMIO(0x9888), 0x17800074 }, + { _MMIO(0x9888), 0x1980007d }, + { _MMIO(0x9888), 0x1b80007c }, + { _MMIO(0x9888), 0x1d8000b6 }, + { _MMIO(0x9888), 0x1f8000b7 }, + { _MMIO(0x9888), 0x05800000 }, + { _MMIO(0x9888), 0x03800000 }, + { _MMIO(0x9888), 0x418000aa }, + { _MMIO(0x9888), 0x438000aa }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x47800000 }, + { _MMIO(0x9888), 0x4980012a }, + { _MMIO(0x9888), 0x4b80012a }, + { _MMIO(0x9888), 0x4d80012a }, + { _MMIO(0x9888), 0x4f80012a }, + { _MMIO(0x9888), 0x518001ce }, + { _MMIO(0x9888), 0x538001ce }, + { _MMIO(0x9888), 0x5580000e }, + { _MMIO(0x9888), 0x59800000 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { 
_MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x59800000 }, + { _MMIO(0x9888), 0x59800001 }, + { _MMIO(0x9888), 0x261e0000 }, + { _MMIO(0x9888), 0x281f000f }, + { _MMIO(0x9888), 0x2817001a }, + { _MMIO(0x9888), 0x2791001f }, + { _MMIO(0x9888), 0x27880019 }, + { _MMIO(0x9888), 0x2d890000 }, + { _MMIO(0x9888), 0x278a0007 }, + { _MMIO(0x9888), 0x298d001f }, + { _MMIO(0x9888), 0x278e0020 }, + { _MMIO(0x9888), 0x2b8f0012 }, + { _MMIO(0x9888), 0x29900000 }, + { _MMIO(0x9888), 0x00184000 }, + { _MMIO(0x9888), 0x02181000 }, + { _MMIO(0x9888), 0x02194000 }, + { _MMIO(0x9888), 0x141e0002 }, + { _MMIO(0x9888), 0x041e0000 }, + { _MMIO(0x9888), 0x001e0000 }, + { _MMIO(0x9888), 0x221f0015 }, + { _MMIO(0x9888), 0x041f0000 }, + { _MMIO(0x9888), 0x001f4000 }, + { _MMIO(0x9888), 0x021f0000 }, + { _MMIO(0x9888), 0x023a8000 }, + { _MMIO(0x9888), 0x0213c000 }, + { _MMIO(0x9888), 0x02164000 }, + { _MMIO(0x9888), 0x24170012 }, + { _MMIO(0x9888), 0x04170000 }, + { _MMIO(0x9888), 0x07910005 }, + { _MMIO(0x9888), 0x05910000 }, + { _MMIO(0x9888), 0x01911500 }, + { _MMIO(0x9888), 0x03910501 }, + { _MMIO(0x9888), 0x0d880002 }, + { _MMIO(0x9888), 0x1d880003 }, + { _MMIO(0x9888), 0x05880000 }, + { _MMIO(0x9888), 0x0b890032 }, + { _MMIO(0x9888), 0x1b890031 }, + { _MMIO(0x9888), 0x05890000 }, + { _MMIO(0x9888), 0x01890040 }, + { _MMIO(0x9888), 0x03890040 }, + { _MMIO(0x9888), 0x098a0000 }, + { _MMIO(0x9888), 0x198a0004 }, + { _MMIO(0x9888), 0x058a0000 }, + { _MMIO(0x9888), 0x018a8050 }, + { _MMIO(0x9888), 0x038a2050 }, + { _MMIO(0x9888), 0x018b95a9 }, + { _MMIO(0x9888), 0x038be5a9 }, + { _MMIO(0x9888), 0x018c1500 }, + { _MMIO(0x9888), 0x038c0501 }, + { _MMIO(0x9888), 0x178d0015 }, + { _MMIO(0x9888), 0x058d0000 }, + { _MMIO(0x9888), 0x138e0004 }, + { _MMIO(0x9888), 0x218e000c }, + { _MMIO(0x9888), 0x058e0000 }, + { 
_MMIO(0x9888), 0x018e0500 }, + { _MMIO(0x9888), 0x038e0101 }, + { _MMIO(0x9888), 0x0f8f0027 }, + { _MMIO(0x9888), 0x058f0000 }, + { _MMIO(0x9888), 0x018f0000 }, + { _MMIO(0x9888), 0x038f0001 }, + { _MMIO(0x9888), 0x11900013 }, + { _MMIO(0x9888), 0x1f900017 }, + { _MMIO(0x9888), 0x05900000 }, + { _MMIO(0x9888), 0x01900100 }, + { _MMIO(0x9888), 0x03900001 }, + { _MMIO(0x9888), 0x01845555 }, + { _MMIO(0x9888), 0x03845555 }, + { _MMIO(0x9888), 0x418000aa }, + { _MMIO(0x9888), 0x438000aa }, + { _MMIO(0x9888), 0x458000aa }, + { _MMIO(0x9888), 0x478000aa }, + { _MMIO(0x9888), 0x4980018c }, + { _MMIO(0x9888), 0x4b80014b }, + { _MMIO(0x9888), 0x4d800128 }, + { _MMIO(0x9888), 0x4f80012a }, + { _MMIO(0x9888), 0x51800187 }, + { _MMIO(0x9888), 0x5380014b }, + { _MMIO(0x9888), 0x55800149 }, + { _MMIO(0x9888), 0x5780010a }, + { _MMIO(0x9888), 0x59800000 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fff7 }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x105c0232 }, + { 
_MMIO(0x9888), 0x10580232 }, + { _MMIO(0x9888), 0x10380232 }, + { _MMIO(0x9888), 0x10dc0232 }, + { _MMIO(0x9888), 0x10d80232 }, + { _MMIO(0x9888), 0x10b80232 }, + { _MMIO(0x9888), 0x118e4400 }, + { _MMIO(0x9888), 0x025c6080 }, + { _MMIO(0x9888), 0x045c004b }, + { _MMIO(0x9888), 0x005c8000 }, + { _MMIO(0x9888), 0x00582080 }, + { _MMIO(0x9888), 0x0258004b }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00aa }, + { _MMIO(0x9888), 0x04386080 }, + { _MMIO(0x9888), 0x0638404b }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a380000 }, + { _MMIO(0x9888), 0x0c380000 }, + { _MMIO(0x9888), 0x00398000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x0cdc25c1 }, + { _MMIO(0x9888), 0x0adcc000 }, + { _MMIO(0x9888), 0x0ad825c1 }, + { _MMIO(0x9888), 0x18db4000 }, + { _MMIO(0x9888), 0x1adb0001 }, + { _MMIO(0x9888), 0x0e9f8000 }, + { _MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x0eb825c1 }, + { _MMIO(0x9888), 0x18b80154 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x0d88c000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x258baa05 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x198c5400 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x098dc000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x098e05c0 }, + { _MMIO(0x9888), 0x058e0000 }, + { _MMIO(0x9888), 0x198f0020 }, + { _MMIO(0x9888), 0x2185aa0a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 
0x19835000 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x19808000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x51800040 }, + { _MMIO(0x9888), 0x43800400 }, + { _MMIO(0x9888), 0x45800800 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800c62 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f801042 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x418014a4 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 
0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x10bf03da }, + { _MMIO(0x9888), 0x14bf0001 }, + { _MMIO(0x9888), 0x12980340 }, + { _MMIO(0x9888), 0x12990340 }, + { _MMIO(0x9888), 0x0cbf1187 }, + { _MMIO(0x9888), 0x0ebf1205 }, + { _MMIO(0x9888), 0x00bf0500 }, + { _MMIO(0x9888), 0x02bf042b }, + { _MMIO(0x9888), 0x04bf002c }, + { _MMIO(0x9888), 0x0cdac000 }, + { _MMIO(0x9888), 0x0edac000 }, + { _MMIO(0x9888), 0x00da8000 }, + { _MMIO(0x9888), 0x02dac000 }, + { _MMIO(0x9888), 0x04da4000 }, + { _MMIO(0x9888), 0x04983400 }, + { _MMIO(0x9888), 0x10980000 }, + { _MMIO(0x9888), 0x06990034 }, + { _MMIO(0x9888), 0x10990000 }, + { _MMIO(0x9888), 0x0c9dc000 }, + { _MMIO(0x9888), 0x0e9dc000 }, + { _MMIO(0x9888), 0x009d8000 }, + { _MMIO(0x9888), 0x029dc000 }, + { _MMIO(0x9888), 0x049d4000 }, + { _MMIO(0x9888), 0x109f02a8 }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0e9f00ba }, + { _MMIO(0x9888), 0x0cb88000 }, + { _MMIO(0x9888), 0x0cb95000 }, + { _MMIO(0x9888), 0x0eb95000 }, + { _MMIO(0x9888), 0x00b94000 }, + { _MMIO(0x9888), 0x02b95000 }, + { _MMIO(0x9888), 0x04b91000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x0cba4000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { 
_MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x258b800a }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b5500 }, + { _MMIO(0x9888), 0x198c4000 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x47800000 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800060 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct 
i915_oa_reg b_counter_config_l3_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_2[] = { + { _MMIO(0x9888), 0x103f03da }, + { _MMIO(0x9888), 0x143f0001 }, + { _MMIO(0x9888), 0x12180340 }, + { _MMIO(0x9888), 0x12190340 }, + { _MMIO(0x9888), 0x0c3f1187 }, + { _MMIO(0x9888), 0x0e3f1205 }, + { _MMIO(0x9888), 0x003f0500 }, + { _MMIO(0x9888), 0x023f042b }, + { _MMIO(0x9888), 0x043f002c }, + { _MMIO(0x9888), 0x0c5ac000 }, + { _MMIO(0x9888), 0x0e5ac000 }, + { _MMIO(0x9888), 0x005a8000 }, + { _MMIO(0x9888), 0x025ac000 }, + { _MMIO(0x9888), 0x045a4000 }, + { _MMIO(0x9888), 0x04183400 }, + { _MMIO(0x9888), 0x10180000 }, + { _MMIO(0x9888), 0x06190034 }, + { _MMIO(0x9888), 0x10190000 }, + { _MMIO(0x9888), 0x0c1dc000 }, + { _MMIO(0x9888), 0x0e1dc000 }, + { _MMIO(0x9888), 0x001d8000 }, + { _MMIO(0x9888), 0x021dc000 }, + { _MMIO(0x9888), 0x041d4000 }, + { _MMIO(0x9888), 0x101f02a8 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00ba }, + { _MMIO(0x9888), 
0x0c388000 }, + { _MMIO(0x9888), 0x0c395000 }, + { _MMIO(0x9888), 0x0e395000 }, + { _MMIO(0x9888), 0x00394000 }, + { _MMIO(0x9888), 0x02395000 }, + { _MMIO(0x9888), 0x04391000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x0c3a4000 }, + { _MMIO(0x9888), 0x1b8aa800 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x258b4005 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800000 }, + { _MMIO(0x9888), 0x47800000 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800060 }, +}; + +static int +get_l3_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_2; + lens[n] = ARRAY_SIZE(mux_config_l3_2); + n++; + + return n; +} + 
+static const struct i915_oa_reg b_counter_config_l3_3[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_3[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_3[] = { + { _MMIO(0x9888), 0x121b0340 }, + { _MMIO(0x9888), 0x103f0274 }, + { _MMIO(0x9888), 0x123f0000 }, + { _MMIO(0x9888), 0x129b0340 }, + { _MMIO(0x9888), 0x10bf0274 }, + { _MMIO(0x9888), 0x12bf0000 }, + { _MMIO(0x9888), 0x041b3400 }, + { _MMIO(0x9888), 0x101b0000 }, + { _MMIO(0x9888), 0x045c8000 }, + { _MMIO(0x9888), 0x0a3d4000 }, + { _MMIO(0x9888), 0x003f0080 }, + { _MMIO(0x9888), 0x023f0793 }, + { _MMIO(0x9888), 0x043f0014 }, + { _MMIO(0x9888), 0x04588000 }, + { _MMIO(0x9888), 0x005a8000 }, + { _MMIO(0x9888), 0x025ac000 }, + { _MMIO(0x9888), 0x045a4000 }, + { _MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x001d8000 }, + { _MMIO(0x9888), 0x021dc000 }, + { _MMIO(0x9888), 0x041d4000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f002a }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x00394000 }, + { _MMIO(0x9888), 0x02395000 
}, + { _MMIO(0x9888), 0x04399000 }, + { _MMIO(0x9888), 0x069b0034 }, + { _MMIO(0x9888), 0x109b0000 }, + { _MMIO(0x9888), 0x06dc4000 }, + { _MMIO(0x9888), 0x0cbd4000 }, + { _MMIO(0x9888), 0x0cbf0981 }, + { _MMIO(0x9888), 0x0ebf0a0f }, + { _MMIO(0x9888), 0x06d84000 }, + { _MMIO(0x9888), 0x0cdac000 }, + { _MMIO(0x9888), 0x0edac000 }, + { _MMIO(0x9888), 0x0cdb4000 }, + { _MMIO(0x9888), 0x0c9dc000 }, + { _MMIO(0x9888), 0x0e9dc000 }, + { _MMIO(0x9888), 0x109f02a8 }, + { _MMIO(0x9888), 0x0e9f0080 }, + { _MMIO(0x9888), 0x0cb84000 }, + { _MMIO(0x9888), 0x0cb95000 }, + { _MMIO(0x9888), 0x0eb95000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x258b8009 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x198c4000 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800c00 }, + { _MMIO(0x9888), 0x47800c63 }, + { 
_MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f8014a5 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800045 }, +}; + +static int +get_l3_3_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_3; + lens[n] = ARRAY_SIZE(mux_config_l3_3); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_4[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_4[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_4[] = { + { _MMIO(0x9888), 0x121a0340 }, + { _MMIO(0x9888), 0x103f0017 }, + { _MMIO(0x9888), 0x123f0020 }, + { _MMIO(0x9888), 0x129a0340 }, + { _MMIO(0x9888), 0x10bf0017 }, + { _MMIO(0x9888), 0x12bf0020 }, + { _MMIO(0x9888), 0x041a3400 }, + { _MMIO(0x9888), 0x101a0000 }, + { _MMIO(0x9888), 0x043b8000 }, + 
{ _MMIO(0x9888), 0x0a3e0010 }, + { _MMIO(0x9888), 0x003f0200 }, + { _MMIO(0x9888), 0x023f0113 }, + { _MMIO(0x9888), 0x043f0014 }, + { _MMIO(0x9888), 0x02592000 }, + { _MMIO(0x9888), 0x005a8000 }, + { _MMIO(0x9888), 0x025ac000 }, + { _MMIO(0x9888), 0x045a4000 }, + { _MMIO(0x9888), 0x0a1c8000 }, + { _MMIO(0x9888), 0x001d8000 }, + { _MMIO(0x9888), 0x021dc000 }, + { _MMIO(0x9888), 0x041d4000 }, + { _MMIO(0x9888), 0x0a1e8000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f001a }, + { _MMIO(0x9888), 0x00394000 }, + { _MMIO(0x9888), 0x02395000 }, + { _MMIO(0x9888), 0x04391000 }, + { _MMIO(0x9888), 0x069a0034 }, + { _MMIO(0x9888), 0x109a0000 }, + { _MMIO(0x9888), 0x06bb4000 }, + { _MMIO(0x9888), 0x0abe0040 }, + { _MMIO(0x9888), 0x0cbf0984 }, + { _MMIO(0x9888), 0x0ebf0a02 }, + { _MMIO(0x9888), 0x02d94000 }, + { _MMIO(0x9888), 0x0cdac000 }, + { _MMIO(0x9888), 0x0edac000 }, + { _MMIO(0x9888), 0x0c9c0400 }, + { _MMIO(0x9888), 0x0c9dc000 }, + { _MMIO(0x9888), 0x0e9dc000 }, + { _MMIO(0x9888), 0x0c9e0400 }, + { _MMIO(0x9888), 0x109f02a8 }, + { _MMIO(0x9888), 0x0e9f0040 }, + { _MMIO(0x9888), 0x0cb95000 }, + { _MMIO(0x9888), 0x0eb95000 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x258b8009 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x198c4000 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185800a }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x1b830154 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 
0x0d834000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x45800800 }, + { _MMIO(0x9888), 0x47800842 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f801084 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800044 }, +}; + +static int +get_l3_4_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_4; + lens[n] = ARRAY_SIZE(mux_config_l3_4); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00006000 }, + { _MMIO(0x2774), 0x0000f3ff }, + { _MMIO(0x2778), 0x00001800 }, + { _MMIO(0x277c), 0x0000fcff }, + { _MMIO(0x2780), 0x00000600 }, + { _MMIO(0x2784), 0x0000ff3f }, + { _MMIO(0x2788), 0x00000180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000060 }, + { _MMIO(0x2794), 0x0000fff3 }, + { _MMIO(0x2798), 0x00000018 }, + { _MMIO(0x279c), 0x0000fffc }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 
}, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x143b000e }, + { _MMIO(0x9888), 0x043c55c0 }, + { _MMIO(0x9888), 0x0a1e0280 }, + { _MMIO(0x9888), 0x0c1e0408 }, + { _MMIO(0x9888), 0x10390000 }, + { _MMIO(0x9888), 0x12397a1f }, + { _MMIO(0x9888), 0x14bb000e }, + { _MMIO(0x9888), 0x04bc5000 }, + { _MMIO(0x9888), 0x0a9e0296 }, + { _MMIO(0x9888), 0x0c9e0008 }, + { _MMIO(0x9888), 0x10b90000 }, + { _MMIO(0x9888), 0x12b97a1f }, + { _MMIO(0x9888), 0x063b0042 }, + { _MMIO(0x9888), 0x103b0000 }, + { _MMIO(0x9888), 0x083c0000 }, + { _MMIO(0x9888), 0x0a3e0040 }, + { _MMIO(0x9888), 0x043f8000 }, + { _MMIO(0x9888), 0x02594000 }, + { _MMIO(0x9888), 0x045a8000 }, + { _MMIO(0x9888), 0x0c1c0400 }, + { _MMIO(0x9888), 0x041d8000 }, + { _MMIO(0x9888), 0x081e02c0 }, + { _MMIO(0x9888), 0x0e1e0000 }, + { _MMIO(0x9888), 0x0c1fa800 }, + { _MMIO(0x9888), 0x0e1f0260 }, + { _MMIO(0x9888), 0x101f0014 }, + { _MMIO(0x9888), 0x003905e0 }, + { _MMIO(0x9888), 0x06390bc0 }, + { _MMIO(0x9888), 0x02390018 }, + { _MMIO(0x9888), 0x04394000 }, + { _MMIO(0x9888), 0x04bb0042 }, + { _MMIO(0x9888), 0x10bb0000 }, + { _MMIO(0x9888), 0x02bc05c0 }, + { _MMIO(0x9888), 0x08bc0000 }, + { _MMIO(0x9888), 0x0abe0004 }, + { _MMIO(0x9888), 0x02bf8000 }, + { _MMIO(0x9888), 0x02d91000 }, + { _MMIO(0x9888), 0x02da8000 }, + { _MMIO(0x9888), 0x089c8000 }, + { _MMIO(0x9888), 0x029d8000 }, + { _MMIO(0x9888), 0x089e8000 }, + { _MMIO(0x9888), 0x0e9e0000 }, + { _MMIO(0x9888), 0x0e9fa806 }, + { _MMIO(0x9888), 0x109f0142 }, + { _MMIO(0x9888), 0x08b90617 }, + { _MMIO(0x9888), 0x0ab90be0 }, + { _MMIO(0x9888), 0x02b94000 }, + { _MMIO(0x9888), 0x0d88f000 }, + { _MMIO(0x9888), 0x0f88000c }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x018a8000 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x1b8a2800 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 
}, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x238b52a0 }, + { _MMIO(0x9888), 0x258b6a95 }, + { _MMIO(0x9888), 0x278b0029 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c1500 }, + { _MMIO(0x9888), 0x1b8c0014 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x038d8000 }, + { _MMIO(0x9888), 0x058d2000 }, + { _MMIO(0x9888), 0x1f85aa80 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x01834000 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0184c000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1180c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x4d800444 }, + { _MMIO(0x9888), 0x3d800000 }, + { _MMIO(0x9888), 0x4f804000 }, + { _MMIO(0x9888), 0x43801080 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800084 }, + { _MMIO(0x9888), 0x53800044 }, + { _MMIO(0x9888), 0x47801080 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x3f800000 }, + { _MMIO(0x9888), 0x41800840 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ 
+ int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler_1[] = { + { _MMIO(0x9888), 0x18921400 }, + { _MMIO(0x9888), 0x149500ab }, + { _MMIO(0x9888), 0x18b21400 }, + { _MMIO(0x9888), 0x14b500ab }, + { _MMIO(0x9888), 0x18d21400 }, + { _MMIO(0x9888), 0x14d500ab }, + { _MMIO(0x9888), 0x0cdc8000 }, + { _MMIO(0x9888), 0x0edc4000 }, + { _MMIO(0x9888), 0x02dcc000 }, + { _MMIO(0x9888), 0x04dcc000 }, + { _MMIO(0x9888), 0x1abd00a0 }, + { _MMIO(0x9888), 0x0abd8000 }, + { _MMIO(0x9888), 0x0cd88000 }, + { _MMIO(0x9888), 0x0ed84000 }, + { _MMIO(0x9888), 0x04d88000 }, + { _MMIO(0x9888), 0x1adb0050 }, + { _MMIO(0x9888), 0x04db8000 }, + { _MMIO(0x9888), 0x06db8000 }, + { _MMIO(0x9888), 0x08db8000 }, + { _MMIO(0x9888), 0x0adb4000 }, + { _MMIO(0x9888), 0x109f02a0 }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0e9f00aa }, + { _MMIO(0x9888), 0x18b82500 }, + { _MMIO(0x9888), 0x02b88000 }, + { _MMIO(0x9888), 0x04b84000 }, + { _MMIO(0x9888), 
0x06b84000 }, + { _MMIO(0x9888), 0x08b84000 }, + { _MMIO(0x9888), 0x0ab84000 }, + { _MMIO(0x9888), 0x0cb88000 }, + { _MMIO(0x9888), 0x0cb98000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x00b98000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x04b9a000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x1aba0200 }, + { _MMIO(0x9888), 0x02ba8000 }, + { _MMIO(0x9888), 0x0cba8000 }, + { _MMIO(0x9888), 0x04908000 }, + { _MMIO(0x9888), 0x04918000 }, + { _MMIO(0x9888), 0x04927300 }, + { _MMIO(0x9888), 0x10920000 }, + { _MMIO(0x9888), 0x1893000a }, + { _MMIO(0x9888), 0x0a934000 }, + { _MMIO(0x9888), 0x0a946000 }, + { _MMIO(0x9888), 0x0c959000 }, + { _MMIO(0x9888), 0x0e950098 }, + { _MMIO(0x9888), 0x10950000 }, + { _MMIO(0x9888), 0x04b04000 }, + { _MMIO(0x9888), 0x04b14000 }, + { _MMIO(0x9888), 0x04b20073 }, + { _MMIO(0x9888), 0x10b20000 }, + { _MMIO(0x9888), 0x04b38000 }, + { _MMIO(0x9888), 0x06b38000 }, + { _MMIO(0x9888), 0x08b34000 }, + { _MMIO(0x9888), 0x04b4c000 }, + { _MMIO(0x9888), 0x02b59890 }, + { _MMIO(0x9888), 0x10b50000 }, + { _MMIO(0x9888), 0x06d04000 }, + { _MMIO(0x9888), 0x06d14000 }, + { _MMIO(0x9888), 0x06d20073 }, + { _MMIO(0x9888), 0x10d20000 }, + { _MMIO(0x9888), 0x18d30020 }, + { _MMIO(0x9888), 0x02d38000 }, + { _MMIO(0x9888), 0x0cd34000 }, + { _MMIO(0x9888), 0x0ad48000 }, + { _MMIO(0x9888), 0x04d42000 }, + { _MMIO(0x9888), 0x0ed59000 }, + { _MMIO(0x9888), 0x00d59800 }, + { _MMIO(0x9888), 0x10d50000 }, + { _MMIO(0x9888), 0x0f88000e }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b5500 }, + { _MMIO(0x9888), 0x258b000a }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { 
_MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x0d8d8000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x2185000a }, + { _MMIO(0x9888), 0x1b830150 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d848000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d808000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47801021 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800c64 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800c02 }, +}; + +static int +get_sampler_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler_1; + lens[n] = ARRAY_SIZE(mux_config_sampler_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { 
_MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler_2[] = { + { _MMIO(0x9888), 0x18121400 }, + { _MMIO(0x9888), 0x141500ab }, + { _MMIO(0x9888), 0x18321400 }, + { _MMIO(0x9888), 0x143500ab }, + { _MMIO(0x9888), 0x18521400 }, + { _MMIO(0x9888), 0x145500ab }, + { _MMIO(0x9888), 0x0c5c8000 }, + { _MMIO(0x9888), 0x0e5c4000 }, + { _MMIO(0x9888), 0x025cc000 }, + { _MMIO(0x9888), 0x045cc000 }, + { _MMIO(0x9888), 0x1a3d00a0 }, + { _MMIO(0x9888), 0x0a3d8000 }, + { _MMIO(0x9888), 0x0c588000 }, + { _MMIO(0x9888), 0x0e584000 }, + { _MMIO(0x9888), 0x04588000 }, + { _MMIO(0x9888), 0x1a5b0050 }, + { _MMIO(0x9888), 0x045b8000 }, + { _MMIO(0x9888), 0x065b8000 }, + { _MMIO(0x9888), 0x085b8000 }, + { _MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x101f02a0 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00aa }, + { _MMIO(0x9888), 0x18382500 }, + { _MMIO(0x9888), 0x02388000 }, + { _MMIO(0x9888), 0x04384000 }, + { _MMIO(0x9888), 0x06384000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0c388000 }, + { _MMIO(0x9888), 0x0c398000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x00398000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x1a3a0200 }, + { _MMIO(0x9888), 0x023a8000 }, + { _MMIO(0x9888), 0x0c3a8000 }, + { _MMIO(0x9888), 0x04108000 }, + { _MMIO(0x9888), 0x04118000 }, + { _MMIO(0x9888), 0x04127300 }, + { _MMIO(0x9888), 0x10120000 }, + { _MMIO(0x9888), 0x1813000a }, + { _MMIO(0x9888), 0x0a134000 }, + { _MMIO(0x9888), 0x0a146000 }, + { _MMIO(0x9888), 0x0c159000 }, + { _MMIO(0x9888), 
0x0e150098 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x04304000 }, + { _MMIO(0x9888), 0x04314000 }, + { _MMIO(0x9888), 0x04320073 }, + { _MMIO(0x9888), 0x10320000 }, + { _MMIO(0x9888), 0x04338000 }, + { _MMIO(0x9888), 0x06338000 }, + { _MMIO(0x9888), 0x08334000 }, + { _MMIO(0x9888), 0x0434c000 }, + { _MMIO(0x9888), 0x02359890 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x06504000 }, + { _MMIO(0x9888), 0x06514000 }, + { _MMIO(0x9888), 0x06520073 }, + { _MMIO(0x9888), 0x10520000 }, + { _MMIO(0x9888), 0x18530020 }, + { _MMIO(0x9888), 0x02538000 }, + { _MMIO(0x9888), 0x0c534000 }, + { _MMIO(0x9888), 0x0a548000 }, + { _MMIO(0x9888), 0x04542000 }, + { _MMIO(0x9888), 0x0e559000 }, + { _MMIO(0x9888), 0x00559800 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x1b8aa000 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x258b0005 }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x2185000a }, + { _MMIO(0x9888), 0x1b830150 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0d848000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x07844000 }, + { _MMIO(0x9888), 0x1d808000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x17804000 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47801021 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { 
_MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800c64 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x41800c02 }, +}; + +static int +get_sampler_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler_2; + lens[n] = ARRAY_SIZE(mux_config_sampler_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x0000fe7f }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000ffbf }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fff7 }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fff9 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x16154d60 }, + { _MMIO(0x9888), 0x16352e60 }, + { _MMIO(0x9888), 0x16554d60 }, + { _MMIO(0x9888), 0x16950000 }, + { _MMIO(0x9888), 0x16b50000 }, + { _MMIO(0x9888), 0x16d50000 }, + { _MMIO(0x9888), 0x005c8000 }, + { _MMIO(0x9888), 0x045cc000 }, + { _MMIO(0x9888), 0x065c4000 }, + { _MMIO(0x9888), 0x083d8000 }, + { _MMIO(0x9888), 0x0a3d8000 }, + { _MMIO(0x9888), 0x0458c000 }, + { _MMIO(0x9888), 0x025b8000 }, + { _MMIO(0x9888), 0x085b4000 }, + { 
_MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x0c5b8000 }, + { _MMIO(0x9888), 0x0c1fa000 }, + { _MMIO(0x9888), 0x0e1f00aa }, + { _MMIO(0x9888), 0x02384000 }, + { _MMIO(0x9888), 0x04388000 }, + { _MMIO(0x9888), 0x06388000 }, + { _MMIO(0x9888), 0x08384000 }, + { _MMIO(0x9888), 0x0a384000 }, + { _MMIO(0x9888), 0x0c384000 }, + { _MMIO(0x9888), 0x00398000 }, + { _MMIO(0x9888), 0x0239a000 }, + { _MMIO(0x9888), 0x0439a000 }, + { _MMIO(0x9888), 0x06392000 }, + { _MMIO(0x9888), 0x043a8000 }, + { _MMIO(0x9888), 0x063a8000 }, + { _MMIO(0x9888), 0x08138000 }, + { _MMIO(0x9888), 0x0a138000 }, + { _MMIO(0x9888), 0x06143000 }, + { _MMIO(0x9888), 0x0415cfc7 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x02338000 }, + { _MMIO(0x9888), 0x0c338000 }, + { _MMIO(0x9888), 0x04342000 }, + { _MMIO(0x9888), 0x06344000 }, + { _MMIO(0x9888), 0x0035c700 }, + { _MMIO(0x9888), 0x063500cf }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x04538000 }, + { _MMIO(0x9888), 0x06538000 }, + { _MMIO(0x9888), 0x0454c000 }, + { _MMIO(0x9888), 0x0255cfc7 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x06dc8000 }, + { _MMIO(0x9888), 0x08dc4000 }, + { _MMIO(0x9888), 0x0cdcc000 }, + { _MMIO(0x9888), 0x0edcc000 }, + { _MMIO(0x9888), 0x1abd00a8 }, + { _MMIO(0x9888), 0x0cd8c000 }, + { _MMIO(0x9888), 0x0ed84000 }, + { _MMIO(0x9888), 0x0edb8000 }, + { _MMIO(0x9888), 0x18db0800 }, + { _MMIO(0x9888), 0x1adb0254 }, + { _MMIO(0x9888), 0x0e9faa00 }, + { _MMIO(0x9888), 0x109f02aa }, + { _MMIO(0x9888), 0x0eb84000 }, + { _MMIO(0x9888), 0x16b84000 }, + { _MMIO(0x9888), 0x18b8156a }, + { _MMIO(0x9888), 0x06b98000 }, + { _MMIO(0x9888), 0x08b9a000 }, + { _MMIO(0x9888), 0x0ab9a000 }, + { _MMIO(0x9888), 0x0cb9a000 }, + { _MMIO(0x9888), 0x0eb9a000 }, + { _MMIO(0x9888), 0x18baa000 }, + { _MMIO(0x9888), 0x1aba0002 }, + { _MMIO(0x9888), 0x16934000 }, + { _MMIO(0x9888), 0x1893000a }, + { _MMIO(0x9888), 0x0a947000 }, + { _MMIO(0x9888), 0x0c95c5c1 }, + { _MMIO(0x9888), 0x0e9500c3 }, + { _MMIO(0x9888), 
0x10950000 }, + { _MMIO(0x9888), 0x0eb38000 }, + { _MMIO(0x9888), 0x16b30040 }, + { _MMIO(0x9888), 0x18b30020 }, + { _MMIO(0x9888), 0x06b48000 }, + { _MMIO(0x9888), 0x08b41000 }, + { _MMIO(0x9888), 0x0ab48000 }, + { _MMIO(0x9888), 0x06b5c500 }, + { _MMIO(0x9888), 0x08b500c3 }, + { _MMIO(0x9888), 0x0eb5c100 }, + { _MMIO(0x9888), 0x10b50000 }, + { _MMIO(0x9888), 0x16d31500 }, + { _MMIO(0x9888), 0x08d4e000 }, + { _MMIO(0x9888), 0x08d5c100 }, + { _MMIO(0x9888), 0x0ad5c3c5 }, + { _MMIO(0x9888), 0x10d50000 }, + { _MMIO(0x9888), 0x0d88f800 }, + { _MMIO(0x9888), 0x0f88000f }, + { _MMIO(0x9888), 0x038a8000 }, + { _MMIO(0x9888), 0x058a8000 }, + { _MMIO(0x9888), 0x078a8000 }, + { _MMIO(0x9888), 0x098a8000 }, + { _MMIO(0x9888), 0x0b8a8000 }, + { _MMIO(0x9888), 0x0d8a8000 }, + { _MMIO(0x9888), 0x258baaa5 }, + { _MMIO(0x9888), 0x278b002a }, + { _MMIO(0x9888), 0x238b2a80 }, + { _MMIO(0x9888), 0x0f8c4000 }, + { _MMIO(0x9888), 0x178c2000 }, + { _MMIO(0x9888), 0x198c5500 }, + { _MMIO(0x9888), 0x1b8c0015 }, + { _MMIO(0x9888), 0x078d8000 }, + { _MMIO(0x9888), 0x098da000 }, + { _MMIO(0x9888), 0x0b8da000 }, + { _MMIO(0x9888), 0x0d8da000 }, + { _MMIO(0x9888), 0x0f8da000 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 0x0d834000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { 
_MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800c42 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45800063 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x47800800 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f8014a4 }, + { _MMIO(0x9888), 0x41801042 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x0000fe7f }, + { _MMIO(0x2780), 0x00000000 }, + { _MMIO(0x2784), 0x0000ff9f }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000ffe7 }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fffb }, + { _MMIO(0x2798), 0x00000002 }, + { _MMIO(0x279c), 0x0000fffd }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x16150000 }, + { _MMIO(0x9888), 0x16350000 }, + { _MMIO(0x9888), 0x16550000 }, + { _MMIO(0x9888), 0x16952e60 }, + { _MMIO(0x9888), 
0x16b54d60 }, + { _MMIO(0x9888), 0x16d52e60 }, + { _MMIO(0x9888), 0x065c8000 }, + { _MMIO(0x9888), 0x085cc000 }, + { _MMIO(0x9888), 0x0a5cc000 }, + { _MMIO(0x9888), 0x0c5c4000 }, + { _MMIO(0x9888), 0x0e3d8000 }, + { _MMIO(0x9888), 0x183da000 }, + { _MMIO(0x9888), 0x06588000 }, + { _MMIO(0x9888), 0x08588000 }, + { _MMIO(0x9888), 0x0a584000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x185b5800 }, + { _MMIO(0x9888), 0x1a5b000a }, + { _MMIO(0x9888), 0x0e1faa00 }, + { _MMIO(0x9888), 0x101f02aa }, + { _MMIO(0x9888), 0x0e384000 }, + { _MMIO(0x9888), 0x16384000 }, + { _MMIO(0x9888), 0x18382a55 }, + { _MMIO(0x9888), 0x06398000 }, + { _MMIO(0x9888), 0x0839a000 }, + { _MMIO(0x9888), 0x0a39a000 }, + { _MMIO(0x9888), 0x0c39a000 }, + { _MMIO(0x9888), 0x0e39a000 }, + { _MMIO(0x9888), 0x1a3a02a0 }, + { _MMIO(0x9888), 0x0e138000 }, + { _MMIO(0x9888), 0x16130500 }, + { _MMIO(0x9888), 0x06148000 }, + { _MMIO(0x9888), 0x08146000 }, + { _MMIO(0x9888), 0x0615c100 }, + { _MMIO(0x9888), 0x0815c500 }, + { _MMIO(0x9888), 0x0a1500c3 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x16335040 }, + { _MMIO(0x9888), 0x08349000 }, + { _MMIO(0x9888), 0x0a341000 }, + { _MMIO(0x9888), 0x083500c1 }, + { _MMIO(0x9888), 0x0a35c500 }, + { _MMIO(0x9888), 0x0c3500c3 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x1853002a }, + { _MMIO(0x9888), 0x0a54e000 }, + { _MMIO(0x9888), 0x0c55c500 }, + { _MMIO(0x9888), 0x0e55c1c3 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x00dc8000 }, + { _MMIO(0x9888), 0x02dcc000 }, + { _MMIO(0x9888), 0x04dc4000 }, + { _MMIO(0x9888), 0x04bd8000 }, + { _MMIO(0x9888), 0x06bd8000 }, + { _MMIO(0x9888), 0x02d8c000 }, + { _MMIO(0x9888), 0x02db8000 }, + { _MMIO(0x9888), 0x04db4000 }, + { _MMIO(0x9888), 0x06db4000 }, + { _MMIO(0x9888), 0x08db8000 }, + { _MMIO(0x9888), 0x0c9fa000 }, + { _MMIO(0x9888), 0x0e9f00aa }, + { _MMIO(0x9888), 0x02b84000 }, + { _MMIO(0x9888), 0x04b84000 }, + { _MMIO(0x9888), 0x06b84000 }, + { _MMIO(0x9888), 0x08b84000 }, + { 
_MMIO(0x9888), 0x0ab88000 }, + { _MMIO(0x9888), 0x0cb88000 }, + { _MMIO(0x9888), 0x00b98000 }, + { _MMIO(0x9888), 0x02b9a000 }, + { _MMIO(0x9888), 0x04b9a000 }, + { _MMIO(0x9888), 0x06b92000 }, + { _MMIO(0x9888), 0x0aba8000 }, + { _MMIO(0x9888), 0x0cba8000 }, + { _MMIO(0x9888), 0x04938000 }, + { _MMIO(0x9888), 0x06938000 }, + { _MMIO(0x9888), 0x0494c000 }, + { _MMIO(0x9888), 0x0295cfc7 }, + { _MMIO(0x9888), 0x10950000 }, + { _MMIO(0x9888), 0x02b38000 }, + { _MMIO(0x9888), 0x08b38000 }, + { _MMIO(0x9888), 0x04b42000 }, + { _MMIO(0x9888), 0x06b41000 }, + { _MMIO(0x9888), 0x00b5c700 }, + { _MMIO(0x9888), 0x04b500cf }, + { _MMIO(0x9888), 0x10b50000 }, + { _MMIO(0x9888), 0x0ad38000 }, + { _MMIO(0x9888), 0x0cd38000 }, + { _MMIO(0x9888), 0x06d46000 }, + { _MMIO(0x9888), 0x04d5c700 }, + { _MMIO(0x9888), 0x06d500cf }, + { _MMIO(0x9888), 0x10d50000 }, + { _MMIO(0x9888), 0x03888000 }, + { _MMIO(0x9888), 0x05888000 }, + { _MMIO(0x9888), 0x07888000 }, + { _MMIO(0x9888), 0x09888000 }, + { _MMIO(0x9888), 0x0b888000 }, + { _MMIO(0x9888), 0x0d880400 }, + { _MMIO(0x9888), 0x0f8a8000 }, + { _MMIO(0x9888), 0x198a8000 }, + { _MMIO(0x9888), 0x1b8aaaa0 }, + { _MMIO(0x9888), 0x1d8a0002 }, + { _MMIO(0x9888), 0x258b555a }, + { _MMIO(0x9888), 0x278b0015 }, + { _MMIO(0x9888), 0x238b5500 }, + { _MMIO(0x9888), 0x038c4000 }, + { _MMIO(0x9888), 0x058c4000 }, + { _MMIO(0x9888), 0x078c4000 }, + { _MMIO(0x9888), 0x098c4000 }, + { _MMIO(0x9888), 0x0b8c4000 }, + { _MMIO(0x9888), 0x0d8c4000 }, + { _MMIO(0x9888), 0x018d8000 }, + { _MMIO(0x9888), 0x038da000 }, + { _MMIO(0x9888), 0x058da000 }, + { _MMIO(0x9888), 0x078d2000 }, + { _MMIO(0x9888), 0x2185aaaa }, + { _MMIO(0x9888), 0x2385002a }, + { _MMIO(0x9888), 0x1f85aa00 }, + { _MMIO(0x9888), 0x0f834000 }, + { _MMIO(0x9888), 0x19835400 }, + { _MMIO(0x9888), 0x1b830155 }, + { _MMIO(0x9888), 0x03834000 }, + { _MMIO(0x9888), 0x05834000 }, + { _MMIO(0x9888), 0x07834000 }, + { _MMIO(0x9888), 0x09834000 }, + { _MMIO(0x9888), 0x0b834000 }, + { _MMIO(0x9888), 
0x0d834000 }, + { _MMIO(0x9888), 0x0784c000 }, + { _MMIO(0x9888), 0x0984c000 }, + { _MMIO(0x9888), 0x0b84c000 }, + { _MMIO(0x9888), 0x0d84c000 }, + { _MMIO(0x9888), 0x0f84c000 }, + { _MMIO(0x9888), 0x01848000 }, + { _MMIO(0x9888), 0x0384c000 }, + { _MMIO(0x9888), 0x0584c000 }, + { _MMIO(0x9888), 0x1780c000 }, + { _MMIO(0x9888), 0x1980c000 }, + { _MMIO(0x9888), 0x1b80c000 }, + { _MMIO(0x9888), 0x1d80c000 }, + { _MMIO(0x9888), 0x1f80c000 }, + { _MMIO(0x9888), 0x11808000 }, + { _MMIO(0x9888), 0x1380c000 }, + { _MMIO(0x9888), 0x1580c000 }, + { _MMIO(0x9888), 0x4f800000 }, + { _MMIO(0x9888), 0x43800882 }, + { _MMIO(0x9888), 0x51800000 }, + { _MMIO(0x9888), 0x45801082 }, + { _MMIO(0x9888), 0x53800000 }, + { _MMIO(0x9888), 0x478014a5 }, + { _MMIO(0x9888), 0x21800000 }, + { _MMIO(0x9888), 0x31800000 }, + { _MMIO(0x9888), 0x4d800000 }, + { _MMIO(0x9888), 0x3f800002 }, + { _MMIO(0x9888), 0x41800c62 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { 
_MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x59800000 }, + { _MMIO(0x9888), 0x59800001 }, + { _MMIO(0x9888), 0x338b0000 }, + { _MMIO(0x9888), 0x258b0066 }, + { _MMIO(0x9888), 0x058b0000 }, + { _MMIO(0x9888), 0x038b0000 }, + { _MMIO(0x9888), 0x03844000 }, + { _MMIO(0x9888), 0x47800080 }, + { _MMIO(0x9888), 0x57800000 }, + { _MMIO(0x1823a4), 0x00000000 }, + { _MMIO(0x9888), 0x59800000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + int i915_oa_select_metric_set_chv(struct drm_i915_private *dev_priv) { dev_priv->perf.oa.n_mux_configs = 0; @@ -180,6 +2061,344 @@ int i915_oa_select_metric_set_chv(struct drm_i915_private *dev_priv) dev_priv->perf.oa.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + 
ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because 
*_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_L3_2: + dev_priv->perf.oa.n_mux_configs = + get_l3_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_2); + + return 0; + case METRIC_SET_ID_L3_3: + dev_priv->perf.oa.n_mux_configs = + get_l3_3_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_3\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_3; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_3); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_3; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_3); + + return 0; + case METRIC_SET_ID_L3_4: + 
dev_priv->perf.oa.n_mux_configs = + get_l3_4_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_4\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_4; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_4); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_4; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_4); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER_1: + dev_priv->perf.oa.n_mux_configs = + get_sampler_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so 
it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler_1); + + return 0; + case METRIC_SET_ID_SAMPLER_2: + dev_priv->perf.oa.n_mux_configs = + get_sampler_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler_2); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + 
dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + return 0; default: return -ENODEV; @@ -208,6 +2427,292 @@ static struct attribute_group group_render_basic = { .attrs = attrs_render_basic, }; +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute 
*attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "f522a89c-ecd1-4522-8331-3383c54af5f5", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "a9ccc03d-a943-4e6b-9cd6-13e063075927", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "2cf0c064-68df-4fac-9b3f-57f51ca8a069", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "78a87ff9-543a-49ce-95ea-26d86071ea93", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_l3_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return 
sprintf(buf, "%d\n", METRIC_SET_ID_L3_2); +} + +static struct device_attribute dev_attr_l3_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_2_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_2[] = { + &dev_attr_l3_2_id.attr, + NULL, +}; + +static struct attribute_group group_l3_2 = { + .name = "9f2cece5-7bfe-4320-ad66-8c7cc526bec5", + .attrs = attrs_l3_2, +}; + +static ssize_t +show_l3_3_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_3); +} + +static struct device_attribute dev_attr_l3_3_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_3_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_3[] = { + &dev_attr_l3_3_id.attr, + NULL, +}; + +static struct attribute_group group_l3_3 = { + .name = "d890ef38-d309-47e4-b8b5-aa779bb19ab0", + .attrs = attrs_l3_3, +}; + +static ssize_t +show_l3_4_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_4); +} + +static struct device_attribute dev_attr_l3_4_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_4_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_4[] = { + &dev_attr_l3_4_id.attr, + NULL, +}; + +static struct attribute_group group_l3_4 = { + .name = "5fdff4a6-9dc8-45e1-bfda-ef54869fbdd4", + .attrs = attrs_l3_4, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name 
= "2c0e45e1-7e2c-4a14-ae00-0b7ec868b8aa", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER_1); +} + +static struct device_attribute dev_attr_sampler_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_1_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler_1[] = { + &dev_attr_sampler_1_id.attr, + NULL, +}; + +static struct attribute_group group_sampler_1 = { + .name = "71148d78-baf5-474f-878a-e23158d0265d", + .attrs = attrs_sampler_1, +}; + +static ssize_t +show_sampler_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER_2); +} + +static struct device_attribute dev_attr_sampler_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_2_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler_2[] = { + &dev_attr_sampler_2_id.attr, + NULL, +}; + +static struct attribute_group group_sampler_2 = { + .name = "b996a2b7-c59c-492d-877a-8cd54fd6df84", + .attrs = attrs_sampler_2, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_1 = { + .name = "eb2fecba-b431-42e7-8261-fe9429a6e67a", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static 
struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "60749470-a648-4a4b-9f10-dbfe1e36e44d", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "4a534b07-cba3-414d-8d60-874830e883aa", + .attrs = attrs_test_oa, +}; + int i915_perf_register_sysfs_chv(struct drm_i915_private *dev_priv) { @@ -220,9 +2725,113 @@ i915_perf_register_sysfs_chv(struct drm_i915_private *dev_priv) if (ret) goto error_render_basic; } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (ret) + goto error_l3_2; + } + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (ret) + goto error_l3_3; + } + if 
(get_l3_4_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_4); + if (ret) + goto error_l3_4; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler_1); + if (ret) + goto error_sampler_1; + } + if (get_sampler_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler_2); + if (ret) + goto error_sampler_2; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } return 0; +error_test_oa: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_2); +error_sampler_2: + if (get_sampler_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_1); +error_sampler_1: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, 
&group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_4_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_4); +error_l3_4: + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); +error_l3_3: + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); +error_l3_2: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); error_render_basic: return ret; } @@ -235,4 +2844,30 @@ i915_perf_unregister_sysfs_chv(struct drm_i915_private *dev_priv) if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (get_l3_4_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_4); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_1); + if (get_sampler_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler_2); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); } diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c b/drivers/gpu/drm/i915/i915_oa_hsw.c index 8c13e0880e53..10f169f683b7 100644 --- a/drivers/gpu/drm/i915/i915_oa_hsw.c +++ b/drivers/gpu/drm/i915/i915_oa_hsw.c @@ -49,6 +49,9 @@ static const struct i915_oa_reg b_counter_config_render_basic[] = { { _MMIO(0x2710), 0x00000000 }, }; +static const struct i915_oa_reg flex_eu_config_render_basic[] = { +}; + static const struct i915_oa_reg mux_config_render_basic[] = { { _MMIO(0x253a4), 0x01600000 }, { _MMIO(0x25440), 0x00100000 }, @@ -148,6 +151,9 @@ static const struct i915_oa_reg b_counter_config_compute_basic[] = { { _MMIO(0x236c), 0x00000000 }, }; +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { +}; + static const struct i915_oa_reg 
mux_config_compute_basic[] = { { _MMIO(0x253a4), 0x00000000 }, { _MMIO(0x2681c), 0x01f00800 }, @@ -223,6 +229,9 @@ static const struct i915_oa_reg b_counter_config_compute_extended[] = { { _MMIO(0x27ac), 0x0000fffe }, }; +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { +}; + static const struct i915_oa_reg mux_config_compute_extended[] = { { _MMIO(0x2681c), 0x3eb00800 }, { _MMIO(0x26820), 0x00900000 }, @@ -289,6 +298,9 @@ static const struct i915_oa_reg b_counter_config_memory_reads[] = { { _MMIO(0x27ac), 0x0000fc00 }, }; +static const struct i915_oa_reg flex_eu_config_memory_reads[] = { +}; + static const struct i915_oa_reg mux_config_memory_reads[] = { { _MMIO(0x253a4), 0x34300000 }, { _MMIO(0x25440), 0x2d800000 }, @@ -358,6 +370,9 @@ static const struct i915_oa_reg b_counter_config_memory_writes[] = { { _MMIO(0x27ac), 0x0000fc00 }, }; +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { +}; + static const struct i915_oa_reg mux_config_memory_writes[] = { { _MMIO(0x253a4), 0x34300000 }, { _MMIO(0x25440), 0x01500000 }, @@ -405,6 +420,9 @@ static const struct i915_oa_reg b_counter_config_sampler_balance[] = { { _MMIO(0x2724), 0x00800000 }, }; +static const struct i915_oa_reg flex_eu_config_sampler_balance[] = { +}; + static const struct i915_oa_reg mux_config_sampler_balance[] = { { _MMIO(0x2eb9c), 0x01906400 }, { _MMIO(0x2fb9c), 0x01906400 }, @@ -492,6 +510,11 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) dev_priv->perf.oa.b_counter_regs_len = ARRAY_SIZE(b_counter_config_render_basic); + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + return 0; case METRIC_SET_ID_COMPUTE_BASIC: dev_priv->perf.oa.n_mux_configs = @@ -513,6 +536,11 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) dev_priv->perf.oa.b_counter_regs_len = ARRAY_SIZE(b_counter_config_compute_basic); + dev_priv->perf.oa.flex_regs 
= + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + return 0; case METRIC_SET_ID_COMPUTE_EXTENDED: dev_priv->perf.oa.n_mux_configs = @@ -534,6 +562,11 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) dev_priv->perf.oa.b_counter_regs_len = ARRAY_SIZE(b_counter_config_compute_extended); + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + return 0; case METRIC_SET_ID_MEMORY_READS: dev_priv->perf.oa.n_mux_configs = @@ -555,6 +588,11 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) dev_priv->perf.oa.b_counter_regs_len = ARRAY_SIZE(b_counter_config_memory_reads); + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + return 0; case METRIC_SET_ID_MEMORY_WRITES: dev_priv->perf.oa.n_mux_configs = @@ -576,6 +614,11 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) dev_priv->perf.oa.b_counter_regs_len = ARRAY_SIZE(b_counter_config_memory_writes); + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + return 0; case METRIC_SET_ID_SAMPLER_BALANCE: dev_priv->perf.oa.n_mux_configs = @@ -597,6 +640,11 @@ int i915_oa_select_metric_set_hsw(struct drm_i915_private *dev_priv) dev_priv->perf.oa.b_counter_regs_len = ARRAY_SIZE(b_counter_config_sampler_balance); + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler_balance; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler_balance); + return 0; default: return -ENODEV; diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt2.c b/drivers/gpu/drm/i915/i915_oa_sklgt2.c index 9ab9d21ec335..1268beda212c 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt2.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt2.c @@ -33,9 +33,26 @@ enum 
metric_set_id { METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_L3_2, + METRIC_SET_ID_L3_3, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_VME_PIPE, + METRIC_SET_ID_TEST_OA, }; -int i915_oa_n_builtin_metric_sets_sklgt2 = 1; +int i915_oa_n_builtin_metric_sets_sklgt2 = 18; static const struct i915_oa_reg b_counter_config_render_basic[] = { { _MMIO(0x2710), 0x00000000 }, @@ -146,6 +163,2244 @@ get_render_basic_mux_config(struct drm_i915_private *dev_priv, return n; } +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic_0_slices_0x01_and_sku_lt_0x02[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901403 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4e8200 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x004f0db2 }, + { _MMIO(0x9888), 0x064f0900 }, + { _MMIO(0x9888), 0x084f1880 }, + { _MMIO(0x9888), 0x0a4f0011 }, + { _MMIO(0x9888), 0x0c4f0e3c }, + { _MMIO(0x9888), 0x0e4f1d80 }, + { _MMIO(0x9888), 0x086c0002 }, + { _MMIO(0x9888), 0x0a6c0100 
}, + { _MMIO(0x9888), 0x0e6c000c }, + { _MMIO(0x9888), 0x026c000b }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x081b4000 }, + { _MMIO(0x9888), 0x0a1b8000 }, + { _MMIO(0x9888), 0x0e1b4000 }, + { _MMIO(0x9888), 0x021b4000 }, + { _MMIO(0x9888), 0x1a1c4000 }, + { _MMIO(0x9888), 0x1c1c0012 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x005bc000 }, + { _MMIO(0x9888), 0x065b8000 }, + { _MMIO(0x9888), 0x085b8000 }, + { _MMIO(0x9888), 0x0a5b4000 }, + { _MMIO(0x9888), 0x0c5bc000 }, + { _MMIO(0x9888), 0x0e5b8000 }, + { _MMIO(0x9888), 0x105c8000 }, + { _MMIO(0x9888), 0x1a5ca000 }, + { _MMIO(0x9888), 0x1c5c002d }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x0a4c0800 }, + { _MMIO(0x9888), 0x0c4c0082 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020d2000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002cc000 }, + { _MMIO(0x9888), 0x0e2cc000 }, + { _MMIO(0x9888), 0x162cbe00 }, + { _MMIO(0x9888), 0x182c00ef }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x19900157 }, + { _MMIO(0x9888), 0x1b900167 }, + { _MMIO(0x9888), 0x1d900105 }, + { _MMIO(0x9888), 0x1f900103 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0xd28), 0x00000000 }, + { _MMIO(0x9888), 0x11900fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900840 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900842 }, + { _MMIO(0x9888), 0x47900840 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900840 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900040 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900840 }, + { _MMIO(0x9888), 0x53901111 }, +}; + +static const struct i915_oa_reg 
mux_config_compute_basic_0_slices_0x01_and_sku_gte_0x02[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901403 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x1a4e0820 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x064f0900 }, + { _MMIO(0x9888), 0x084f0032 }, + { _MMIO(0x9888), 0x0a4f1810 }, + { _MMIO(0x9888), 0x0c4f0e00 }, + { _MMIO(0x9888), 0x0e4f003c }, + { _MMIO(0x9888), 0x004f0d80 }, + { _MMIO(0x9888), 0x024f003b }, + { _MMIO(0x9888), 0x006c0002 }, + { _MMIO(0x9888), 0x086c0000 }, + { _MMIO(0x9888), 0x0c6c000c }, + { _MMIO(0x9888), 0x0e6c0b00 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x081b8000 }, + { _MMIO(0x9888), 0x0c1b4000 }, + { _MMIO(0x9888), 0x0e1b8000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1c8000 }, + { _MMIO(0x9888), 0x1c1c0024 }, + { _MMIO(0x9888), 0x065b8000 }, + { _MMIO(0x9888), 0x085b4000 }, + { _MMIO(0x9888), 0x0a5bc000 }, + { _MMIO(0x9888), 0x0c5b8000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x1a5c6000 }, + { _MMIO(0x9888), 0x1c5c001b }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2000 }, + { _MMIO(0x9888), 0x0c4c0208 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020d2000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2cc000 }, + { _MMIO(0x9888), 0x162cfb00 }, + { _MMIO(0x9888), 0x182c00be }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 
0x042cc000 }, + { _MMIO(0x9888), 0x19900157 }, + { _MMIO(0x9888), 0x1b900167 }, + { _MMIO(0x9888), 0x1d900105 }, + { _MMIO(0x9888), 0x1f900103 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x11900fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900842 }, + { _MMIO(0x9888), 0x47900802 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900802 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900002 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53901111 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 2); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 2); + + if ((INTEL_INFO(dev_priv)->sseu.slice_mask & 0x01) && + (dev_priv->drm.pdev->revision < 0x02)) { + regs[n] = mux_config_compute_basic_0_slices_0x01_and_sku_lt_0x02; + lens[n] = ARRAY_SIZE(mux_config_compute_basic_0_slices_0x01_and_sku_lt_0x02); + n++; + } + if ((INTEL_INFO(dev_priv)->sseu.slice_mask & 0x01) && + (dev_priv->drm.pdev->revision >= 0x02)) { + regs[n] = mux_config_compute_basic_0_slices_0x01_and_sku_gte_0x02; + lens[n] = ARRAY_SIZE(mux_config_compute_basic_0_slices_0x01_and_sku_gte_0x02); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { 
_MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile_0_sku_lt_0x02[] = { + { _MMIO(0x9888), 0x0c0e001f }, + { _MMIO(0x9888), 0x0a0f0000 }, + { _MMIO(0x9888), 0x10116800 }, + { _MMIO(0x9888), 0x178a03e0 }, + { _MMIO(0x9888), 0x11824c00 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x13840020 }, + { _MMIO(0x9888), 0x11850019 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x01870c40 }, + { _MMIO(0x9888), 0x17880000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x040d4000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020e5400 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x080f0040 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x0e0f0040 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06110012 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x01898000 }, + { _MMIO(0x9888), 0x0d890100 }, + { _MMIO(0x9888), 0x03898000 }, + { _MMIO(0x9888), 0x09808000 }, + { _MMIO(0x9888), 0x0b808000 }, + { _MMIO(0x9888), 0x0380c000 }, + { _MMIO(0x9888), 0x0f8a0075 }, + { _MMIO(0x9888), 0x1d8a0000 }, + { _MMIO(0x9888), 0x118a8000 }, + { _MMIO(0x9888), 0x1b8a4000 }, + { _MMIO(0x9888), 0x138a8000 }, + { _MMIO(0x9888), 0x1d81a000 }, + { _MMIO(0x9888), 0x15818000 }, + { _MMIO(0x9888), 0x17818000 }, + { _MMIO(0x9888), 0x0b820030 }, + { _MMIO(0x9888), 0x07828000 }, + { _MMIO(0x9888), 
0x0d824000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x05824000 }, + { _MMIO(0x9888), 0x0d830003 }, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x03838000 }, + { _MMIO(0x9888), 0x07838000 }, + { _MMIO(0x9888), 0x0b840980 }, + { _MMIO(0x9888), 0x03844d80 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x09850080 }, + { _MMIO(0x9888), 0x03850003 }, + { _MMIO(0x9888), 0x01850000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x09870032 }, + { _MMIO(0x9888), 0x01888052 }, + { _MMIO(0x9888), 0x11880000 }, + { _MMIO(0x9888), 0x09884000 }, + { _MMIO(0x9888), 0x15968000 }, + { _MMIO(0x9888), 0x17968000 }, + { _MMIO(0x9888), 0x0f96c000 }, + { _MMIO(0x9888), 0x1f950011 }, + { _MMIO(0x9888), 0x1d950014 }, + { _MMIO(0x9888), 0x0592c000 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d924000 }, + { _MMIO(0x9888), 0x0f924000 }, + { _MMIO(0x9888), 0x11928000 }, + { _MMIO(0x9888), 0x1392c000 }, + { _MMIO(0x9888), 0x09924000 }, + { _MMIO(0x9888), 0x01985000 }, + { _MMIO(0x9888), 0x07988000 }, + { _MMIO(0x9888), 0x09981000 }, + { _MMIO(0x9888), 0x0b982000 }, + { _MMIO(0x9888), 0x0d982000 }, + { _MMIO(0x9888), 0x0f989000 }, + { _MMIO(0x9888), 0x05982000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x0b978000 }, + { _MMIO(0x9888), 0x0f974000 }, + { _MMIO(0x9888), 0x11974000 }, + { _MMIO(0x9888), 0x13978000 }, + { _MMIO(0x9888), 0x09974000 }, + { _MMIO(0xd28), 0x00000000 }, + { _MMIO(0x9888), 0x1190c080 }, + { _MMIO(0x9888), 0x51900000 }, + { 
_MMIO(0x9888), 0x419010a0 }, + { _MMIO(0x9888), 0x55904000 }, + { _MMIO(0x9888), 0x45901000 }, + { _MMIO(0x9888), 0x47900084 }, + { _MMIO(0x9888), 0x57904400 }, + { _MMIO(0x9888), 0x499000a5 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900081 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x439014a4 }, + { _MMIO(0x9888), 0x53900400 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile_0_sku_gte_0x02[] = { + { _MMIO(0x9888), 0x0c0e001f }, + { _MMIO(0x9888), 0x0a0f0000 }, + { _MMIO(0x9888), 0x10116800 }, + { _MMIO(0x9888), 0x178a03e0 }, + { _MMIO(0x9888), 0x11824c00 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x13840020 }, + { _MMIO(0x9888), 0x11850019 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x01870c40 }, + { _MMIO(0x9888), 0x17880000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x040d4000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020e5400 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x080f0040 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x0e0f0040 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06110012 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x01898000 }, + { _MMIO(0x9888), 0x0d890100 }, + { _MMIO(0x9888), 0x03898000 }, + { _MMIO(0x9888), 0x09808000 }, + { _MMIO(0x9888), 0x0b808000 }, + { _MMIO(0x9888), 0x0380c000 }, + { _MMIO(0x9888), 0x0f8a0075 }, + { _MMIO(0x9888), 0x1d8a0000 }, + { _MMIO(0x9888), 0x118a8000 }, + { _MMIO(0x9888), 0x1b8a4000 }, + { _MMIO(0x9888), 0x138a8000 }, + { _MMIO(0x9888), 0x1d81a000 }, + { _MMIO(0x9888), 0x15818000 }, + { _MMIO(0x9888), 0x17818000 }, + { _MMIO(0x9888), 0x0b820030 }, + { _MMIO(0x9888), 0x07828000 }, + { _MMIO(0x9888), 0x0d824000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x05824000 }, + { _MMIO(0x9888), 0x0d830003 
}, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x03838000 }, + { _MMIO(0x9888), 0x07838000 }, + { _MMIO(0x9888), 0x0b840980 }, + { _MMIO(0x9888), 0x03844d80 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x09850080 }, + { _MMIO(0x9888), 0x03850003 }, + { _MMIO(0x9888), 0x01850000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x09870032 }, + { _MMIO(0x9888), 0x01888052 }, + { _MMIO(0x9888), 0x11880000 }, + { _MMIO(0x9888), 0x09884000 }, + { _MMIO(0x9888), 0x1b931001 }, + { _MMIO(0x9888), 0x1d930001 }, + { _MMIO(0x9888), 0x19934000 }, + { _MMIO(0x9888), 0x1b958000 }, + { _MMIO(0x9888), 0x1d950094 }, + { _MMIO(0x9888), 0x19958000 }, + { _MMIO(0x9888), 0x05e5a000 }, + { _MMIO(0x9888), 0x01e5c000 }, + { _MMIO(0x9888), 0x0592c000 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d924000 }, + { _MMIO(0x9888), 0x0f924000 }, + { _MMIO(0x9888), 0x11928000 }, + { _MMIO(0x9888), 0x1392c000 }, + { _MMIO(0x9888), 0x09924000 }, + { _MMIO(0x9888), 0x01985000 }, + { _MMIO(0x9888), 0x07988000 }, + { _MMIO(0x9888), 0x09981000 }, + { _MMIO(0x9888), 0x0b982000 }, + { _MMIO(0x9888), 0x0d982000 }, + { _MMIO(0x9888), 0x0f989000 }, + { _MMIO(0x9888), 0x05982000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1190c080 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x419010a0 }, + { _MMIO(0x9888), 0x55904000 }, + { _MMIO(0x9888), 0x45901000 }, + { _MMIO(0x9888), 0x47900084 }, + { _MMIO(0x9888), 0x57904400 }, + { _MMIO(0x9888), 0x499000a5 }, + { 
_MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900081 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x439014a4 }, + { _MMIO(0x9888), 0x53900400 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 2); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 2); + + if (dev_priv->drm.pdev->revision < 0x02) { + regs[n] = mux_config_render_pipe_profile_0_sku_lt_0x02; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile_0_sku_lt_0x02); + n++; + } + if (dev_priv->drm.pdev->revision >= 0x02) { + regs[n] = mux_config_render_pipe_profile_0_sku_gte_0x02; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile_0_sku_gte_0x02); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + 
+static const struct i915_oa_reg flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads_0_slices_0x01_and_sku_lt_0x02[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x13946000 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x0f968000 }, + { _MMIO(0x9888), 0x1196c000 }, + { _MMIO(0x9888), 0x13964000 }, + { _MMIO(0x9888), 0x11938000 }, + { _MMIO(0x9888), 0x1b93fe00 }, + { _MMIO(0x9888), 0x01940010 }, + { _MMIO(0x9888), 0x07941100 }, + { _MMIO(0x9888), 0x09941312 }, + { _MMIO(0x9888), 0x0b941514 }, + { _MMIO(0x9888), 0x0d941716 }, + { _MMIO(0x9888), 0x11940000 }, + { _MMIO(0x9888), 0x19940000 }, + { _MMIO(0x9888), 0x1b940000 }, + { _MMIO(0x9888), 0x1d940000 }, + { _MMIO(0x9888), 0x1b954000 }, + { _MMIO(0x9888), 0x1d95a550 }, + { _MMIO(0x9888), 0x1f9502aa }, + { _MMIO(0x9888), 0x2f900157 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13908000 }, + { _MMIO(0x9888), 0x21908000 }, + { _MMIO(0x9888), 0x23908000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27908000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0xd28), 0x00000000 }, + { _MMIO(0x9888), 
0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads_0_sku_lt_0x05_and_sku_gte_0x02[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x13946000 }, + { _MMIO(0x9888), 0x15940016 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x19930800 }, + { _MMIO(0x9888), 0x1b93aa55 }, + { _MMIO(0x9888), 0x1d9300aa }, + { _MMIO(0x9888), 0x01940010 }, + { _MMIO(0x9888), 0x07941100 }, + { _MMIO(0x9888), 0x09941312 }, + { _MMIO(0x9888), 0x0b941514 }, + { _MMIO(0x9888), 0x0d941716 }, + { _MMIO(0x9888), 0x0f940018 }, + { _MMIO(0x9888), 0x1b940000 }, + { _MMIO(0x9888), 0x11940000 }, + { _MMIO(0x9888), 0x01e58000 }, + { _MMIO(0x9888), 0x03e57000 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13908000 }, + { _MMIO(0x9888), 0x21908000 }, + { _MMIO(0x9888), 0x23908000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27908000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x2f908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { 
_MMIO(0x9888), 0x41900c20 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x47900421 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900421 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900061 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads_0_sku_gte_0x05[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900064 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900150 }, + { _MMIO(0x9888), 0x21900151 }, + { _MMIO(0x9888), 0x23900152 }, + { _MMIO(0x9888), 0x25900153 }, + { _MMIO(0x9888), 0x27900154 }, + { _MMIO(0x9888), 0x29900155 }, + { _MMIO(0x9888), 0x2b900156 }, + { _MMIO(0x9888), 0x2d900157 }, + { _MMIO(0x9888), 0x2f90015f }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int 
+get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 3); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 3); + + if ((INTEL_INFO(dev_priv)->sseu.slice_mask & 0x01) && + (dev_priv->drm.pdev->revision < 0x02)) { + regs[n] = mux_config_memory_reads_0_slices_0x01_and_sku_lt_0x02; + lens[n] = ARRAY_SIZE(mux_config_memory_reads_0_slices_0x01_and_sku_lt_0x02); + n++; + } + if ((dev_priv->drm.pdev->revision < 0x05) && + (dev_priv->drm.pdev->revision >= 0x02)) { + regs[n] = mux_config_memory_reads_0_sku_lt_0x05_and_sku_gte_0x02; + lens[n] = ARRAY_SIZE(mux_config_memory_reads_0_sku_lt_0x05_and_sku_gte_0x02); + n++; + } + if (dev_priv->drm.pdev->revision >= 0x05) { + regs[n] = mux_config_memory_reads_0_sku_gte_0x05; + lens[n] = ARRAY_SIZE(mux_config_memory_reads_0_sku_gte_0x05); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { 
_MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes_0_slices_0x01_and_sku_lt_0x02[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x13945400 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901400 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x0f968000 }, + { _MMIO(0x9888), 0x1196c000 }, + { _MMIO(0x9888), 0x13964000 }, + { _MMIO(0x9888), 0x11938000 }, + { _MMIO(0x9888), 0x1b93fe00 }, + { _MMIO(0x9888), 0x01940010 }, + { _MMIO(0x9888), 0x07941100 }, + { _MMIO(0x9888), 0x09941312 }, + { _MMIO(0x9888), 0x0b941514 }, + { _MMIO(0x9888), 0x0d941716 }, + { _MMIO(0x9888), 0x11940000 }, + { _MMIO(0x9888), 0x19940000 }, + { _MMIO(0x9888), 0x1b940000 }, + { _MMIO(0x9888), 0x1d940000 }, + { _MMIO(0x9888), 0x1b954000 }, + { _MMIO(0x9888), 0x1d95a550 }, + { _MMIO(0x9888), 0x1f9502aa }, + { _MMIO(0x9888), 0x2f900167 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13908000 }, + { _MMIO(0x9888), 0x21908000 }, + { _MMIO(0x9888), 0x23908000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27908000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { 
_MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0xd28), 0x00000000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes_0_sku_lt_0x05_and_sku_gte_0x02[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x13945400 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901400 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x19930800 }, + { _MMIO(0x9888), 0x1b93aa55 }, + { _MMIO(0x9888), 0x1d93002a }, + { _MMIO(0x9888), 0x01940010 }, + { _MMIO(0x9888), 0x07941100 }, + { _MMIO(0x9888), 0x09941312 }, + { _MMIO(0x9888), 0x0b941514 }, + { _MMIO(0x9888), 0x0d941716 }, + { _MMIO(0x9888), 0x1b940000 }, + { _MMIO(0x9888), 0x11940000 }, + { _MMIO(0x9888), 0x01e58000 }, + { _MMIO(0x9888), 0x03e57000 }, + { _MMIO(0x9888), 0x2f900167 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13908000 }, + { _MMIO(0x9888), 0x21908000 }, + { _MMIO(0x9888), 0x23908000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27908000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 
0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c20 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x47900421 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900421 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes_0_sku_gte_0x05[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901000 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900160 }, + { _MMIO(0x9888), 0x21900161 }, + { _MMIO(0x9888), 0x23900162 }, + { _MMIO(0x9888), 0x25900163 }, + { _MMIO(0x9888), 0x27900164 }, + { _MMIO(0x9888), 0x29900165 }, + { _MMIO(0x9888), 0x2b900166 }, + { _MMIO(0x9888), 0x2d900167 }, + { _MMIO(0x9888), 0x2f900150 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 
0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 3); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 3); + + if ((INTEL_INFO(dev_priv)->sseu.slice_mask & 0x01) && + (dev_priv->drm.pdev->revision < 0x02)) { + regs[n] = mux_config_memory_writes_0_slices_0x01_and_sku_lt_0x02; + lens[n] = ARRAY_SIZE(mux_config_memory_writes_0_slices_0x01_and_sku_lt_0x02); + n++; + } + if ((dev_priv->drm.pdev->revision < 0x05) && + (dev_priv->drm.pdev->revision >= 0x02)) { + regs[n] = mux_config_memory_writes_0_sku_lt_0x05_and_sku_gte_0x02; + lens[n] = ARRAY_SIZE(mux_config_memory_writes_0_sku_lt_0x05_and_sku_gte_0x02); + n++; + } + if (dev_priv->drm.pdev->revision >= 0x05) { + regs[n] = mux_config_memory_writes_0_sku_gte_0x05; + lens[n] = ARRAY_SIZE(mux_config_memory_writes_0_sku_gte_0x05); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { _MMIO(0x2778), 0x0007fc6a }, + { _MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { _MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 
}, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended_0_subslices_0x01[] = { + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x141c8160 }, + { _MMIO(0x9888), 0x161c8015 }, + { _MMIO(0x9888), 0x181c0120 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4eaaa0 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0e6c0b01 }, + { _MMIO(0x9888), 0x006c0200 }, + { _MMIO(0x9888), 0x026c000c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x001c0041 }, + { _MMIO(0x9888), 0x061c4200 }, + { _MMIO(0x9888), 0x081c4443 }, + { _MMIO(0x9888), 0x0a1c4645 }, + { _MMIO(0x9888), 0x0c1c7647 }, + { _MMIO(0x9888), 0x041c7357 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x101c0000 }, + { _MMIO(0x9888), 0x1a1c0000 }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4caa2a }, + { _MMIO(0x9888), 0x0c4c02aa }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5515 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 
0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0xd28), 0x00000000 }, + { _MMIO(0x9888), 0x11907fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900040 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900802 }, + { _MMIO(0x9888), 0x47900842 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900842 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900800 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + if (INTEL_INFO(dev_priv)->sseu.subslice_mask & 0x01) { + regs[n] = mux_config_compute_extended_0_subslices_0x01; + lens[n] = ARRAY_SIZE(mux_config_compute_extended_0_subslices_0x01); + n++; + } + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { _MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { _MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x166c0760 }, + { 
_MMIO(0x9888), 0x1593001e }, + { _MMIO(0x9888), 0x3f901403 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4e8020 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x006c0051 }, + { _MMIO(0x9888), 0x066c5000 }, + { _MMIO(0x9888), 0x086c5c5d }, + { _MMIO(0x9888), 0x0e6c5e5f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x061b8000 }, + { _MMIO(0x9888), 0x081bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1ce000 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2a00 }, + { _MMIO(0x9888), 0x0c4c0280 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f1500 }, + { _MMIO(0x9888), 0x100f0140 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162c0a00 }, + { _MMIO(0x9888), 0x182c00a0 }, + { _MMIO(0x9888), 0x03933300 }, + { _MMIO(0x9888), 0x05930032 }, + { _MMIO(0x9888), 0x11930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900167 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190030f }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900042 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x53901111 }, + { _MMIO(0x9888), 0x43900420 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + 
const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x104f0232 }, + { _MMIO(0x9888), 0x124f4640 }, + { _MMIO(0x9888), 0x106c0232 }, + { _MMIO(0x9888), 0x11834400 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x004f1880 }, + { _MMIO(0x9888), 0x024f08bb }, + { _MMIO(0x9888), 0x044f001b }, + { _MMIO(0x9888), 0x046c0100 }, + { _MMIO(0x9888), 0x066c000b }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x041b8000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025bc000 }, + { _MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x165c8000 }, + { _MMIO(0x9888), 0x185c8000 }, + { _MMIO(0x9888), 0x0a4c00a0 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x022cc000 
}, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x062cc000 }, + { _MMIO(0x9888), 0x082cc000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x0f8305c0 }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x07830000 }, + { _MMIO(0x9888), 0x1d950080 }, + { _MMIO(0x9888), 0x13928000 }, + { _MMIO(0x9888), 0x0f988000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b9000a0 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { 
_MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x126c7b40 }, + { _MMIO(0x9888), 0x166c0020 }, + { _MMIO(0x9888), 0x0a603444 }, + { _MMIO(0x9888), 0x0a613400 }, + { _MMIO(0x9888), 0x1a4ea800 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0800 }, + { _MMIO(0x9888), 0x0c1bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x1c1c003c }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x10600000 }, + { _MMIO(0x9888), 0x04600000 }, + { _MMIO(0x9888), 0x0c610044 }, + { _MMIO(0x9888), 0x10610000 }, + { _MMIO(0x9888), 0x06610000 }, + { _MMIO(0x9888), 0x0c4c02a8 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0154 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 
0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190ffc0 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900420 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900021 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900400 }, + { _MMIO(0x9888), 0x43900421 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_2[] = { + { _MMIO(0x9888), 0x126c02e0 }, + { _MMIO(0x9888), 0x146c0001 }, + { _MMIO(0x9888), 0x0a623400 }, + { _MMIO(0x9888), 0x044e8000 }, + { 
_MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x026c3324 }, + { _MMIO(0x9888), 0x046c3422 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x06614000 }, + { _MMIO(0x9888), 0x0c620044 }, + { _MMIO(0x9888), 0x10620000 }, + { _MMIO(0x9888), 0x06620000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_2; + lens[n] = ARRAY_SIZE(mux_config_l3_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_3[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 
0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_3[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_3[] = { + { _MMIO(0x9888), 0x126c4e80 }, + { _MMIO(0x9888), 0x146c0000 }, + { _MMIO(0x9888), 0x0a633400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x026c3321 }, + { _MMIO(0x9888), 0x046c342f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c2000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x06604000 }, + { _MMIO(0x9888), 0x0c630044 }, + { _MMIO(0x9888), 0x10630000 }, + { _MMIO(0x9888), 0x06630000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c00aa }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900002 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_3_mux_config(struct drm_i915_private *dev_priv, + 
const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_3; + lens[n] = ARRAY_SIZE(mux_config_l3_3); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000efff }, + { _MMIO(0x2778), 0x00006000 }, + { _MMIO(0x277c), 0x0000f3ff }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x102f3800 }, + { _MMIO(0x9888), 0x144d0500 }, + { _MMIO(0x9888), 0x120d03c0 }, + { _MMIO(0x9888), 0x140d03cf }, + { _MMIO(0x9888), 0x0c0f0004 }, + { _MMIO(0x9888), 0x0c4e4000 }, + { _MMIO(0x9888), 0x042f0480 }, + { _MMIO(0x9888), 0x082f0000 }, + { _MMIO(0x9888), 0x022f0000 }, + { _MMIO(0x9888), 0x0a4c0090 }, + { _MMIO(0x9888), 0x064d0027 }, + { _MMIO(0x9888), 0x004d0000 }, + { _MMIO(0x9888), 0x000d0d40 }, + { _MMIO(0x9888), 0x020d803f }, + { _MMIO(0x9888), 0x040d8023 }, + { _MMIO(0x9888), 0x100d0000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020f0010 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x0e0f0050 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41901400 }, + { _MMIO(0x9888), 0x43901485 }, + 
{ _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler[] = { + { _MMIO(0x9888), 0x14152c00 }, + { _MMIO(0x9888), 0x16150005 }, + { _MMIO(0x9888), 0x121600a0 }, + { _MMIO(0x9888), 0x14352c00 }, + { _MMIO(0x9888), 0x16350005 }, + { _MMIO(0x9888), 0x123600a0 }, + { _MMIO(0x9888), 0x14552c00 }, + { _MMIO(0x9888), 0x16550005 }, + { _MMIO(0x9888), 0x125600a0 }, + { _MMIO(0x9888), 0x062f6000 }, + { _MMIO(0x9888), 0x022f2000 }, + { _MMIO(0x9888), 0x0c4c0050 }, + { _MMIO(0x9888), 0x0a4c0010 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { 
_MMIO(0x9888), 0x100f0350 }, + { _MMIO(0x9888), 0x0c0fb000 }, + { _MMIO(0x9888), 0x0e0f00da }, + { _MMIO(0x9888), 0x182c0028 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x022dc000 }, + { _MMIO(0x9888), 0x042d4000 }, + { _MMIO(0x9888), 0x0c138000 }, + { _MMIO(0x9888), 0x0e132000 }, + { _MMIO(0x9888), 0x0413c000 }, + { _MMIO(0x9888), 0x1c140018 }, + { _MMIO(0x9888), 0x0c157000 }, + { _MMIO(0x9888), 0x0e150078 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x04162180 }, + { _MMIO(0x9888), 0x02160000 }, + { _MMIO(0x9888), 0x04174000 }, + { _MMIO(0x9888), 0x0233a000 }, + { _MMIO(0x9888), 0x04333000 }, + { _MMIO(0x9888), 0x14348000 }, + { _MMIO(0x9888), 0x16348000 }, + { _MMIO(0x9888), 0x02357870 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x04360043 }, + { _MMIO(0x9888), 0x02360000 }, + { _MMIO(0x9888), 0x04371000 }, + { _MMIO(0x9888), 0x0e538000 }, + { _MMIO(0x9888), 0x00538000 }, + { _MMIO(0x9888), 0x06533000 }, + { _MMIO(0x9888), 0x1c540020 }, + { _MMIO(0x9888), 0x12548000 }, + { _MMIO(0x9888), 0x0e557000 }, + { _MMIO(0x9888), 0x00557800 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x06560043 }, + { _MMIO(0x9888), 0x02560000 }, + { _MMIO(0x9888), 0x06571000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900060 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900060 }, +}; + +static int +get_sampler_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler; + lens[n] = ARRAY_SIZE(mux_config_sampler); + n++; + + return n; +} + +static const 
struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x00007fff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x00009fff }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000efff }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000f3ff }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fdff }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x12120000 }, + { _MMIO(0x9888), 0x12320000 }, + { _MMIO(0x9888), 0x12520000 }, + { _MMIO(0x9888), 0x002f8000 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0015 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f03a0 }, + { _MMIO(0x9888), 0x0c0ff000 }, + { _MMIO(0x9888), 0x0e0f0095 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2d8000 }, + { _MMIO(0x9888), 0x0e2d4000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x02108000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x02118000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x02121880 }, + { _MMIO(0x9888), 0x041219b5 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x02134000 }, + { _MMIO(0x9888), 0x04135000 }, + { 
_MMIO(0x9888), 0x0c308000 }, + { _MMIO(0x9888), 0x0e304000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x0c318000 }, + { _MMIO(0x9888), 0x0e314000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x0c321a80 }, + { _MMIO(0x9888), 0x0e320033 }, + { _MMIO(0x9888), 0x06320031 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x0c334000 }, + { _MMIO(0x9888), 0x0e331000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0e508000 }, + { _MMIO(0x9888), 0x00508000 }, + { _MMIO(0x9888), 0x02504000 }, + { _MMIO(0x9888), 0x0e518000 }, + { _MMIO(0x9888), 0x00518000 }, + { _MMIO(0x9888), 0x02514000 }, + { _MMIO(0x9888), 0x0e521880 }, + { _MMIO(0x9888), 0x00521a80 }, + { _MMIO(0x9888), 0x02520033 }, + { _MMIO(0x9888), 0x0e534000 }, + { _MMIO(0x9888), 0x00534000 }, + { _MMIO(0x9888), 0x02531000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900062 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 
0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x12124d60 }, + { _MMIO(0x9888), 0x12322e60 }, + { _MMIO(0x9888), 0x12524d60 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0014 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0fe000 }, + { _MMIO(0x9888), 0x0e0f0097 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x002d8000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x04121fb7 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x00308000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x00318000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x00321b80 }, + { _MMIO(0x9888), 0x0632003f }, + { _MMIO(0x9888), 0x00334000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0250c000 }, + { _MMIO(0x9888), 0x0251c000 }, + { _MMIO(0x9888), 0x02521fb7 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x02535000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900063 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { + 
{ _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { + { _MMIO(0xe458), 0x00001000 }, + { _MMIO(0xe558), 0x00003002 }, + { _MMIO(0xe658), 0x00005004 }, + { _MMIO(0xe758), 0x00011010 }, + { _MMIO(0xe45c), 0x00050012 }, + { _MMIO(0xe55c), 0x00052051 }, + { _MMIO(0xe65c), 0x00000008 }, +}; + +static const struct i915_oa_reg mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x121203e0 }, + { _MMIO(0x9888), 0x123203e0 }, + { _MMIO(0x9888), 0x125203e0 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0e0f006c }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x042d8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06114000 }, + { _MMIO(0x9888), 0x06120033 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x04308000 }, + { _MMIO(0x9888), 0x04318000 }, + { _MMIO(0x9888), 0x04321980 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x04334000 }, + { _MMIO(0x9888), 0x04504000 }, + { _MMIO(0x9888), 0x04514000 }, + { _MMIO(0x9888), 0x04520033 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x04531000 }, + { _MMIO(0x9888), 0x1190e000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900c00 }, + { _MMIO(0x9888), 0x45900002 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const 
struct i915_oa_reg b_counter_config_vme_pipe[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00100030 }, + { _MMIO(0x2774), 0x0000fff9 }, + { _MMIO(0x2778), 0x00000002 }, + { _MMIO(0x277c), 0x0000fffc }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000fff3 }, + { _MMIO(0x2788), 0x00100180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00000002 }, + { _MMIO(0x279c), 0x0000ff3f }, +}; + +static const struct i915_oa_reg flex_eu_config_vme_pipe[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00008003 }, +}; + +static const struct i915_oa_reg mux_config_vme_pipe[] = { + { _MMIO(0x9888), 0x141a5800 }, + { _MMIO(0x9888), 0x161a00c0 }, + { _MMIO(0x9888), 0x12180240 }, + { _MMIO(0x9888), 0x14180002 }, + { _MMIO(0x9888), 0x143a5800 }, + { _MMIO(0x9888), 0x163a00c0 }, + { _MMIO(0x9888), 0x12380240 }, + { _MMIO(0x9888), 0x14380002 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x022f8000 }, + { _MMIO(0x9888), 0x042f3000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c1500 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f9500 }, + { _MMIO(0x9888), 0x100f002a }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162c0a00 }, + { _MMIO(0x9888), 0x0a2dc000 }, + { _MMIO(0x9888), 0x0c2dc000 }, + { _MMIO(0x9888), 0x04193000 }, + { _MMIO(0x9888), 0x081a28c1 }, + { _MMIO(0x9888), 0x001a0000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x0613c000 }, + { _MMIO(0x9888), 0x0813f000 }, + { _MMIO(0x9888), 0x00172000 }, + { _MMIO(0x9888), 0x06178000 }, + { _MMIO(0x9888), 0x0817a000 }, + { _MMIO(0x9888), 0x00180037 }, 
+ { _MMIO(0x9888), 0x06180940 }, + { _MMIO(0x9888), 0x08180000 }, + { _MMIO(0x9888), 0x02180000 }, + { _MMIO(0x9888), 0x04183000 }, + { _MMIO(0x9888), 0x06393000 }, + { _MMIO(0x9888), 0x0c3a28c1 }, + { _MMIO(0x9888), 0x003a0000 }, + { _MMIO(0x9888), 0x0a33f000 }, + { _MMIO(0x9888), 0x0c33f000 }, + { _MMIO(0x9888), 0x0a37a000 }, + { _MMIO(0x9888), 0x0c37a000 }, + { _MMIO(0x9888), 0x0a380977 }, + { _MMIO(0x9888), 0x08380000 }, + { _MMIO(0x9888), 0x04380000 }, + { _MMIO(0x9888), 0x06383000 }, + { _MMIO(0x9888), 0x119000ff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900040 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900800 }, + { _MMIO(0x9888), 0x47901000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900844 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_vme_pipe_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_vme_pipe; + lens[n] = ARRAY_SIZE(mux_config_vme_pipe); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg 
flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810016 }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + int i915_oa_select_metric_set_sklgt2(struct drm_i915_private *dev_priv) { dev_priv->perf.oa.n_mux_configs = 0; @@ -180,6 +2435,448 @@ int i915_oa_select_metric_set_sklgt2(struct drm_i915_private *dev_priv) dev_priv->perf.oa.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case 
METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because *_register_sysfs 
already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + dev_priv->perf.oa.flex_regs = + 
flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_L3_2: + dev_priv->perf.oa.n_mux_configs = + get_l3_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already 
checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_2); + + return 0; + case METRIC_SET_ID_L3_3: + dev_priv->perf.oa.n_mux_configs = + get_l3_3_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_3\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_3; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_3); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_3; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_3); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + 
dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER: + dev_priv->perf.oa.n_mux_configs = + get_sampler_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't 
have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_VME_PIPE: + dev_priv->perf.oa.n_mux_configs = + get_vme_pipe_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"VME_PIPE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_vme_pipe; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_vme_pipe); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_vme_pipe; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_vme_pipe); + + return 0; + case 
METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + return 0; default: return -ENODEV; @@ -208,6 +2905,380 @@ static struct attribute_group group_render_basic = { .attrs = attrs_render_basic, }; +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "fe47b29d-ae51-423e-bff4-27d965a95b60", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + 
.name = "e0ad5ae0-84ba-4f29-a723-1906c12cb774", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_memory_reads_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static struct attribute_group group_memory_reads = { + .name = "9bc436dd-6130-4add-affc-283eb6eaa864", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "2ea0da8f-3527-4669-9d9d-13099a7435bf", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "d97d16af-028b-4cd1-a672-6210cb5513dd", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 
METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, +}; + +static struct attribute_group group_compute_l3_cache = { + .name = "9fb22842-e708-43f7-9752-e0e41670c39e", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "5378e2a1-4248-4188-a4ae-da25a794c603", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "f42cdd6a-b000-42cb-870f-5eb423a7f514", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_l3_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_2); +} + +static struct device_attribute dev_attr_l3_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_2_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_2[] = { + &dev_attr_l3_2_id.attr, + NULL, +}; + +static struct attribute_group group_l3_2 = { + .name = "b9bf2423-d88c-4a7b-a051-627611d00dcc", + .attrs = 
attrs_l3_2, +}; + +static ssize_t +show_l3_3_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_3); +} + +static struct device_attribute dev_attr_l3_3_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_3_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_3[] = { + &dev_attr_l3_3_id.attr, + NULL, +}; + +static struct attribute_group group_l3_3 = { + .name = "2414a93d-d84f-406e-99c0-472161194b40", + .attrs = attrs_l3_3, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "53a45d2d-170b-4cf5-b7bb-585120c8e2f5", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER); +} + +static struct device_attribute dev_attr_sampler_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler[] = { + &dev_attr_sampler_id.attr, + NULL, +}; + +static struct attribute_group group_sampler = { + .name = "b4cff514-a91e-4798-a0b3-426ca13fc9c1", + .attrs = attrs_sampler, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = 
show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_1 = { + .name = "7821d13b-9b8b-4405-9618-78cd56b62cce", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "893f1a4d-919d-4388-8cb7-746d73ea7259", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + +static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "41a24047-7484-4ead-ae37-de907e5ff2b2", + .attrs = attrs_compute_extra, +}; + +static ssize_t +show_vme_pipe_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_VME_PIPE); +} + +static struct device_attribute dev_attr_vme_pipe_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_vme_pipe_id, + .store = NULL, +}; + +static struct attribute *attrs_vme_pipe[] = { + &dev_attr_vme_pipe_id.attr, + NULL, +}; + +static struct attribute_group group_vme_pipe = { + .name = "95910492-943f-44bd-9461-390240f243fd", + .attrs = attrs_vme_pipe, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 
METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "1651949f-0ac0-4cb1-a06f-dafd74a407d1", + .attrs = attrs_test_oa, +}; + int i915_perf_register_sysfs_sklgt2(struct drm_i915_private *dev_priv) { @@ -220,9 +3291,145 @@ i915_perf_register_sysfs_sklgt2(struct drm_i915_private *dev_priv) if (ret) goto error_render_basic; } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, 
mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (ret) + goto error_l3_2; + } + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (ret) + goto error_l3_3; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (ret) + goto error_sampler; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (ret) + goto error_vme_pipe; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } return 0; +error_test_oa: + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); +error_vme_pipe: + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); +error_sampler: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); +error_l3_3: + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); +error_l3_2: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, 
mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); error_render_basic: return ret; } @@ -235,4 +3442,38 @@ i915_perf_unregister_sysfs_sklgt2(struct drm_i915_private *dev_priv) if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); } diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt3.c b/drivers/gpu/drm/i915/i915_oa_sklgt3.c index e32d3b3ad77a..7765e22dfa17 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt3.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt3.c @@ -33,9 +33,26 @@ enum metric_set_id { METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_L3_2, + METRIC_SET_ID_L3_3, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_VME_PIPE, + METRIC_SET_ID_TEST_OA, }; -int i915_oa_n_builtin_metric_sets_sklgt3 = 1; +int i915_oa_n_builtin_metric_sets_sklgt3 = 18; static const struct i915_oa_reg b_counter_config_render_basic[] = { { _MMIO(0x2710), 0x00000000 }, @@ -157,6 +174,1793 
@@ get_render_basic_mux_config(struct drm_i915_private *dev_priv, return n; } +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x1a4e0820 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x064f0900 }, + { _MMIO(0x9888), 0x084f0032 }, + { _MMIO(0x9888), 0x0a4f1891 }, + { _MMIO(0x9888), 0x0c4f0e00 }, + { _MMIO(0x9888), 0x0e4f003c }, + { _MMIO(0x9888), 0x004f0d80 }, + { _MMIO(0x9888), 0x024f003b }, + { _MMIO(0x9888), 0x006c0002 }, + { _MMIO(0x9888), 0x086c0100 }, + { _MMIO(0x9888), 0x0c6c000c }, + { _MMIO(0x9888), 0x0e6c0b00 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x081b8000 }, + { _MMIO(0x9888), 0x0c1b4000 }, + { _MMIO(0x9888), 0x0e1b8000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1c8000 }, + { _MMIO(0x9888), 0x1c1c0024 }, + { _MMIO(0x9888), 0x065b8000 }, + { _MMIO(0x9888), 0x085b4000 }, + { _MMIO(0x9888), 0x0a5bc000 }, + { _MMIO(0x9888), 0x0c5b8000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x1a5c6000 }, + { _MMIO(0x9888), 0x1c5c001b }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 
0x145c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2000 }, + { _MMIO(0x9888), 0x0c4c0208 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020d2000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2cc000 }, + { _MMIO(0x9888), 0x162cfb00 }, + { _MMIO(0x9888), 0x182c00be }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x19900157 }, + { _MMIO(0x9888), 0x1b900158 }, + { _MMIO(0x9888), 0x1d900105 }, + { _MMIO(0x9888), 0x1f900103 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x11900fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900863 }, + { _MMIO(0x9888), 0x47900802 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900802 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900002 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900c62 }, + { _MMIO(0x9888), 0x53903333 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + 
{ _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x0c0e001f }, + { _MMIO(0x9888), 0x0a0f0000 }, + { _MMIO(0x9888), 0x10116800 }, + { _MMIO(0x9888), 0x178a03e0 }, + { _MMIO(0x9888), 0x11824c00 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x13840020 }, + { _MMIO(0x9888), 0x11850019 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x01870c40 }, + { _MMIO(0x9888), 0x17880000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x040d4000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020e5400 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x080f0040 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x0e0f0040 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06110012 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x01898000 }, + { _MMIO(0x9888), 0x0d890100 }, + { _MMIO(0x9888), 0x03898000 }, + { _MMIO(0x9888), 0x09808000 }, + { _MMIO(0x9888), 0x0b808000 }, + { _MMIO(0x9888), 0x0380c000 }, + { _MMIO(0x9888), 0x0f8a0075 }, + { _MMIO(0x9888), 0x1d8a0000 }, + { _MMIO(0x9888), 0x118a8000 }, + { _MMIO(0x9888), 0x1b8a4000 }, + { _MMIO(0x9888), 0x138a8000 
}, + { _MMIO(0x9888), 0x1d81a000 }, + { _MMIO(0x9888), 0x15818000 }, + { _MMIO(0x9888), 0x17818000 }, + { _MMIO(0x9888), 0x0b820030 }, + { _MMIO(0x9888), 0x07828000 }, + { _MMIO(0x9888), 0x0d824000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x05824000 }, + { _MMIO(0x9888), 0x0d830003 }, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x03838000 }, + { _MMIO(0x9888), 0x07838000 }, + { _MMIO(0x9888), 0x0b840980 }, + { _MMIO(0x9888), 0x03844d80 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x09850080 }, + { _MMIO(0x9888), 0x03850003 }, + { _MMIO(0x9888), 0x01850000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x09870032 }, + { _MMIO(0x9888), 0x01888052 }, + { _MMIO(0x9888), 0x11880000 }, + { _MMIO(0x9888), 0x09884000 }, + { _MMIO(0x9888), 0x1b931001 }, + { _MMIO(0x9888), 0x1d930001 }, + { _MMIO(0x9888), 0x19934000 }, + { _MMIO(0x9888), 0x1b958000 }, + { _MMIO(0x9888), 0x1d950094 }, + { _MMIO(0x9888), 0x19958000 }, + { _MMIO(0x9888), 0x09e58000 }, + { _MMIO(0x9888), 0x0be58000 }, + { _MMIO(0x9888), 0x03e5c000 }, + { _MMIO(0x9888), 0x0592c000 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d924000 }, + { _MMIO(0x9888), 0x0f924000 }, + { _MMIO(0x9888), 0x11928000 }, + { _MMIO(0x9888), 0x1392c000 }, + { _MMIO(0x9888), 0x09924000 }, + { _MMIO(0x9888), 0x01985000 }, + { _MMIO(0x9888), 0x07988000 }, + { _MMIO(0x9888), 0x09981000 }, + { _MMIO(0x9888), 0x0b982000 }, + { _MMIO(0x9888), 0x0d982000 }, + { _MMIO(0x9888), 0x0f989000 }, + { _MMIO(0x9888), 0x05982000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17908000 }, + { 
_MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1190c080 }, + { _MMIO(0x9888), 0x51901150 }, + { _MMIO(0x9888), 0x41901400 }, + { _MMIO(0x9888), 0x55905111 }, + { _MMIO(0x9888), 0x45901400 }, + { _MMIO(0x9888), 0x479004a5 }, + { _MMIO(0x9888), 0x57903455 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b9000a0 }, + { _MMIO(0x9888), 0x59900001 }, + { _MMIO(0x9888), 0x43900005 }, + { _MMIO(0x9888), 0x53900455 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { 
_MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900064 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900150 }, + { _MMIO(0x9888), 0x21900151 }, + { _MMIO(0x9888), 0x23900152 }, + { _MMIO(0x9888), 0x25900153 }, + { _MMIO(0x9888), 0x27900154 }, + { _MMIO(0x9888), 0x29900155 }, + { _MMIO(0x9888), 0x2b900156 }, + { _MMIO(0x9888), 0x2d900157 }, + { _MMIO(0x9888), 0x2f90015f }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int 
+get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901000 
}, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900160 }, + { _MMIO(0x9888), 0x21900161 }, + { _MMIO(0x9888), 0x23900162 }, + { _MMIO(0x9888), 0x25900163 }, + { _MMIO(0x9888), 0x27900164 }, + { _MMIO(0x9888), 0x29900165 }, + { _MMIO(0x9888), 0x2b900166 }, + { _MMIO(0x9888), 0x2d900167 }, + { _MMIO(0x9888), 0x2f900150 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { 
_MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { _MMIO(0x2778), 0x0007fc6a }, + { _MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { _MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended[] = { + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x141c8160 }, + { _MMIO(0x9888), 0x161c8015 }, + { _MMIO(0x9888), 0x181c0120 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4eaaa0 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0e6c0b01 }, + { _MMIO(0x9888), 0x006c0200 }, + { _MMIO(0x9888), 0x026c000c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x001c0041 }, + { _MMIO(0x9888), 0x061c4200 }, + { _MMIO(0x9888), 0x081c4443 }, + { _MMIO(0x9888), 0x0a1c4645 }, + { _MMIO(0x9888), 0x0c1c7647 }, + { _MMIO(0x9888), 0x041c7357 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x101c0000 }, + { _MMIO(0x9888), 0x1a1c0000 }, + { 
_MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4caa2a }, + { _MMIO(0x9888), 0x0c4c02aa }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5515 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x11907fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900040 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900802 }, + { _MMIO(0x9888), 0x47900842 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900842 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900800 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extended; + lens[n] = ARRAY_SIZE(mux_config_compute_extended); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { 
_MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { _MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x166c0760 }, + { _MMIO(0x9888), 0x1593001e }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4e8020 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x006c0051 }, + { _MMIO(0x9888), 0x066c5000 }, + { _MMIO(0x9888), 0x086c5c5d }, + { _MMIO(0x9888), 0x0e6c5e5f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x061b8000 }, + { _MMIO(0x9888), 0x081bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1ce000 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2a00 }, + { _MMIO(0x9888), 0x0c4c0280 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f1500 }, + { _MMIO(0x9888), 0x100f0140 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162c0a00 }, + { _MMIO(0x9888), 0x182c00a0 }, + { _MMIO(0x9888), 0x03933300 }, + { _MMIO(0x9888), 0x05930032 }, + { _MMIO(0x9888), 0x11930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { 
_MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190030f }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900063 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x53903333 }, + { _MMIO(0x9888), 0x43900840 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x104f0232 }, + { _MMIO(0x9888), 0x124f4640 }, + { _MMIO(0x9888), 0x106c0232 }, + { _MMIO(0x9888), 0x11834400 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x004f1880 }, + { _MMIO(0x9888), 0x024f08bb }, + { _MMIO(0x9888), 0x044f001b }, + { _MMIO(0x9888), 0x046c0100 }, + { _MMIO(0x9888), 0x066c000b }, + { 
_MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x041b8000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025bc000 }, + { _MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x165c8000 }, + { _MMIO(0x9888), 0x185c8000 }, + { _MMIO(0x9888), 0x0a4c00a0 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x062cc000 }, + { _MMIO(0x9888), 0x082cc000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x0f8305c0 }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x07830000 }, + { _MMIO(0x9888), 0x1d950080 }, + { _MMIO(0x9888), 0x13928000 }, + { _MMIO(0x9888), 0x0f988000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x59900005 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { 
_MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x126c7b40 }, + { _MMIO(0x9888), 0x166c0020 }, + { _MMIO(0x9888), 0x0a603444 }, + { _MMIO(0x9888), 0x0a613400 }, + { _MMIO(0x9888), 0x1a4ea800 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0800 }, + { _MMIO(0x9888), 0x0c1bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x1c1c003c }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 
}, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x10600000 }, + { _MMIO(0x9888), 0x04600000 }, + { _MMIO(0x9888), 0x0c610044 }, + { _MMIO(0x9888), 0x10610000 }, + { _MMIO(0x9888), 0x06610000 }, + { _MMIO(0x9888), 0x0c4c02a8 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0154 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190ffc0 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900420 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900021 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900400 }, + { _MMIO(0x9888), 0x43900421 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 
0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_2[] = { + { _MMIO(0x9888), 0x126c02e0 }, + { _MMIO(0x9888), 0x146c0001 }, + { _MMIO(0x9888), 0x0a623400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x026c3324 }, + { _MMIO(0x9888), 0x046c3422 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x06614000 }, + { _MMIO(0x9888), 0x0c620044 }, + { _MMIO(0x9888), 0x10620000 }, + { _MMIO(0x9888), 0x06620000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + 
int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_2; + lens[n] = ARRAY_SIZE(mux_config_l3_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_3[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_3[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_3[] = { + { _MMIO(0x9888), 0x126c4e80 }, + { _MMIO(0x9888), 0x146c0000 }, + { _MMIO(0x9888), 0x0a633400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x026c3321 }, + { _MMIO(0x9888), 0x046c342f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c2000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x06604000 }, + { _MMIO(0x9888), 0x0c630044 }, + { _MMIO(0x9888), 0x10630000 }, + { _MMIO(0x9888), 0x06630000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c00aa }, + { _MMIO(0x9888), 0x020da000 }, + { 
_MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900002 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_3_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_3; + lens[n] = ARRAY_SIZE(mux_config_l3_3); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000efff }, + { _MMIO(0x2778), 0x00006000 }, + { _MMIO(0x277c), 0x0000f3ff }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x102f3800 }, + { _MMIO(0x9888), 0x144d0500 }, + { _MMIO(0x9888), 0x120d03c0 }, + { _MMIO(0x9888), 0x140d03cf }, + { _MMIO(0x9888), 0x0c0f0004 }, + { _MMIO(0x9888), 0x0c4e4000 }, + { _MMIO(0x9888), 0x042f0480 }, + { _MMIO(0x9888), 0x082f0000 }, + { _MMIO(0x9888), 
0x022f0000 }, + { _MMIO(0x9888), 0x0a4c0090 }, + { _MMIO(0x9888), 0x064d0027 }, + { _MMIO(0x9888), 0x004d0000 }, + { _MMIO(0x9888), 0x000d0d40 }, + { _MMIO(0x9888), 0x020d803f }, + { _MMIO(0x9888), 0x040d8023 }, + { _MMIO(0x9888), 0x100d0000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020f0010 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x0e0f0050 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41901400 }, + { _MMIO(0x9888), 0x43901485 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler[] = { + { _MMIO(0x9888), 0x14152c00 }, + 
{ _MMIO(0x9888), 0x16150005 }, + { _MMIO(0x9888), 0x121600a0 }, + { _MMIO(0x9888), 0x14352c00 }, + { _MMIO(0x9888), 0x16350005 }, + { _MMIO(0x9888), 0x123600a0 }, + { _MMIO(0x9888), 0x14552c00 }, + { _MMIO(0x9888), 0x16550005 }, + { _MMIO(0x9888), 0x125600a0 }, + { _MMIO(0x9888), 0x062f6000 }, + { _MMIO(0x9888), 0x022f2000 }, + { _MMIO(0x9888), 0x0c4c0050 }, + { _MMIO(0x9888), 0x0a4c0010 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0350 }, + { _MMIO(0x9888), 0x0c0fb000 }, + { _MMIO(0x9888), 0x0e0f00da }, + { _MMIO(0x9888), 0x182c0028 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x022dc000 }, + { _MMIO(0x9888), 0x042d4000 }, + { _MMIO(0x9888), 0x0c138000 }, + { _MMIO(0x9888), 0x0e132000 }, + { _MMIO(0x9888), 0x0413c000 }, + { _MMIO(0x9888), 0x1c140018 }, + { _MMIO(0x9888), 0x0c157000 }, + { _MMIO(0x9888), 0x0e150078 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x04162180 }, + { _MMIO(0x9888), 0x02160000 }, + { _MMIO(0x9888), 0x04174000 }, + { _MMIO(0x9888), 0x0233a000 }, + { _MMIO(0x9888), 0x04333000 }, + { _MMIO(0x9888), 0x14348000 }, + { _MMIO(0x9888), 0x16348000 }, + { _MMIO(0x9888), 0x02357870 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x04360043 }, + { _MMIO(0x9888), 0x02360000 }, + { _MMIO(0x9888), 0x04371000 }, + { _MMIO(0x9888), 0x0e538000 }, + { _MMIO(0x9888), 0x00538000 }, + { _MMIO(0x9888), 0x06533000 }, + { _MMIO(0x9888), 0x1c540020 }, + { _MMIO(0x9888), 0x12548000 }, + { _MMIO(0x9888), 0x0e557000 }, + { _MMIO(0x9888), 0x00557800 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x06560043 }, + { _MMIO(0x9888), 0x02560000 }, + { _MMIO(0x9888), 0x06571000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 
0x4b900060 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900060 }, +}; + +static int +get_sampler_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler; + lens[n] = ARRAY_SIZE(mux_config_sampler); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x00007fff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x00009fff }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000efff }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000f3ff }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fdff }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x12120000 }, + { _MMIO(0x9888), 0x12320000 }, + { _MMIO(0x9888), 0x12520000 }, + { _MMIO(0x9888), 0x002f8000 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0015 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { 
_MMIO(0x9888), 0x100f03a0 }, + { _MMIO(0x9888), 0x0c0ff000 }, + { _MMIO(0x9888), 0x0e0f0095 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2d8000 }, + { _MMIO(0x9888), 0x0e2d4000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x02108000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x02118000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x02121880 }, + { _MMIO(0x9888), 0x041219b5 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x02134000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x0c308000 }, + { _MMIO(0x9888), 0x0e304000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x0c318000 }, + { _MMIO(0x9888), 0x0e314000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x0c321a80 }, + { _MMIO(0x9888), 0x0e320033 }, + { _MMIO(0x9888), 0x06320031 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x0c334000 }, + { _MMIO(0x9888), 0x0e331000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0e508000 }, + { _MMIO(0x9888), 0x00508000 }, + { _MMIO(0x9888), 0x02504000 }, + { _MMIO(0x9888), 0x0e518000 }, + { _MMIO(0x9888), 0x00518000 }, + { _MMIO(0x9888), 0x02514000 }, + { _MMIO(0x9888), 0x0e521880 }, + { _MMIO(0x9888), 0x00521a80 }, + { _MMIO(0x9888), 0x02520033 }, + { _MMIO(0x9888), 0x0e534000 }, + { _MMIO(0x9888), 0x00534000 }, + { _MMIO(0x9888), 0x02531000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900062 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x12124d60 }, + { _MMIO(0x9888), 0x12322e60 }, + { _MMIO(0x9888), 0x12524d60 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0014 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0fe000 }, + { _MMIO(0x9888), 0x0e0f0097 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x002d8000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x04121fb7 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x00308000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x00318000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x00321b80 }, + { _MMIO(0x9888), 0x0632003f }, + { _MMIO(0x9888), 0x00334000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0250c000 }, + { _MMIO(0x9888), 0x0251c000 }, + { _MMIO(0x9888), 0x02521fb7 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x02535000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + 
{ _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900063 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { +}; + +static const struct i915_oa_reg mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x121203e0 }, + { _MMIO(0x9888), 0x123203e0 }, + { _MMIO(0x9888), 0x125203e0 }, + { _MMIO(0x9888), 0x129203e0 }, + { _MMIO(0x9888), 0x12b203e0 }, + { _MMIO(0x9888), 0x12d203e0 }, + { _MMIO(0x9888), 0x024ec000 }, + { _MMIO(0x9888), 0x044ec000 }, + { _MMIO(0x9888), 0x064ec000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c0042 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f006d }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x042d8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06114000 }, + { _MMIO(0x9888), 0x06120033 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x04308000 }, + { _MMIO(0x9888), 0x04318000 }, + { _MMIO(0x9888), 0x04321980 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x04334000 }, + { _MMIO(0x9888), 0x04504000 }, + { _MMIO(0x9888), 0x04514000 }, + { _MMIO(0x9888), 0x04520033 }, + { _MMIO(0x9888), 0x00520000 }, + 
{ _MMIO(0x9888), 0x04531000 }, + { _MMIO(0x9888), 0x00af8000 }, + { _MMIO(0x9888), 0x0acc0001 }, + { _MMIO(0x9888), 0x008d8000 }, + { _MMIO(0x9888), 0x028da000 }, + { _MMIO(0x9888), 0x0c8fb000 }, + { _MMIO(0x9888), 0x0e8f0001 }, + { _MMIO(0x9888), 0x06ac8000 }, + { _MMIO(0x9888), 0x02ad4000 }, + { _MMIO(0x9888), 0x02908000 }, + { _MMIO(0x9888), 0x02918000 }, + { _MMIO(0x9888), 0x02921980 }, + { _MMIO(0x9888), 0x00920000 }, + { _MMIO(0x9888), 0x02934000 }, + { _MMIO(0x9888), 0x02b04000 }, + { _MMIO(0x9888), 0x02b14000 }, + { _MMIO(0x9888), 0x02b20033 }, + { _MMIO(0x9888), 0x00b20000 }, + { _MMIO(0x9888), 0x02b31000 }, + { _MMIO(0x9888), 0x00d08000 }, + { _MMIO(0x9888), 0x00d18000 }, + { _MMIO(0x9888), 0x00d21980 }, + { _MMIO(0x9888), 0x00d34000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900402 }, + { _MMIO(0x9888), 0x53901550 }, + { _MMIO(0x9888), 0x45900080 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_vme_pipe[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00100030 }, + { _MMIO(0x2774), 0x0000fff9 }, + { _MMIO(0x2778), 0x00000002 }, + { _MMIO(0x277c), 0x0000fffc }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000fff3 }, + { _MMIO(0x2788), 0x00100180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 
0x00000002 }, + { _MMIO(0x279c), 0x0000ff3f }, +}; + +static const struct i915_oa_reg flex_eu_config_vme_pipe[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00008003 }, +}; + +static const struct i915_oa_reg mux_config_vme_pipe[] = { + { _MMIO(0x9888), 0x141a5800 }, + { _MMIO(0x9888), 0x161a00c0 }, + { _MMIO(0x9888), 0x12180240 }, + { _MMIO(0x9888), 0x14180002 }, + { _MMIO(0x9888), 0x149a5800 }, + { _MMIO(0x9888), 0x169a00c0 }, + { _MMIO(0x9888), 0x12980240 }, + { _MMIO(0x9888), 0x14980002 }, + { _MMIO(0x9888), 0x1a4e3fc0 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x022f8000 }, + { _MMIO(0x9888), 0x042f3000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c9500 }, + { _MMIO(0x9888), 0x0c4c002a }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0015 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c000a }, + { _MMIO(0x9888), 0x04193000 }, + { _MMIO(0x9888), 0x081a28c1 }, + { _MMIO(0x9888), 0x001a0000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x0613c000 }, + { _MMIO(0x9888), 0x0813f000 }, + { _MMIO(0x9888), 0x00172000 }, + { _MMIO(0x9888), 0x06178000 }, + { _MMIO(0x9888), 0x0817a000 }, + { _MMIO(0x9888), 0x00180037 }, + { _MMIO(0x9888), 0x06180940 }, + { _MMIO(0x9888), 0x08180000 }, + { _MMIO(0x9888), 0x02180000 }, + { _MMIO(0x9888), 0x04183000 }, + { _MMIO(0x9888), 0x04afc000 }, + { _MMIO(0x9888), 0x06af3000 }, + { _MMIO(0x9888), 0x0acc4000 }, + { _MMIO(0x9888), 0x0ccc0015 }, + { _MMIO(0x9888), 0x0a8da000 }, + { _MMIO(0x9888), 0x0c8da000 }, + { _MMIO(0x9888), 0x0e8f4000 }, + { _MMIO(0x9888), 0x108f0015 }, + { _MMIO(0x9888), 0x16aca000 }, + { _MMIO(0x9888), 0x18ac000a }, + { _MMIO(0x9888), 0x06993000 }, + { _MMIO(0x9888), 0x0c9a28c1 }, + { 
_MMIO(0x9888), 0x009a0000 }, + { _MMIO(0x9888), 0x0a93f000 }, + { _MMIO(0x9888), 0x0c93f000 }, + { _MMIO(0x9888), 0x0a97a000 }, + { _MMIO(0x9888), 0x0c97a000 }, + { _MMIO(0x9888), 0x0a980977 }, + { _MMIO(0x9888), 0x08980000 }, + { _MMIO(0x9888), 0x04980000 }, + { _MMIO(0x9888), 0x06983000 }, + { _MMIO(0x9888), 0x119000ff }, + { _MMIO(0x9888), 0x51900050 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x55900115 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x47900884 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900002 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_vme_pipe_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_vme_pipe; + lens[n] = ARRAY_SIZE(mux_config_vme_pipe); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810013 }, + { 
_MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + int i915_oa_select_metric_set_sklgt3(struct drm_i915_private *dev_priv) { dev_priv->perf.oa.n_mux_configs = 0; @@ -191,6 +1995,448 @@ int i915_oa_select_metric_set_sklgt3(struct drm_i915_private *dev_priv) dev_priv->perf.oa.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if 
(dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + 
b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + 
dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_L3_2: + dev_priv->perf.oa.n_mux_configs = + get_l3_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + 
dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_2); + + return 0; + case METRIC_SET_ID_L3_3: + dev_priv->perf.oa.n_mux_configs = + get_l3_3_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_3\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_3; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_3); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_3; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_3); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER: + 
dev_priv->perf.oa.n_mux_configs = + get_sampler_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + 
b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_VME_PIPE: + dev_priv->perf.oa.n_mux_configs = + get_vme_pipe_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"VME_PIPE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_vme_pipe; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_vme_pipe); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_vme_pipe; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_vme_pipe); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + 
dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + return 0; default: return -ENODEV; @@ -219,6 +2465,380 @@ static struct attribute_group group_render_basic = { .attrs = attrs_render_basic, }; +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "4320492b-fd03-42ac-922f-dbe1ef3b7b58", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "bd2d9cae-b9ec-4f5b-9d2f-934bed398a2d", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t 
+show_memory_reads_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static struct attribute_group group_memory_reads = { + .name = "4ca0f3fe-7fd3-4924-98cb-1807d9879767", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "a0c0172c-ee13-403d-99ff-2bdf6936cf14", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "52435e0b-f188-42ea-8680-21a56ee20dee", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 
0444 }, + .show = show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, +}; + +static struct attribute_group group_compute_l3_cache = { + .name = "27076eeb-49f3-4fed-8423-c66506005c63", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "8071b409-c39a-4674-94d7-32962ecfb512", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "5e0b391e-9ea8-4901-b2ff-b64ff616c7ed", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_l3_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_2); +} + +static struct device_attribute dev_attr_l3_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_2_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_2[] = { + &dev_attr_l3_2_id.attr, + NULL, +}; + +static struct attribute_group group_l3_2 = { + .name = "25dc828e-1d2d-426e-9546-a1d4233cdf16", + .attrs = attrs_l3_2, +}; + +static ssize_t +show_l3_3_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, 
"%d\n", METRIC_SET_ID_L3_3); +} + +static struct device_attribute dev_attr_l3_3_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_3_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_3[] = { + &dev_attr_l3_3_id.attr, + NULL, +}; + +static struct attribute_group group_l3_3 = { + .name = "3dba9405-2d7e-4d70-8199-e734e82fd6bf", + .attrs = attrs_l3_3, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "76935d7b-09c9-46bf-87f1-c18b4a86ebe5", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER); +} + +static struct device_attribute dev_attr_sampler_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler[] = { + &dev_attr_sampler_id.attr, + NULL, +}; + +static struct attribute_group group_sampler = { + .name = "1b34c0d6-4f4c-4d7b-833f-4aaf236d87a6", + .attrs = attrs_sampler, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static 
struct attribute_group group_tdl_1 = { + .name = "b375c985-9953-455b-bda2-b03f7594e9db", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "3e2be2bb-884a-49bb-82c5-2358e6bd5f2d", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + +static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "2d80a648-7b5a-4e92-bbe7-3b5c76f2e221", + .attrs = attrs_compute_extra, +}; + +static ssize_t +show_vme_pipe_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_VME_PIPE); +} + +static struct device_attribute dev_attr_vme_pipe_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_vme_pipe_id, + .store = NULL, +}; + +static struct attribute *attrs_vme_pipe[] = { + &dev_attr_vme_pipe_id.attr, + NULL, +}; + +static struct attribute_group group_vme_pipe = { + .name = "cfae9232-6ffc-42cc-a703-9790016925f0", + .attrs = attrs_vme_pipe, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + 
.show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "2b985803-d3c9-4629-8a4f-634bfecba0e8", + .attrs = attrs_test_oa, +}; + int i915_perf_register_sysfs_sklgt3(struct drm_i915_private *dev_priv) { @@ -231,9 +2851,145 @@ i915_perf_register_sysfs_sklgt3(struct drm_i915_private *dev_priv) if (ret) goto error_render_basic; } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if 
(get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (ret) + goto error_l3_2; + } + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (ret) + goto error_l3_3; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (ret) + goto error_sampler; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (ret) + goto error_vme_pipe; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } return 0; +error_test_oa: + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); +error_vme_pipe: + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); +error_sampler: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); +error_l3_3: + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); +error_l3_2: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, 
mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); error_render_basic: return ret; } @@ -246,4 +3002,38 @@ i915_perf_unregister_sysfs_sklgt3(struct drm_i915_private *dev_priv) if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, 
&group_rasterizer_and_pixel_backend); + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); } diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt4.c b/drivers/gpu/drm/i915/i915_oa_sklgt4.c index ed034f190a6c..9ddab43a2176 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt4.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt4.c @@ -33,9 +33,26 @@ enum metric_set_id { METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_L3_2, + METRIC_SET_ID_L3_3, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_VME_PIPE, + METRIC_SET_ID_TEST_OA, }; -int i915_oa_n_builtin_metric_sets_sklgt4 = 1; +int i915_oa_n_builtin_metric_sets_sklgt4 = 18; static const struct i915_oa_reg b_counter_config_render_basic[] = { { _MMIO(0x2710), 0x00000000 }, @@ -168,6 +185,1836 @@ get_render_basic_mux_config(struct drm_i915_private *dev_priv, return n; } +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { 
_MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x1a4e0820 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x064f0900 }, + { _MMIO(0x9888), 0x084f0032 }, + { _MMIO(0x9888), 0x0a4f1891 }, + { _MMIO(0x9888), 0x0c4f0e00 }, + { _MMIO(0x9888), 0x0e4f003c }, + { _MMIO(0x9888), 0x004f0d80 }, + { _MMIO(0x9888), 0x024f003b }, + { _MMIO(0x9888), 0x006c0002 }, + { _MMIO(0x9888), 0x086c0100 }, + { _MMIO(0x9888), 0x0c6c000c }, + { _MMIO(0x9888), 0x0e6c0b00 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x081b8000 }, + { _MMIO(0x9888), 0x0c1b4000 }, + { _MMIO(0x9888), 0x0e1b8000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1c8000 }, + { _MMIO(0x9888), 0x1c1c0024 }, + { _MMIO(0x9888), 0x065b8000 }, + { _MMIO(0x9888), 0x085b4000 }, + { _MMIO(0x9888), 0x0a5bc000 }, + { _MMIO(0x9888), 0x0c5b8000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x1a5c6000 }, + { _MMIO(0x9888), 0x1c5c001b }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2000 }, + { _MMIO(0x9888), 0x0c4c0208 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { 
_MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020d2000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2cc000 }, + { _MMIO(0x9888), 0x162cfb00 }, + { _MMIO(0x9888), 0x182c00be }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x19900157 }, + { _MMIO(0x9888), 0x1b900158 }, + { _MMIO(0x9888), 0x1d900105 }, + { _MMIO(0x9888), 0x1f900103 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x11900fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900821 }, + { _MMIO(0x9888), 0x47900802 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900802 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900002 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900422 }, + { _MMIO(0x9888), 0x53905555 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 
0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x0c0e001f }, + { _MMIO(0x9888), 0x0a0f0000 }, + { _MMIO(0x9888), 0x10116800 }, + { _MMIO(0x9888), 0x178a03e0 }, + { _MMIO(0x9888), 0x11824c00 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x13840020 }, + { _MMIO(0x9888), 0x11850019 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x01870c40 }, + { _MMIO(0x9888), 0x17880000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x040d4000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020e5400 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x080f0040 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x0e0f0040 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06110012 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x01898000 }, + { _MMIO(0x9888), 0x0d890100 }, + { _MMIO(0x9888), 0x03898000 }, + { _MMIO(0x9888), 0x09808000 }, + { _MMIO(0x9888), 0x0b808000 }, + { _MMIO(0x9888), 0x0380c000 }, + { _MMIO(0x9888), 0x0f8a0075 }, + { _MMIO(0x9888), 0x1d8a0000 }, + { _MMIO(0x9888), 0x118a8000 }, + { _MMIO(0x9888), 0x1b8a4000 }, + { _MMIO(0x9888), 0x138a8000 }, + { _MMIO(0x9888), 0x1d81a000 }, + { _MMIO(0x9888), 0x15818000 }, + { _MMIO(0x9888), 0x17818000 }, + { _MMIO(0x9888), 0x0b820030 }, + { _MMIO(0x9888), 0x07828000 }, + { 
_MMIO(0x9888), 0x0d824000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x05824000 }, + { _MMIO(0x9888), 0x0d830003 }, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x03838000 }, + { _MMIO(0x9888), 0x07838000 }, + { _MMIO(0x9888), 0x0b840980 }, + { _MMIO(0x9888), 0x03844d80 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x09850080 }, + { _MMIO(0x9888), 0x03850003 }, + { _MMIO(0x9888), 0x01850000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x09870032 }, + { _MMIO(0x9888), 0x01888052 }, + { _MMIO(0x9888), 0x11880000 }, + { _MMIO(0x9888), 0x09884000 }, + { _MMIO(0x9888), 0x1b931001 }, + { _MMIO(0x9888), 0x1d930001 }, + { _MMIO(0x9888), 0x19934000 }, + { _MMIO(0x9888), 0x1b958000 }, + { _MMIO(0x9888), 0x1d950094 }, + { _MMIO(0x9888), 0x19958000 }, + { _MMIO(0x9888), 0x09e58000 }, + { _MMIO(0x9888), 0x0be58000 }, + { _MMIO(0x9888), 0x03e5c000 }, + { _MMIO(0x9888), 0x0592c000 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d924000 }, + { _MMIO(0x9888), 0x0f924000 }, + { _MMIO(0x9888), 0x11928000 }, + { _MMIO(0x9888), 0x1392c000 }, + { _MMIO(0x9888), 0x09924000 }, + { _MMIO(0x9888), 0x01985000 }, + { _MMIO(0x9888), 0x07988000 }, + { _MMIO(0x9888), 0x09981000 }, + { _MMIO(0x9888), 0x0b982000 }, + { _MMIO(0x9888), 0x0d982000 }, + { _MMIO(0x9888), 0x0f989000 }, + { _MMIO(0x9888), 0x05982000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1190c080 }, + { _MMIO(0x9888), 0x51901110 }, + { _MMIO(0x9888), 0x41900440 }, + { _MMIO(0x9888), 
0x55901111 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x47900c21 }, + { _MMIO(0x9888), 0x57901411 }, + { _MMIO(0x9888), 0x49900042 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900024 }, + { _MMIO(0x9888), 0x59900001 }, + { _MMIO(0x9888), 0x43900841 }, + { _MMIO(0x9888), 0x53900411 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { 
_MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900064 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900150 }, + { _MMIO(0x9888), 0x21900151 }, + { _MMIO(0x9888), 0x23900152 }, + { _MMIO(0x9888), 0x25900153 }, + { _MMIO(0x9888), 0x27900154 }, + { _MMIO(0x9888), 0x29900155 }, + { _MMIO(0x9888), 0x2b900156 }, + { _MMIO(0x9888), 0x2d900157 }, + { _MMIO(0x9888), 0x2f90015f }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901000 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 
0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900160 }, + { _MMIO(0x9888), 0x21900161 }, + { _MMIO(0x9888), 0x23900162 }, + { _MMIO(0x9888), 0x25900163 }, + { _MMIO(0x9888), 0x27900164 }, + { _MMIO(0x9888), 0x29900165 }, + { _MMIO(0x9888), 0x2b900166 }, + { _MMIO(0x9888), 0x2d900167 }, + { _MMIO(0x9888), 0x2f900150 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { _MMIO(0x2778), 0x0007fc6a }, + { _MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { 
_MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended[] = { + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x141c8160 }, + { _MMIO(0x9888), 0x161c8015 }, + { _MMIO(0x9888), 0x181c0120 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4eaaa0 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0e6c0b01 }, + { _MMIO(0x9888), 0x006c0200 }, + { _MMIO(0x9888), 0x026c000c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x001c0041 }, + { _MMIO(0x9888), 0x061c4200 }, + { _MMIO(0x9888), 0x081c4443 }, + { _MMIO(0x9888), 0x0a1c4645 }, + { _MMIO(0x9888), 0x0c1c7647 }, + { _MMIO(0x9888), 0x041c7357 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x101c0000 }, + { _MMIO(0x9888), 0x1a1c0000 }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4caa2a }, + { _MMIO(0x9888), 0x0c4c02aa }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x000da000 }, + { 
_MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5515 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x11907fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900040 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900802 }, + { _MMIO(0x9888), 0x47900842 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900842 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900800 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extended; + lens[n] = ARRAY_SIZE(mux_config_compute_extended); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { _MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { _MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static 
const struct i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x166c0760 }, + { _MMIO(0x9888), 0x1593001e }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4e8020 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x006c0051 }, + { _MMIO(0x9888), 0x066c5000 }, + { _MMIO(0x9888), 0x086c5c5d }, + { _MMIO(0x9888), 0x0e6c5e5f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x061b8000 }, + { _MMIO(0x9888), 0x081bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1ce000 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2a00 }, + { _MMIO(0x9888), 0x0c4c0280 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f1500 }, + { _MMIO(0x9888), 0x100f0140 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162c0a00 }, + { _MMIO(0x9888), 0x182c00a0 }, + { _MMIO(0x9888), 0x03933300 }, + { _MMIO(0x9888), 0x05930032 }, + { _MMIO(0x9888), 0x11930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190030f }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900000 }, + { 
_MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900021 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x53905555 }, + { _MMIO(0x9888), 0x43900000 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x104f0232 }, + { _MMIO(0x9888), 0x124f4640 }, + { _MMIO(0x9888), 0x106c0232 }, + { _MMIO(0x9888), 0x11834400 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x004f1880 }, + { _MMIO(0x9888), 0x024f08bb }, + { _MMIO(0x9888), 0x044f001b }, + { _MMIO(0x9888), 0x046c0100 }, + { _MMIO(0x9888), 0x066c000b }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x041b8000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025bc000 }, + { 
_MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x165c8000 }, + { _MMIO(0x9888), 0x185c8000 }, + { _MMIO(0x9888), 0x0a4c00a0 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x062cc000 }, + { _MMIO(0x9888), 0x082cc000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x0f8305c0 }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x07830000 }, + { _MMIO(0x9888), 0x1d950080 }, + { _MMIO(0x9888), 0x13928000 }, + { _MMIO(0x9888), 0x0f988000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x59900001 }, + { _MMIO(0x9888), 0x4b900040 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { 
_MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x126c7b40 }, + { _MMIO(0x9888), 0x166c0020 }, + { _MMIO(0x9888), 0x0a603444 }, + { _MMIO(0x9888), 0x0a613400 }, + { _MMIO(0x9888), 0x1a4ea800 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0800 }, + { _MMIO(0x9888), 0x0c1bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x1c1c003c }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x10600000 }, + { _MMIO(0x9888), 0x04600000 }, + { _MMIO(0x9888), 0x0c610044 }, + { _MMIO(0x9888), 0x10610000 }, + { _MMIO(0x9888), 0x06610000 
}, + { _MMIO(0x9888), 0x0c4c02a8 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0154 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190ffc0 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900420 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900021 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900400 }, + { _MMIO(0x9888), 0x43900421 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_2[] = { + { _MMIO(0xe458), 
0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_2[] = { + { _MMIO(0x9888), 0x126c02e0 }, + { _MMIO(0x9888), 0x146c0001 }, + { _MMIO(0x9888), 0x0a623400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x026c3324 }, + { _MMIO(0x9888), 0x046c3422 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x06614000 }, + { _MMIO(0x9888), 0x0c620044 }, + { _MMIO(0x9888), 0x10620000 }, + { _MMIO(0x9888), 0x06620000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_2; + lens[n] = 
ARRAY_SIZE(mux_config_l3_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_3[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_3[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_3[] = { + { _MMIO(0x9888), 0x126c4e80 }, + { _MMIO(0x9888), 0x146c0000 }, + { _MMIO(0x9888), 0x0a633400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x026c3321 }, + { _MMIO(0x9888), 0x046c342f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c2000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x06604000 }, + { _MMIO(0x9888), 0x0c630044 }, + { _MMIO(0x9888), 0x10630000 }, + { _MMIO(0x9888), 0x06630000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c00aa }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { 
_MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900002 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_3_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_3; + lens[n] = ARRAY_SIZE(mux_config_l3_3); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000efff }, + { _MMIO(0x2778), 0x00006000 }, + { _MMIO(0x277c), 0x0000f3ff }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x102f3800 }, + { _MMIO(0x9888), 0x144d0500 }, + { _MMIO(0x9888), 0x120d03c0 }, + { _MMIO(0x9888), 0x140d03cf }, + { _MMIO(0x9888), 0x0c0f0004 }, + { _MMIO(0x9888), 0x0c4e4000 }, + { _MMIO(0x9888), 0x042f0480 }, + { _MMIO(0x9888), 0x082f0000 }, + { _MMIO(0x9888), 0x022f0000 }, + { _MMIO(0x9888), 0x0a4c0090 }, + { _MMIO(0x9888), 0x064d0027 }, + { _MMIO(0x9888), 0x004d0000 }, + { _MMIO(0x9888), 0x000d0d40 }, + { _MMIO(0x9888), 0x020d803f }, + { _MMIO(0x9888), 
0x040d8023 }, + { _MMIO(0x9888), 0x100d0000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020f0010 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x0e0f0050 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41901400 }, + { _MMIO(0x9888), 0x43901485 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler[] = { + { _MMIO(0x9888), 0x14152c00 }, + { _MMIO(0x9888), 0x16150005 }, + { _MMIO(0x9888), 0x121600a0 }, + { _MMIO(0x9888), 0x14352c00 }, + { _MMIO(0x9888), 0x16350005 }, + { _MMIO(0x9888), 0x123600a0 }, + { _MMIO(0x9888), 0x14552c00 }, + 
{ _MMIO(0x9888), 0x16550005 }, + { _MMIO(0x9888), 0x125600a0 }, + { _MMIO(0x9888), 0x062f6000 }, + { _MMIO(0x9888), 0x022f2000 }, + { _MMIO(0x9888), 0x0c4c0050 }, + { _MMIO(0x9888), 0x0a4c0010 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0350 }, + { _MMIO(0x9888), 0x0c0fb000 }, + { _MMIO(0x9888), 0x0e0f00da }, + { _MMIO(0x9888), 0x182c0028 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x022dc000 }, + { _MMIO(0x9888), 0x042d4000 }, + { _MMIO(0x9888), 0x0c138000 }, + { _MMIO(0x9888), 0x0e132000 }, + { _MMIO(0x9888), 0x0413c000 }, + { _MMIO(0x9888), 0x1c140018 }, + { _MMIO(0x9888), 0x0c157000 }, + { _MMIO(0x9888), 0x0e150078 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x04162180 }, + { _MMIO(0x9888), 0x02160000 }, + { _MMIO(0x9888), 0x04174000 }, + { _MMIO(0x9888), 0x0233a000 }, + { _MMIO(0x9888), 0x04333000 }, + { _MMIO(0x9888), 0x14348000 }, + { _MMIO(0x9888), 0x16348000 }, + { _MMIO(0x9888), 0x02357870 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x04360043 }, + { _MMIO(0x9888), 0x02360000 }, + { _MMIO(0x9888), 0x04371000 }, + { _MMIO(0x9888), 0x0e538000 }, + { _MMIO(0x9888), 0x00538000 }, + { _MMIO(0x9888), 0x06533000 }, + { _MMIO(0x9888), 0x1c540020 }, + { _MMIO(0x9888), 0x12548000 }, + { _MMIO(0x9888), 0x0e557000 }, + { _MMIO(0x9888), 0x00557800 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x06560043 }, + { _MMIO(0x9888), 0x02560000 }, + { _MMIO(0x9888), 0x06571000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900060 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 
0x45900060 }, +}; + +static int +get_sampler_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler; + lens[n] = ARRAY_SIZE(mux_config_sampler); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x00007fff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x00009fff }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000efff }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000f3ff }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fdff }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x12120000 }, + { _MMIO(0x9888), 0x12320000 }, + { _MMIO(0x9888), 0x12520000 }, + { _MMIO(0x9888), 0x002f8000 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0015 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f03a0 }, + { _MMIO(0x9888), 0x0c0ff000 }, + { _MMIO(0x9888), 0x0e0f0095 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { 
_MMIO(0x9888), 0x0c2d8000 }, + { _MMIO(0x9888), 0x0e2d4000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x02108000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x02118000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x02121880 }, + { _MMIO(0x9888), 0x041219b5 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x02134000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x0c308000 }, + { _MMIO(0x9888), 0x0e304000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x0c318000 }, + { _MMIO(0x9888), 0x0e314000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x0c321a80 }, + { _MMIO(0x9888), 0x0e320033 }, + { _MMIO(0x9888), 0x06320031 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x0c334000 }, + { _MMIO(0x9888), 0x0e331000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0e508000 }, + { _MMIO(0x9888), 0x00508000 }, + { _MMIO(0x9888), 0x02504000 }, + { _MMIO(0x9888), 0x0e518000 }, + { _MMIO(0x9888), 0x00518000 }, + { _MMIO(0x9888), 0x02514000 }, + { _MMIO(0x9888), 0x0e521880 }, + { _MMIO(0x9888), 0x00521a80 }, + { _MMIO(0x9888), 0x02520033 }, + { _MMIO(0x9888), 0x0e534000 }, + { _MMIO(0x9888), 0x00534000 }, + { _MMIO(0x9888), 0x02531000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900062 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct 
i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x12124d60 }, + { _MMIO(0x9888), 0x12322e60 }, + { _MMIO(0x9888), 0x12524d60 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0014 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0fe000 }, + { _MMIO(0x9888), 0x0e0f0097 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x002d8000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x04121fb7 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x00308000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x00318000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x00321b80 }, + { _MMIO(0x9888), 0x0632003f }, + { _MMIO(0x9888), 0x00334000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0250c000 }, + { _MMIO(0x9888), 0x0251c000 }, + { _MMIO(0x9888), 0x02521fb7 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x02535000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900063 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_tdl_2_mux_config(struct 
drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { +}; + +static const struct i915_oa_reg mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x121203e0 }, + { _MMIO(0x9888), 0x123203e0 }, + { _MMIO(0x9888), 0x125203e0 }, + { _MMIO(0x9888), 0x129203e0 }, + { _MMIO(0x9888), 0x12b203e0 }, + { _MMIO(0x9888), 0x12d203e0 }, + { _MMIO(0x9888), 0x131203e0 }, + { _MMIO(0x9888), 0x133203e0 }, + { _MMIO(0x9888), 0x135203e0 }, + { _MMIO(0x9888), 0x1a4ef000 }, + { _MMIO(0x9888), 0x1c4e0003 }, + { _MMIO(0x9888), 0x024ec000 }, + { _MMIO(0x9888), 0x044ec000 }, + { _MMIO(0x9888), 0x064ec000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0c4c02a0 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c0042 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0150 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f006d }, + { _MMIO(0x9888), 0x182c00a8 }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x042d8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06114000 }, + { _MMIO(0x9888), 0x06120033 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x04308000 }, + { _MMIO(0x9888), 0x04318000 }, + { _MMIO(0x9888), 0x04321980 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x04334000 }, + { _MMIO(0x9888), 0x04504000 }, + { 
_MMIO(0x9888), 0x04514000 }, + { _MMIO(0x9888), 0x04520033 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x04531000 }, + { _MMIO(0x9888), 0x1acef000 }, + { _MMIO(0x9888), 0x1cce0003 }, + { _MMIO(0x9888), 0x00af8000 }, + { _MMIO(0x9888), 0x0ccc02a0 }, + { _MMIO(0x9888), 0x0acc0001 }, + { _MMIO(0x9888), 0x0c8d8000 }, + { _MMIO(0x9888), 0x0e8da000 }, + { _MMIO(0x9888), 0x008d8000 }, + { _MMIO(0x9888), 0x028da000 }, + { _MMIO(0x9888), 0x108f0150 }, + { _MMIO(0x9888), 0x0c8fb000 }, + { _MMIO(0x9888), 0x0e8f0001 }, + { _MMIO(0x9888), 0x18ac00a8 }, + { _MMIO(0x9888), 0x06ac8000 }, + { _MMIO(0x9888), 0x02ad4000 }, + { _MMIO(0x9888), 0x02908000 }, + { _MMIO(0x9888), 0x02918000 }, + { _MMIO(0x9888), 0x02921980 }, + { _MMIO(0x9888), 0x00920000 }, + { _MMIO(0x9888), 0x02934000 }, + { _MMIO(0x9888), 0x02b04000 }, + { _MMIO(0x9888), 0x02b14000 }, + { _MMIO(0x9888), 0x02b20033 }, + { _MMIO(0x9888), 0x00b20000 }, + { _MMIO(0x9888), 0x02b31000 }, + { _MMIO(0x9888), 0x00d08000 }, + { _MMIO(0x9888), 0x00d18000 }, + { _MMIO(0x9888), 0x00d21980 }, + { _MMIO(0x9888), 0x00d34000 }, + { _MMIO(0x9888), 0x072f8000 }, + { _MMIO(0x9888), 0x0d4c0100 }, + { _MMIO(0x9888), 0x0d0d8000 }, + { _MMIO(0x9888), 0x0f0da000 }, + { _MMIO(0x9888), 0x110f01b0 }, + { _MMIO(0x9888), 0x192c0080 }, + { _MMIO(0x9888), 0x0f2d4000 }, + { _MMIO(0x9888), 0x0f108000 }, + { _MMIO(0x9888), 0x0f118000 }, + { _MMIO(0x9888), 0x0f121980 }, + { _MMIO(0x9888), 0x01120000 }, + { _MMIO(0x9888), 0x0f134000 }, + { _MMIO(0x9888), 0x0f304000 }, + { _MMIO(0x9888), 0x0f314000 }, + { _MMIO(0x9888), 0x0f320033 }, + { _MMIO(0x9888), 0x01320000 }, + { _MMIO(0x9888), 0x0f331000 }, + { _MMIO(0x9888), 0x0d508000 }, + { _MMIO(0x9888), 0x0d518000 }, + { _MMIO(0x9888), 0x0d521980 }, + { _MMIO(0x9888), 0x01520000 }, + { _MMIO(0x9888), 0x0d534000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 
0x4b900002 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51901100 }, + { _MMIO(0x9888), 0x41901000 }, + { _MMIO(0x9888), 0x43901423 }, + { _MMIO(0x9888), 0x53903331 }, + { _MMIO(0x9888), 0x45900044 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_vme_pipe[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00100030 }, + { _MMIO(0x2774), 0x0000fff9 }, + { _MMIO(0x2778), 0x00000002 }, + { _MMIO(0x277c), 0x0000fffc }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000fff3 }, + { _MMIO(0x2788), 0x00100180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00000002 }, + { _MMIO(0x279c), 0x0000ff3f }, +}; + +static const struct i915_oa_reg flex_eu_config_vme_pipe[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00008003 }, +}; + +static const struct i915_oa_reg mux_config_vme_pipe[] = { + { _MMIO(0x9888), 0x141a5800 }, + { _MMIO(0x9888), 0x161a00c0 }, + { _MMIO(0x9888), 0x12180240 }, + { _MMIO(0x9888), 0x14180002 }, + { _MMIO(0x9888), 0x149a5800 }, + { _MMIO(0x9888), 0x169a00c0 }, + { _MMIO(0x9888), 0x12980240 }, + { _MMIO(0x9888), 0x14980002 }, + { _MMIO(0x9888), 0x1a4e3fc0 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x022f8000 }, + { _MMIO(0x9888), 0x042f3000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c9500 }, + { _MMIO(0x9888), 0x0c4c002a }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { 
_MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0015 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c000a }, + { _MMIO(0x9888), 0x04193000 }, + { _MMIO(0x9888), 0x081a28c1 }, + { _MMIO(0x9888), 0x001a0000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x0613c000 }, + { _MMIO(0x9888), 0x0813f000 }, + { _MMIO(0x9888), 0x00172000 }, + { _MMIO(0x9888), 0x06178000 }, + { _MMIO(0x9888), 0x0817a000 }, + { _MMIO(0x9888), 0x00180037 }, + { _MMIO(0x9888), 0x06180940 }, + { _MMIO(0x9888), 0x08180000 }, + { _MMIO(0x9888), 0x02180000 }, + { _MMIO(0x9888), 0x04183000 }, + { _MMIO(0x9888), 0x04afc000 }, + { _MMIO(0x9888), 0x06af3000 }, + { _MMIO(0x9888), 0x0acc4000 }, + { _MMIO(0x9888), 0x0ccc0015 }, + { _MMIO(0x9888), 0x0a8da000 }, + { _MMIO(0x9888), 0x0c8da000 }, + { _MMIO(0x9888), 0x0e8f4000 }, + { _MMIO(0x9888), 0x108f0015 }, + { _MMIO(0x9888), 0x16aca000 }, + { _MMIO(0x9888), 0x18ac000a }, + { _MMIO(0x9888), 0x06993000 }, + { _MMIO(0x9888), 0x0c9a28c1 }, + { _MMIO(0x9888), 0x009a0000 }, + { _MMIO(0x9888), 0x0a93f000 }, + { _MMIO(0x9888), 0x0c93f000 }, + { _MMIO(0x9888), 0x0a97a000 }, + { _MMIO(0x9888), 0x0c97a000 }, + { _MMIO(0x9888), 0x0a980977 }, + { _MMIO(0x9888), 0x08980000 }, + { _MMIO(0x9888), 0x04980000 }, + { _MMIO(0x9888), 0x06983000 }, + { _MMIO(0x9888), 0x119000ff }, + { _MMIO(0x9888), 0x51900010 }, + { _MMIO(0x9888), 0x41900060 }, + { _MMIO(0x9888), 0x55900111 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900821 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900002 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_vme_pipe_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_vme_pipe; + lens[n] = ARRAY_SIZE(mux_config_vme_pipe); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810013 }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + int i915_oa_select_metric_set_sklgt4(struct drm_i915_private *dev_priv) { 
dev_priv->perf.oa.n_mux_configs = 0; @@ -202,6 +2049,448 @@ int i915_oa_select_metric_set_sklgt4(struct drm_i915_private *dev_priv) dev_priv->perf.oa.flex_regs_len = ARRAY_SIZE(flex_eu_config_render_basic); + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + 
dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + 
} + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + 
dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_L3_2: + dev_priv->perf.oa.n_mux_configs = + get_l3_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_2); + + return 0; + case METRIC_SET_ID_L3_3: + dev_priv->perf.oa.n_mux_configs = + get_l3_3_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_3\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_3; + 
dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_3); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_3; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_3); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER: + dev_priv->perf.oa.n_mux_configs = + get_sampler_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, 
+ dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + 
dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_VME_PIPE: + dev_priv->perf.oa.n_mux_configs = + get_vme_pipe_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"VME_PIPE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_vme_pipe; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_vme_pipe); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_vme_pipe; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_vme_pipe); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + return 0; default: return -ENODEV; @@ -230,6 +2519,380 @@ static struct attribute_group group_render_basic = { .attrs = attrs_render_basic, }; +static ssize_t +show_compute_basic_id(struct device *kdev, struct 
device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "7277228f-e7f3-4743-945a-6a2049d11377", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "463c668c-3f60-49b6-8f85-d995b635b3b2", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_memory_reads_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static struct attribute_group group_memory_reads = { + .name = "3ae6e74c-72c3-4040-9bd0-7961430b8cc8", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = 
show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "055f256d-4052-467c-8dec-6064a4806433", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "753972d4-87cd-4460-824d-754463ac5054", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, +}; + +static struct attribute_group group_compute_l3_cache = { + .name = "4e4392e9-8f73-457b-ab44-b49f7a0c733b", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = 
"730d95dd-7da8-4e1c-ab8d-c0eb1e4c1805", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "d9e86d70-462b-462a-851e-fd63e8c13d63", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_l3_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_2); +} + +static struct device_attribute dev_attr_l3_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_2_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_2[] = { + &dev_attr_l3_2_id.attr, + NULL, +}; + +static struct attribute_group group_l3_2 = { + .name = "52200424-6ee9-48b3-b7fa-0afcf1975e4d", + .attrs = attrs_l3_2, +}; + +static ssize_t +show_l3_3_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_3); +} + +static struct device_attribute dev_attr_l3_3_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_3_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_3[] = { + &dev_attr_l3_3_id.attr, + NULL, +}; + +static struct attribute_group group_l3_3 = { + .name = "1988315f-0a26-44df-acb0-df7ec86b1456", + .attrs = attrs_l3_3, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute 
*attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "f1f17ca7-286e-4ae5-9d15-9fccad6c665d", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER); +} + +static struct device_attribute dev_attr_sampler_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler[] = { + &dev_attr_sampler_id.attr, + NULL, +}; + +static struct attribute_group group_sampler = { + .name = "00a9e0fb-3d2e-4405-852c-dce6334ffb3b", + .attrs = attrs_sampler, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_1 = { + .name = "13dcc50a-7ec0-409b-99d6-a3f932cedcb3", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "97875e21-6624-4aee-9191-682feb3eae21", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + 
+static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "a5aa857d-e8f0-4dfa-8981-ce340fa748fd", + .attrs = attrs_compute_extra, +}; + +static ssize_t +show_vme_pipe_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_VME_PIPE); +} + +static struct device_attribute dev_attr_vme_pipe_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_vme_pipe_id, + .store = NULL, +}; + +static struct attribute *attrs_vme_pipe[] = { + &dev_attr_vme_pipe_id.attr, + NULL, +}; + +static struct attribute_group group_vme_pipe = { + .name = "0e8d8b86-4ee7-4cdd-aaaa-58adc92cb29e", + .attrs = attrs_vme_pipe, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "882fa433-1f4a-4a67-a962-c741888fe5f5", + .attrs = attrs_test_oa, +}; + int i915_perf_register_sysfs_sklgt4(struct drm_i915_private *dev_priv) { @@ -242,9 +2905,145 @@ i915_perf_register_sysfs_sklgt4(struct drm_i915_private *dev_priv) if (ret) goto error_render_basic; } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, 
&group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (ret) + goto error_l3_2; + } + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (ret) + goto error_l3_3; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (ret) + goto error_sampler; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = 
sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (ret) + goto error_vme_pipe; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } return 0; +error_test_oa: + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); +error_vme_pipe: + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); +error_sampler: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); +error_l3_3: + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, 
&group_l3_2); +error_l3_2: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); error_render_basic: return ret; } @@ -257,4 +3056,38 @@ i915_perf_unregister_sysfs_sklgt4(struct drm_i915_private *dev_priv) if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + 
if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); } 
From 155e941f49289fe73157f1c9b3c93450a2e40175 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:05 +0100 Subject: [PATCH 166/341] drm/i915/perf: per-gen timebase for checking sample freq An oa_exponent_to_ns() utility and per-gen timebase constants were recently removed when updating the tail pointer race condition WA, and this restores those so we can update the _PROP_OA_EXPONENT validation done in read_properties_unlocked() to not assume we have a 12.5MHz timebase as we did for Haswell. Accordingly the oa_sample_rate_hard_limit value that's referenced by proc_dointvec_minmax defining the absolute limit for the OA sampling frequency is now initialized to (timestamp_frequency / 2) instead of the 6.25MHz constant for Haswell. v2: Specify frequency of 19.2MHz for BXT (Ville) Initialize oa_sample_rate_hard_limit per-gen too (Lionel) Signed-off-by: Robert Bragg Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 39 +++++++++++++++++++++++--------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 101b66b3f86a..975e216f3469 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2415,6 +2415,7 @@ struct drm_i915_private { bool periodic; int period_exponent; + int timestamp_frequency; int metrics_set; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a6af4d7dc4d6..4d79698685ea 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -288,10 +288,12 @@ static u32 i915_perf_stream_paranoid = true; /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate * - * 160ns is the smallest sampling period we can theoretically program the OA - * unit with on Haswell, corresponding to 6.25MHz. 
+ * The highest sampling frequency we can theoretically program the OA unit + * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell. + * + * Initialized just before we register the sysctl parameter. */ -static int oa_sample_rate_hard_limit = 6250000; +static int oa_sample_rate_hard_limit; /* Theoretically we can program the OA unit to sample every 160ns but don't * allow that by default unless root... @@ -2642,6 +2644,12 @@ err: return ret; } +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) +{ + return div_u64(1000000000ULL * (2ULL << exponent), + dev_priv->perf.oa.timestamp_frequency); +} + /** * read_properties_unlocked - validate + copy userspace stream open properties * @dev_priv: i915 device instance @@ -2738,16 +2746,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, } /* Theoretically we can program the OA unit to sample - * every 160ns but don't allow that by default unless - * root. - * - * On Haswell the period is derived from the exponent - * as: - * - * period = 80ns * 2^(exponent + 1) + * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns + * for BXT. We don't allow such high sampling + * frequencies by default unless root. 
*/ + BUILD_BUG_ON(sizeof(oa_period) != 8); - oa_period = 80ull * (2ull << value); + oa_period = oa_exponent_to_ns(dev_priv, value); /* This check is primarily to ensure that oa_period <= * UINT32_MAX (before passing to do_div which only @@ -3003,6 +3008,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.oa_hw_tail_read = gen7_oa_hw_tail_read; + dev_priv->perf.oa.timestamp_frequency = 12500000; + dev_priv->perf.oa.oa_formats = hsw_oa_formats; dev_priv->perf.oa.n_builtin_sets = @@ -3018,6 +3025,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv) if (IS_GEN8(dev_priv)) { dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + + dev_priv->perf.oa.timestamp_frequency = 12500000; + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); if (IS_BROADWELL(dev_priv)) { @@ -3034,6 +3044,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv) } else if (IS_GEN9(dev_priv)) { dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + + dev_priv->perf.oa.timestamp_frequency = 12000000; + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); if (IS_SKL_GT2(dev_priv)) { @@ -3052,6 +3065,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.select_metric_set = i915_oa_select_metric_set_sklgt4; } else if (IS_BROXTON(dev_priv)) { + dev_priv->perf.oa.timestamp_frequency = 19200000; + dev_priv->perf.oa.n_builtin_sets = i915_oa_n_builtin_metric_sets_bxt; dev_priv->perf.oa.ops.select_metric_set = @@ -3086,6 +3101,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->perf.hook_lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); + oa_sample_rate_hard_limit = + dev_priv->perf.oa.timestamp_frequency / 2; dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); dev_priv->perf.initialized = true; From 1bef3409f139d787598245179ff824ff0b46cce4 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 
12:23:06 +0100 Subject: [PATCH 167/341] drm/i915/perf: remove perf.hook_lock In earlier iterations of the i915-perf driver we had a number of callbacks/hooks from other parts of the i915 driver to e.g. notify us when a legacy context was pinned and these could run asynchronously with respect to the stream file operations and might also run in atomic context. dev_priv->perf.hook_lock had been for serialising access to state needed within these callbacks, but as the code has evolved some of the hooks have gone away or are implemented to avoid needing to lock any state. The remaining use of this lock was actually redundant considering how the gen7 oacontrol state used to be updated as part of a context pin hook. Signed-off-by: Robert Bragg Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_perf.c | 33 +++++++++++--------------------- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 975e216f3469..8d33b810f8b4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2396,8 +2396,6 @@ struct drm_i915_private { struct mutex lock; struct list_head streams; - spinlock_t hook_lock; - struct { struct i915_perf_stream *exclusive_stream; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4d79698685ea..1980f9597b19 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1836,9 +1836,18 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) gen8_configure_all_contexts(dev_priv, false); } -static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) +static void gen7_oa_enable(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->perf.hook_lock); + /* + * Reset buf pointers so we don't forward reports from before now. 
+ * + * Think carefully if considering trying to avoid this, since it + * also ensures status flags and the buffer itself are cleared + * in error paths, and we have checks for invalid reports based + * on the assumption that certain fields are written to zeroed + * memory which this helps maintains. + */ + gen7_init_oa_buffer(dev_priv); if (dev_priv->perf.oa.exclusive_stream->enabled) { struct i915_gem_context *ctx = @@ -1861,25 +1870,6 @@ static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_OACONTROL, 0); } -static void gen7_oa_enable(struct drm_i915_private *dev_priv) -{ - unsigned long flags; - - /* Reset buf pointers so we don't forward reports from before now. - * - * Think carefully if considering trying to avoid this, since it - * also ensures status flags and the buffer itself are cleared - * in error paths, and we have checks for invalid reports based - * on the assumption that certain fields are written to zeroed - * memory which this helps maintains. - */ - gen7_init_oa_buffer(dev_priv); - - spin_lock_irqsave(&dev_priv->perf.hook_lock, flags); - gen7_update_oacontrol_locked(dev_priv); - spin_unlock_irqrestore(&dev_priv->perf.hook_lock, flags); -} - static void gen8_oa_enable(struct drm_i915_private *dev_priv) { u32 report_format = dev_priv->perf.oa.oa_buffer.format; @@ -3098,7 +3088,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->perf.streams); mutex_init(&dev_priv->perf.lock); - spin_lock_init(&dev_priv->perf.hook_lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); oa_sample_rate_hard_limit = From 3891589eee3276cb820a9f3d52b5c82b08382dd7 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 13 Jun 2017 12:23:07 +0100 Subject: [PATCH 168/341] drm/i915: add KBL GT2/GT3 check macros Add macros to detect GT2/GT3 skus so we can apply the proper OA configuration later. 
Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8d33b810f8b4..5dbaf6b24494 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2826,6 +2826,10 @@ intel_info(const struct drm_i915_private *dev_priv) (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) #define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0030) +#define IS_KBL_GT2(dev_priv) (IS_KABYLAKE(dev_priv) && \ + (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0010) +#define IS_KBL_GT3(dev_priv) (IS_KABYLAKE(dev_priv) && \ + (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) #define IS_CFL_ULT(dev_priv) (IS_COFFEELAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x00A0) From 6c5c1d89af15aec24a221637e0fd8b55f15672c3 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 13 Jun 2017 12:23:08 +0100 Subject: [PATCH 169/341] drm/i915/perf: add KBL support Add OA support for Kabylake (pretty much identical to Skylake), and also add the associated OA configurations. 
Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/Makefile | 4 +- drivers/gpu/drm/i915/i915_oa_kblgt2.c | 2991 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_oa_kblgt2.h | 40 + drivers/gpu/drm/i915/i915_oa_kblgt3.c | 3040 +++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_oa_kblgt3.h | 40 + drivers/gpu/drm/i915/i915_perf.c | 30 +- 6 files changed, 6143 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_oa_kblgt2.c create mode 100644 drivers/gpu/drm/i915/i915_oa_kblgt2.h create mode 100644 drivers/gpu/drm/i915/i915_oa_kblgt3.c create mode 100644 drivers/gpu/drm/i915/i915_oa_kblgt3.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 49a628cdef9e..033a2df01dbe 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -135,7 +135,9 @@ i915-y += i915_perf.o \ i915_oa_sklgt2.o \ i915_oa_sklgt3.o \ i915_oa_sklgt4.o \ - i915_oa_bxt.o + i915_oa_bxt.o \ + i915_oa_kblgt2.o \ + i915_oa_kblgt3.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt2.c b/drivers/gpu/drm/i915/i915_oa_kblgt2.c new file mode 100644 index 000000000000..87dbd0a0b076 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_kblgt2.c @@ -0,0 +1,2991 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_kblgt2.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_L3_2, + METRIC_SET_ID_L3_3, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_VME_PIPE, + METRIC_SET_ID_TEST_OA, +}; + +int i915_oa_n_builtin_metric_sets_kblgt2 = 18; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic[] = { + { _MMIO(0x9888), 0x166c01e0 }, + { _MMIO(0x9888), 0x12170280 }, + { _MMIO(0x9888), 0x12370280 }, + { _MMIO(0x9888), 0x11930317 }, + { _MMIO(0x9888), 0x159303df }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x1a4e0080 }, + { _MMIO(0x9888), 0x0a6c0053 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x0a1b4000 }, + { _MMIO(0x9888), 0x1c1c0001 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x042f1000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c8400 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0d2000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 
0x0e0f6600 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x162c2200 }, + { _MMIO(0x9888), 0x062d8000 }, + { _MMIO(0x9888), 0x082d8000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x08133000 }, + { _MMIO(0x9888), 0x00170020 }, + { _MMIO(0x9888), 0x08170021 }, + { _MMIO(0x9888), 0x10170000 }, + { _MMIO(0x9888), 0x0633c000 }, + { _MMIO(0x9888), 0x0833c000 }, + { _MMIO(0x9888), 0x06370800 }, + { _MMIO(0x9888), 0x08370840 }, + { _MMIO(0x9888), 0x10370000 }, + { _MMIO(0x9888), 0x0d933031 }, + { _MMIO(0x9888), 0x0f933e3f }, + { _MMIO(0x9888), 0x01933d00 }, + { _MMIO(0x9888), 0x0393073c }, + { _MMIO(0x9888), 0x0593000e }, + { _MMIO(0x9888), 0x1d930000 }, + { _MMIO(0x9888), 0x19930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x2f908000 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x15908000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190001f }, + { _MMIO(0x9888), 0x51904400 }, + { _MMIO(0x9888), 0x41900020 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c21 }, + { _MMIO(0x9888), 0x47900061 }, + { _MMIO(0x9888), 0x57904440 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900004 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x53904444 }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_basic; + lens[n] = ARRAY_SIZE(mux_config_render_basic); + n++; + + return n; +} + +static const struct i915_oa_reg 
b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x1a4e0820 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x064f0900 }, + { _MMIO(0x9888), 0x084f0032 }, + { _MMIO(0x9888), 0x0a4f1891 }, + { _MMIO(0x9888), 0x0c4f0e00 }, + { _MMIO(0x9888), 0x0e4f003c }, + { _MMIO(0x9888), 0x004f0d80 }, + { _MMIO(0x9888), 0x024f003b }, + { _MMIO(0x9888), 0x006c0002 }, + { _MMIO(0x9888), 0x086c0100 }, + { _MMIO(0x9888), 0x0c6c000c }, + { _MMIO(0x9888), 0x0e6c0b00 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x081b8000 }, + { _MMIO(0x9888), 0x0c1b4000 }, + { _MMIO(0x9888), 0x0e1b8000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1c8000 }, + { _MMIO(0x9888), 0x1c1c0024 }, + { _MMIO(0x9888), 0x065b8000 }, + { _MMIO(0x9888), 0x085b4000 }, + { _MMIO(0x9888), 0x0a5bc000 }, + { _MMIO(0x9888), 0x0c5b8000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x1a5c6000 }, + { _MMIO(0x9888), 0x1c5c001b }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2000 }, + { _MMIO(0x9888), 0x0c4c0208 }, + { 
_MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020d2000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2cc000 }, + { _MMIO(0x9888), 0x162cfb00 }, + { _MMIO(0x9888), 0x182c00be }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x19900157 }, + { _MMIO(0x9888), 0x1b900158 }, + { _MMIO(0x9888), 0x1d900105 }, + { _MMIO(0x9888), 0x1f900103 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x11900fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900821 }, + { _MMIO(0x9888), 0x47900802 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900802 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900002 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900422 }, + { _MMIO(0x9888), 0x53904444 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 
0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x0c0e001f }, + { _MMIO(0x9888), 0x0a0f0000 }, + { _MMIO(0x9888), 0x10116800 }, + { _MMIO(0x9888), 0x178a03e0 }, + { _MMIO(0x9888), 0x11824c00 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x13840020 }, + { _MMIO(0x9888), 0x11850019 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x01870c40 }, + { _MMIO(0x9888), 0x17880000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x040d4000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020e5400 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x080f0040 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x0e0f0040 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06110012 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x01898000 }, + { _MMIO(0x9888), 0x0d890100 }, + { _MMIO(0x9888), 0x03898000 }, + { _MMIO(0x9888), 0x09808000 }, + { _MMIO(0x9888), 0x0b808000 }, + { _MMIO(0x9888), 0x0380c000 }, + { _MMIO(0x9888), 0x0f8a0075 }, + { _MMIO(0x9888), 0x1d8a0000 }, + { _MMIO(0x9888), 0x118a8000 }, + { _MMIO(0x9888), 0x1b8a4000 }, + { _MMIO(0x9888), 0x138a8000 }, + { _MMIO(0x9888), 0x1d81a000 }, + { _MMIO(0x9888), 0x15818000 }, + { _MMIO(0x9888), 0x17818000 }, + { 
_MMIO(0x9888), 0x0b820030 }, + { _MMIO(0x9888), 0x07828000 }, + { _MMIO(0x9888), 0x0d824000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x05824000 }, + { _MMIO(0x9888), 0x0d830003 }, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x03838000 }, + { _MMIO(0x9888), 0x07838000 }, + { _MMIO(0x9888), 0x0b840980 }, + { _MMIO(0x9888), 0x03844d80 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x09850080 }, + { _MMIO(0x9888), 0x03850003 }, + { _MMIO(0x9888), 0x01850000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x09870032 }, + { _MMIO(0x9888), 0x01888052 }, + { _MMIO(0x9888), 0x11880000 }, + { _MMIO(0x9888), 0x09884000 }, + { _MMIO(0x9888), 0x1b931001 }, + { _MMIO(0x9888), 0x1d930001 }, + { _MMIO(0x9888), 0x19934000 }, + { _MMIO(0x9888), 0x1b958000 }, + { _MMIO(0x9888), 0x1d950094 }, + { _MMIO(0x9888), 0x19958000 }, + { _MMIO(0x9888), 0x09e58000 }, + { _MMIO(0x9888), 0x0be58000 }, + { _MMIO(0x9888), 0x03e5c000 }, + { _MMIO(0x9888), 0x0592c000 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d924000 }, + { _MMIO(0x9888), 0x0f924000 }, + { _MMIO(0x9888), 0x11928000 }, + { _MMIO(0x9888), 0x1392c000 }, + { _MMIO(0x9888), 0x09924000 }, + { _MMIO(0x9888), 0x01985000 }, + { _MMIO(0x9888), 0x07988000 }, + { _MMIO(0x9888), 0x09981000 }, + { _MMIO(0x9888), 0x0b982000 }, + { _MMIO(0x9888), 0x0d982000 }, + { _MMIO(0x9888), 0x0f989000 }, + { _MMIO(0x9888), 0x05982000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1190c080 }, + { _MMIO(0x9888), 
0x51900000 }, + { _MMIO(0x9888), 0x41900440 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x47900c21 }, + { _MMIO(0x9888), 0x57900400 }, + { _MMIO(0x9888), 0x49900042 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900024 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900841 }, + { _MMIO(0x9888), 0x53900400 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg 
flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900064 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900150 }, + { _MMIO(0x9888), 0x21900151 }, + { _MMIO(0x9888), 0x23900152 }, + { _MMIO(0x9888), 0x25900153 }, + { _MMIO(0x9888), 0x27900154 }, + { _MMIO(0x9888), 0x29900155 }, + { _MMIO(0x9888), 0x2b900156 }, + { _MMIO(0x9888), 0x2d900157 }, + { _MMIO(0x9888), 0x2f90015f }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + 
+ BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901000 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 
0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900160 }, + { _MMIO(0x9888), 0x21900161 }, + { _MMIO(0x9888), 0x23900162 }, + { _MMIO(0x9888), 0x25900163 }, + { _MMIO(0x9888), 0x27900164 }, + { _MMIO(0x9888), 0x29900165 }, + { _MMIO(0x9888), 0x2b900166 }, + { _MMIO(0x9888), 0x2d900167 }, + { _MMIO(0x9888), 0x2f900150 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { _MMIO(0x2778), 0x0007fc6a }, + { 
_MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { _MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended[] = { + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x141c8160 }, + { _MMIO(0x9888), 0x161c8015 }, + { _MMIO(0x9888), 0x181c0120 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4eaaa0 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0e6c0b01 }, + { _MMIO(0x9888), 0x006c0200 }, + { _MMIO(0x9888), 0x026c000c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x001c0041 }, + { _MMIO(0x9888), 0x061c4200 }, + { _MMIO(0x9888), 0x081c4443 }, + { _MMIO(0x9888), 0x0a1c4645 }, + { _MMIO(0x9888), 0x0c1c7647 }, + { _MMIO(0x9888), 0x041c7357 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x101c0000 }, + { _MMIO(0x9888), 0x1a1c0000 }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4caa2a }, + { _MMIO(0x9888), 0x0c4c02aa }, + { 
_MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5515 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x11907fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900040 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900802 }, + { _MMIO(0x9888), 0x47900842 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900842 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900800 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extended; + lens[n] = ARRAY_SIZE(mux_config_compute_extended); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { _MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { 
_MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x166c0760 }, + { _MMIO(0x9888), 0x1593001e }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4e8020 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x006c0051 }, + { _MMIO(0x9888), 0x066c5000 }, + { _MMIO(0x9888), 0x086c5c5d }, + { _MMIO(0x9888), 0x0e6c5e5f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x061b8000 }, + { _MMIO(0x9888), 0x081bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1ce000 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2a00 }, + { _MMIO(0x9888), 0x0c4c0280 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f1500 }, + { _MMIO(0x9888), 0x100f0140 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162c0a00 }, + { _MMIO(0x9888), 0x182c00a0 }, + { _MMIO(0x9888), 0x03933300 }, + { _MMIO(0x9888), 0x05930032 }, + { _MMIO(0x9888), 0x11930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190030f }, + { 
_MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900021 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x53904444 }, + { _MMIO(0x9888), 0x43900000 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x104f0232 }, + { _MMIO(0x9888), 0x124f4640 }, + { _MMIO(0x9888), 0x106c0232 }, + { _MMIO(0x9888), 0x11834400 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x004f1880 }, + { _MMIO(0x9888), 0x024f08bb }, + { _MMIO(0x9888), 0x044f001b }, + { _MMIO(0x9888), 0x046c0100 }, + { _MMIO(0x9888), 0x066c000b }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x041b8000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { 
_MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025bc000 }, + { _MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x165c8000 }, + { _MMIO(0x9888), 0x185c8000 }, + { _MMIO(0x9888), 0x0a4c00a0 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x062cc000 }, + { _MMIO(0x9888), 0x082cc000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x0f8305c0 }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x07830000 }, + { _MMIO(0x9888), 0x1d950080 }, + { _MMIO(0x9888), 0x13928000 }, + { _MMIO(0x9888), 0x0f988000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900040 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { 
_MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x126c7b40 }, + { _MMIO(0x9888), 0x166c0020 }, + { _MMIO(0x9888), 0x0a603444 }, + { _MMIO(0x9888), 0x0a613400 }, + { _MMIO(0x9888), 0x1a4ea800 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0800 }, + { _MMIO(0x9888), 0x0c1bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x1c1c003c }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x10600000 }, + { _MMIO(0x9888), 0x04600000 }, + { _MMIO(0x9888), 0x0c610044 
}, + { _MMIO(0x9888), 0x10610000 }, + { _MMIO(0x9888), 0x06610000 }, + { _MMIO(0x9888), 0x0c4c02a8 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0154 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190ffc0 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900420 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900021 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900400 }, + { _MMIO(0x9888), 0x43900421 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const 
struct i915_oa_reg flex_eu_config_l3_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_2[] = { + { _MMIO(0x9888), 0x126c02e0 }, + { _MMIO(0x9888), 0x146c0001 }, + { _MMIO(0x9888), 0x0a623400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x026c3324 }, + { _MMIO(0x9888), 0x046c3422 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x06614000 }, + { _MMIO(0x9888), 0x0c620044 }, + { _MMIO(0x9888), 0x10620000 }, + { _MMIO(0x9888), 0x06620000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_2; + lens[n] = ARRAY_SIZE(mux_config_l3_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_3[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_3[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_3[] = { + { _MMIO(0x9888), 0x126c4e80 }, + { _MMIO(0x9888), 0x146c0000 }, + { _MMIO(0x9888), 0x0a633400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x026c3321 }, + { _MMIO(0x9888), 0x046c342f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c2000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x06604000 }, + { _MMIO(0x9888), 0x0c630044 }, + { _MMIO(0x9888), 0x10630000 }, + { _MMIO(0x9888), 0x06630000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c00aa }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + 
{ _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900002 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_3_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_3; + lens[n] = ARRAY_SIZE(mux_config_l3_3); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000efff }, + { _MMIO(0x2778), 0x00006000 }, + { _MMIO(0x277c), 0x0000f3ff }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x102f3800 }, + { _MMIO(0x9888), 0x144d0500 }, + { _MMIO(0x9888), 0x120d03c0 }, + { _MMIO(0x9888), 0x140d03cf }, + { _MMIO(0x9888), 0x0c0f0004 }, + { _MMIO(0x9888), 0x0c4e4000 }, + { _MMIO(0x9888), 0x042f0480 }, + { _MMIO(0x9888), 0x082f0000 }, + { _MMIO(0x9888), 0x022f0000 }, + { _MMIO(0x9888), 0x0a4c0090 }, + { _MMIO(0x9888), 0x064d0027 }, + { _MMIO(0x9888), 
0x004d0000 }, + { _MMIO(0x9888), 0x000d0d40 }, + { _MMIO(0x9888), 0x020d803f }, + { _MMIO(0x9888), 0x040d8023 }, + { _MMIO(0x9888), 0x100d0000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020f0010 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x0e0f0050 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41901400 }, + { _MMIO(0x9888), 0x43901485 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler[] = { + { _MMIO(0x9888), 0x14152c00 }, + { _MMIO(0x9888), 0x16150005 }, + { _MMIO(0x9888), 0x121600a0 }, + { _MMIO(0x9888), 0x14352c00 }, + 
{ _MMIO(0x9888), 0x16350005 }, + { _MMIO(0x9888), 0x123600a0 }, + { _MMIO(0x9888), 0x14552c00 }, + { _MMIO(0x9888), 0x16550005 }, + { _MMIO(0x9888), 0x125600a0 }, + { _MMIO(0x9888), 0x062f6000 }, + { _MMIO(0x9888), 0x022f2000 }, + { _MMIO(0x9888), 0x0c4c0050 }, + { _MMIO(0x9888), 0x0a4c0010 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0350 }, + { _MMIO(0x9888), 0x0c0fb000 }, + { _MMIO(0x9888), 0x0e0f00da }, + { _MMIO(0x9888), 0x182c0028 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x022dc000 }, + { _MMIO(0x9888), 0x042d4000 }, + { _MMIO(0x9888), 0x0c138000 }, + { _MMIO(0x9888), 0x0e132000 }, + { _MMIO(0x9888), 0x0413c000 }, + { _MMIO(0x9888), 0x1c140018 }, + { _MMIO(0x9888), 0x0c157000 }, + { _MMIO(0x9888), 0x0e150078 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x04162180 }, + { _MMIO(0x9888), 0x02160000 }, + { _MMIO(0x9888), 0x04174000 }, + { _MMIO(0x9888), 0x0233a000 }, + { _MMIO(0x9888), 0x04333000 }, + { _MMIO(0x9888), 0x14348000 }, + { _MMIO(0x9888), 0x16348000 }, + { _MMIO(0x9888), 0x02357870 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x04360043 }, + { _MMIO(0x9888), 0x02360000 }, + { _MMIO(0x9888), 0x04371000 }, + { _MMIO(0x9888), 0x0e538000 }, + { _MMIO(0x9888), 0x00538000 }, + { _MMIO(0x9888), 0x06533000 }, + { _MMIO(0x9888), 0x1c540020 }, + { _MMIO(0x9888), 0x12548000 }, + { _MMIO(0x9888), 0x0e557000 }, + { _MMIO(0x9888), 0x00557800 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x06560043 }, + { _MMIO(0x9888), 0x02560000 }, + { _MMIO(0x9888), 0x06571000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900060 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 
0x41900c00 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900060 }, +}; + +static int +get_sampler_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler; + lens[n] = ARRAY_SIZE(mux_config_sampler); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x00007fff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x00009fff }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000efff }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000f3ff }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fdff }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x12120000 }, + { _MMIO(0x9888), 0x12320000 }, + { _MMIO(0x9888), 0x12520000 }, + { _MMIO(0x9888), 0x002f8000 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0015 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f03a0 }, + { _MMIO(0x9888), 0x0c0ff000 }, + { _MMIO(0x9888), 0x0e0f0095 }, + { 
_MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2d8000 }, + { _MMIO(0x9888), 0x0e2d4000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x02108000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x02118000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x02121880 }, + { _MMIO(0x9888), 0x041219b5 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x02134000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x0c308000 }, + { _MMIO(0x9888), 0x0e304000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x0c318000 }, + { _MMIO(0x9888), 0x0e314000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x0c321a80 }, + { _MMIO(0x9888), 0x0e320033 }, + { _MMIO(0x9888), 0x06320031 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x0c334000 }, + { _MMIO(0x9888), 0x0e331000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0e508000 }, + { _MMIO(0x9888), 0x00508000 }, + { _MMIO(0x9888), 0x02504000 }, + { _MMIO(0x9888), 0x0e518000 }, + { _MMIO(0x9888), 0x00518000 }, + { _MMIO(0x9888), 0x02514000 }, + { _MMIO(0x9888), 0x0e521880 }, + { _MMIO(0x9888), 0x00521a80 }, + { _MMIO(0x9888), 0x02520033 }, + { _MMIO(0x9888), 0x0e534000 }, + { _MMIO(0x9888), 0x00534000 }, + { _MMIO(0x9888), 0x02531000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900062 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = 
mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x12124d60 }, + { _MMIO(0x9888), 0x12322e60 }, + { _MMIO(0x9888), 0x12524d60 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0014 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0fe000 }, + { _MMIO(0x9888), 0x0e0f0097 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x002d8000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x04121fb7 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x00308000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x00318000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x00321b80 }, + { _MMIO(0x9888), 0x0632003f }, + { _MMIO(0x9888), 0x00334000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0250c000 }, + { _MMIO(0x9888), 0x0251c000 }, + { _MMIO(0x9888), 0x02521fb7 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x02535000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900063 }, + { _MMIO(0x9888), 0x53900000 }, + { 
_MMIO(0x9888), 0x45900040 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { + { _MMIO(0xe458), 0x00001000 }, + { _MMIO(0xe558), 0x00003002 }, + { _MMIO(0xe658), 0x00005004 }, + { _MMIO(0xe758), 0x00011010 }, + { _MMIO(0xe45c), 0x00050012 }, + { _MMIO(0xe55c), 0x00052051 }, + { _MMIO(0xe65c), 0x00000008 }, +}; + +static const struct i915_oa_reg mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x121203e0 }, + { _MMIO(0x9888), 0x123203e0 }, + { _MMIO(0x9888), 0x125203e0 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0e0f006c }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x042d8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06114000 }, + { _MMIO(0x9888), 0x06120033 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x04308000 }, + { _MMIO(0x9888), 0x04318000 }, + { _MMIO(0x9888), 0x04321980 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x04334000 }, + { _MMIO(0x9888), 0x04504000 }, + { _MMIO(0x9888), 0x04514000 }, + { _MMIO(0x9888), 0x04520033 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x04531000 }, + { _MMIO(0x9888), 0x1190e000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { 
_MMIO(0x9888), 0x43900c00 }, + { _MMIO(0x9888), 0x45900002 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_vme_pipe[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00100030 }, + { _MMIO(0x2774), 0x0000fff9 }, + { _MMIO(0x2778), 0x00000002 }, + { _MMIO(0x277c), 0x0000fffc }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000fff3 }, + { _MMIO(0x2788), 0x00100180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00000002 }, + { _MMIO(0x279c), 0x0000ff3f }, +}; + +static const struct i915_oa_reg flex_eu_config_vme_pipe[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00008003 }, +}; + +static const struct i915_oa_reg mux_config_vme_pipe[] = { + { _MMIO(0x9888), 0x141a5800 }, + { _MMIO(0x9888), 0x161a00c0 }, + { _MMIO(0x9888), 0x12180240 }, + { _MMIO(0x9888), 0x14180002 }, + { _MMIO(0x9888), 0x143a5800 }, + { _MMIO(0x9888), 0x163a00c0 }, + { _MMIO(0x9888), 0x12380240 }, + { _MMIO(0x9888), 0x14380002 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x022f8000 }, + { _MMIO(0x9888), 0x042f3000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c1500 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f9500 }, + { _MMIO(0x9888), 
0x100f002a }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162c0a00 }, + { _MMIO(0x9888), 0x0a2dc000 }, + { _MMIO(0x9888), 0x0c2dc000 }, + { _MMIO(0x9888), 0x04193000 }, + { _MMIO(0x9888), 0x081a28c1 }, + { _MMIO(0x9888), 0x001a0000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x0613c000 }, + { _MMIO(0x9888), 0x0813f000 }, + { _MMIO(0x9888), 0x00172000 }, + { _MMIO(0x9888), 0x06178000 }, + { _MMIO(0x9888), 0x0817a000 }, + { _MMIO(0x9888), 0x00180037 }, + { _MMIO(0x9888), 0x06180940 }, + { _MMIO(0x9888), 0x08180000 }, + { _MMIO(0x9888), 0x02180000 }, + { _MMIO(0x9888), 0x04183000 }, + { _MMIO(0x9888), 0x06393000 }, + { _MMIO(0x9888), 0x0c3a28c1 }, + { _MMIO(0x9888), 0x003a0000 }, + { _MMIO(0x9888), 0x0a33f000 }, + { _MMIO(0x9888), 0x0c33f000 }, + { _MMIO(0x9888), 0x0a37a000 }, + { _MMIO(0x9888), 0x0c37a000 }, + { _MMIO(0x9888), 0x0a380977 }, + { _MMIO(0x9888), 0x08380000 }, + { _MMIO(0x9888), 0x04380000 }, + { _MMIO(0x9888), 0x06383000 }, + { _MMIO(0x9888), 0x119000ff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900040 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900800 }, + { _MMIO(0x9888), 0x47901000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900844 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_vme_pipe_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_vme_pipe; + lens[n] = ARRAY_SIZE(mux_config_vme_pipe); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 
0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810013 }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + +int i915_oa_select_metric_set_kblgt2(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config 
for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + 
ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + 
dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + 
dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_L3_2: + dev_priv->perf.oa.n_mux_configs = + get_l3_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_2); + + return 0; + case METRIC_SET_ID_L3_3: + dev_priv->perf.oa.n_mux_configs = + get_l3_3_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs 
== 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_3\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_3; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_3); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_3; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_3); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER: + dev_priv->perf.oa.n_mux_configs = + get_sampler_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + 
b_counter_config_sampler; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + 
DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_VME_PIPE: + dev_priv->perf.oa.n_mux_configs = + get_vme_pipe_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"VME_PIPE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_vme_pipe; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_vme_pipe); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_vme_pipe; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_vme_pipe); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + 
dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "f8d677e9-ff6f-4df1-9310-0334c6efacce", + .attrs = attrs_render_basic, +}; + +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "e17fc42a-e614-41b6-90c4-1074841a6c77", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "d7a17a3a-ca71-40d2-a919-ace80d50633f", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_memory_reads_id(struct device 
*kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static struct attribute_group group_memory_reads = { + .name = "57b59202-172b-477a-87de-33f85572c589", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "3addf8ef-8e9b-40f5-a448-3dbb5d5128b0", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "4af0400a-81c3-47db-a6b6-deddbd75680e", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = 
show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, +}; + +static struct attribute_group group_compute_l3_cache = { + .name = "0e22f995-79ca-4f67-83ab-e9d9772488d8", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "bc2a00f7-cb8a-4ff2-8ad0-e241dad16937", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "d2bbe790-f058-42d9-81c6-cdedcf655bc2", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_l3_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_2); +} + +static struct device_attribute dev_attr_l3_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_2_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_2[] = { + &dev_attr_l3_2_id.attr, + NULL, +}; + +static struct attribute_group group_l3_2 = { + .name = "2f8e32e4-5956-46e2-af31-c8ea95887332", + .attrs = attrs_l3_2, +}; + +static ssize_t +show_l3_3_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 
METRIC_SET_ID_L3_3); +} + +static struct device_attribute dev_attr_l3_3_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_3_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_3[] = { + &dev_attr_l3_3_id.attr, + NULL, +}; + +static struct attribute_group group_l3_3 = { + .name = "ca046aad-b5fb-4101-adce-6473ee6e5b14", + .attrs = attrs_l3_3, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "605f388f-24bb-455c-88e3-8d57ae0d7e9f", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER); +} + +static struct device_attribute dev_attr_sampler_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler[] = { + &dev_attr_sampler_id.attr, + NULL, +}; + +static struct attribute_group group_sampler = { + .name = "31dd157c-bf4e-4bab-bf2b-f5c8174af1af", + .attrs = attrs_sampler, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct 
attribute_group group_tdl_1 = { + .name = "105db928-5542-466b-9128-e1f3c91426cb", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "03db94d2-b37f-4c58-a791-0d2067b013bb", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + +static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "aa7a3fb9-22fb-43ff-a32d-0ab6c13bbd16", + .attrs = attrs_compute_extra, +}; + +static ssize_t +show_vme_pipe_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_VME_PIPE); +} + +static struct device_attribute dev_attr_vme_pipe_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_vme_pipe_id, + .store = NULL, +}; + +static struct attribute *attrs_vme_pipe[] = { + &dev_attr_vme_pipe_id.attr, + NULL, +}; + +static struct attribute_group group_vme_pipe = { + .name = "398a4268-ef6f-4ffc-b55f-3c7b5363ce61", + .attrs = attrs_vme_pipe, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = 
show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "baa3c7e4-52b6-4b85-801e-465a94b746dd", + .attrs = attrs_test_oa, +}; + +int +i915_perf_register_sysfs_kblgt2(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if 
(ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (ret) + goto error_l3_2; + } + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (ret) + goto error_l3_3; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (ret) + goto error_sampler; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (ret) + goto error_vme_pipe; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } + + return 0; + +error_test_oa: + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); +error_vme_pipe: + if 
(get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); +error_sampler: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); +error_l3_3: + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); +error_l3_2: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if 
(get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_kblgt2(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if 
(get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); +} diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt2.h b/drivers/gpu/drm/i915/i915_oa_kblgt2.h new file mode 100644 index 000000000000..7e61bfc4f9f5 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_kblgt2.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_KBLGT2_H__ +#define __I915_OA_KBLGT2_H__ + +extern int i915_oa_n_builtin_metric_sets_kblgt2; + +extern int i915_oa_select_metric_set_kblgt2(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_kblgt2(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_kblgt2(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt3.c b/drivers/gpu/drm/i915/i915_oa_kblgt3.c new file mode 100644 index 000000000000..6ed092566a32 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_kblgt3.c @@ -0,0 +1,3040 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_kblgt3.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_L3_2, + METRIC_SET_ID_L3_3, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_VME_PIPE, + METRIC_SET_ID_TEST_OA, +}; + +int i915_oa_n_builtin_metric_sets_kblgt3 = 18; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic[] = { + { _MMIO(0x9888), 0x166c01e0 }, + { _MMIO(0x9888), 0x12170280 }, + { _MMIO(0x9888), 0x12370280 }, + { _MMIO(0x9888), 0x16ec01e0 }, + { _MMIO(0x9888), 0x11930317 }, + { _MMIO(0x9888), 0x159303df }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x1a4e0380 }, + { _MMIO(0x9888), 0x0a6c0053 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x0a1b4000 }, + { _MMIO(0x9888), 0x1c1c0001 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x042f1000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c8400 }, + { _MMIO(0x9888), 0x0c4c0002 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 
0x0a0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f6600 }, + { _MMIO(0x9888), 0x100f0001 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x162ca200 }, + { _MMIO(0x9888), 0x062d8000 }, + { _MMIO(0x9888), 0x082d8000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x08133000 }, + { _MMIO(0x9888), 0x00170020 }, + { _MMIO(0x9888), 0x08170021 }, + { _MMIO(0x9888), 0x10170000 }, + { _MMIO(0x9888), 0x0633c000 }, + { _MMIO(0x9888), 0x0833c000 }, + { _MMIO(0x9888), 0x06370800 }, + { _MMIO(0x9888), 0x08370840 }, + { _MMIO(0x9888), 0x10370000 }, + { _MMIO(0x9888), 0x1ace0200 }, + { _MMIO(0x9888), 0x0aec5300 }, + { _MMIO(0x9888), 0x10ec0000 }, + { _MMIO(0x9888), 0x1cec0000 }, + { _MMIO(0x9888), 0x0a9b8000 }, + { _MMIO(0x9888), 0x1c9c0002 }, + { _MMIO(0x9888), 0x0ccc0002 }, + { _MMIO(0x9888), 0x0a8d8000 }, + { _MMIO(0x9888), 0x108f0001 }, + { _MMIO(0x9888), 0x16ac8000 }, + { _MMIO(0x9888), 0x0d933031 }, + { _MMIO(0x9888), 0x0f933e3f }, + { _MMIO(0x9888), 0x01933d00 }, + { _MMIO(0x9888), 0x0393073c }, + { _MMIO(0x9888), 0x0593000e }, + { _MMIO(0x9888), 0x1d930000 }, + { _MMIO(0x9888), 0x19930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d908000 }, + { _MMIO(0x9888), 0x2f908000 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x15908000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190003f }, + { _MMIO(0x9888), 0x51902240 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x55900242 }, + { _MMIO(0x9888), 0x45900084 }, + { _MMIO(0x9888), 0x47901400 }, + { _MMIO(0x9888), 0x57902220 }, + { _MMIO(0x9888), 0x49900c60 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900002 }, + { _MMIO(0x9888), 0x43900c63 }, + { _MMIO(0x9888), 0x53902222 }, +}; + 
+static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_basic; + lens[n] = ARRAY_SIZE(mux_config_render_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x1a4e0820 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x064f0900 }, + { _MMIO(0x9888), 0x084f0032 }, + { _MMIO(0x9888), 0x0a4f1891 }, + { _MMIO(0x9888), 0x0c4f0e00 }, + { _MMIO(0x9888), 0x0e4f003c }, + { _MMIO(0x9888), 0x004f0d80 }, + { _MMIO(0x9888), 0x024f003b }, + { _MMIO(0x9888), 0x006c0002 }, + { _MMIO(0x9888), 0x086c0100 }, + { _MMIO(0x9888), 0x0c6c000c }, + { _MMIO(0x9888), 0x0e6c0b00 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x081b8000 }, + { _MMIO(0x9888), 0x0c1b4000 }, + { _MMIO(0x9888), 0x0e1b8000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1c8000 }, + { _MMIO(0x9888), 0x1c1c0024 }, + { _MMIO(0x9888), 0x065b8000 }, + { _MMIO(0x9888), 
0x085b4000 }, + { _MMIO(0x9888), 0x0a5bc000 }, + { _MMIO(0x9888), 0x0c5b8000 }, + { _MMIO(0x9888), 0x0e5b4000 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025b4000 }, + { _MMIO(0x9888), 0x1a5c6000 }, + { _MMIO(0x9888), 0x1c5c001b }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2000 }, + { _MMIO(0x9888), 0x0c4c0208 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020d2000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2cc000 }, + { _MMIO(0x9888), 0x162cfb00 }, + { _MMIO(0x9888), 0x182c00be }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x19900157 }, + { _MMIO(0x9888), 0x1b900158 }, + { _MMIO(0x9888), 0x1d900105 }, + { _MMIO(0x9888), 0x1f900103 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x11900fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900821 }, + { _MMIO(0x9888), 0x47900802 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900802 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900002 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900422 }, + { _MMIO(0x9888), 0x53904444 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + 
{ _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { _MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x0c0e001f }, + { _MMIO(0x9888), 0x0a0f0000 }, + { _MMIO(0x9888), 0x10116800 }, + { _MMIO(0x9888), 0x178a03e0 }, + { _MMIO(0x9888), 0x11824c00 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x13840020 }, + { _MMIO(0x9888), 0x11850019 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x01870c40 }, + { _MMIO(0x9888), 0x17880000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x0a4c0040 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x040d4000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020e5400 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x080f0040 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x0e0f0040 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06110012 }, + { _MMIO(0x9888), 0x06131000 }, + { _MMIO(0x9888), 0x01898000 }, + { _MMIO(0x9888), 0x0d890100 
}, + { _MMIO(0x9888), 0x03898000 }, + { _MMIO(0x9888), 0x09808000 }, + { _MMIO(0x9888), 0x0b808000 }, + { _MMIO(0x9888), 0x0380c000 }, + { _MMIO(0x9888), 0x0f8a0075 }, + { _MMIO(0x9888), 0x1d8a0000 }, + { _MMIO(0x9888), 0x118a8000 }, + { _MMIO(0x9888), 0x1b8a4000 }, + { _MMIO(0x9888), 0x138a8000 }, + { _MMIO(0x9888), 0x1d81a000 }, + { _MMIO(0x9888), 0x15818000 }, + { _MMIO(0x9888), 0x17818000 }, + { _MMIO(0x9888), 0x0b820030 }, + { _MMIO(0x9888), 0x07828000 }, + { _MMIO(0x9888), 0x0d824000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x05824000 }, + { _MMIO(0x9888), 0x0d830003 }, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x03838000 }, + { _MMIO(0x9888), 0x07838000 }, + { _MMIO(0x9888), 0x0b840980 }, + { _MMIO(0x9888), 0x03844d80 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x09848000 }, + { _MMIO(0x9888), 0x09850080 }, + { _MMIO(0x9888), 0x03850003 }, + { _MMIO(0x9888), 0x01850000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x09870032 }, + { _MMIO(0x9888), 0x01888052 }, + { _MMIO(0x9888), 0x11880000 }, + { _MMIO(0x9888), 0x09884000 }, + { _MMIO(0x9888), 0x1b931001 }, + { _MMIO(0x9888), 0x1d930001 }, + { _MMIO(0x9888), 0x19934000 }, + { _MMIO(0x9888), 0x1b958000 }, + { _MMIO(0x9888), 0x1d950094 }, + { _MMIO(0x9888), 0x19958000 }, + { _MMIO(0x9888), 0x09e58000 }, + { _MMIO(0x9888), 0x0be58000 }, + { _MMIO(0x9888), 0x03e5c000 }, + { _MMIO(0x9888), 0x0592c000 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d924000 }, + { _MMIO(0x9888), 0x0f924000 }, + { _MMIO(0x9888), 0x11928000 }, + { _MMIO(0x9888), 0x1392c000 }, + { _MMIO(0x9888), 0x09924000 }, + { _MMIO(0x9888), 0x01985000 }, + { _MMIO(0x9888), 0x07988000 }, + { _MMIO(0x9888), 0x09981000 }, + { _MMIO(0x9888), 0x0b982000 }, + { _MMIO(0x9888), 0x0d982000 }, + { _MMIO(0x9888), 0x0f989000 }, + { _MMIO(0x9888), 0x05982000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { 
_MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1190c080 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900440 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x47900c21 }, + { _MMIO(0x9888), 0x57900400 }, + { _MMIO(0x9888), 0x49900042 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900024 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900841 }, + { _MMIO(0x9888), 0x53900400 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { 
_MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f900064 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900150 }, + { _MMIO(0x9888), 0x21900151 }, + { _MMIO(0x9888), 0x23900152 }, + { _MMIO(0x9888), 0x25900153 }, + { _MMIO(0x9888), 0x27900154 }, + { _MMIO(0x9888), 0x29900155 }, + { _MMIO(0x9888), 0x2b900156 }, + { _MMIO(0x9888), 0x2d900157 }, + { _MMIO(0x9888), 0x2f90015f }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { 
_MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 
0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes[] = { + { _MMIO(0x9888), 0x11810c00 }, + { _MMIO(0x9888), 0x1381001a }, + { _MMIO(0x9888), 0x37906800 }, + { _MMIO(0x9888), 0x3f901000 }, + { _MMIO(0x9888), 0x03811300 }, + { _MMIO(0x9888), 0x05811b12 }, + { _MMIO(0x9888), 0x0781001a }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x17810000 }, + { _MMIO(0x9888), 0x19810000 }, + { _MMIO(0x9888), 0x1b810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930055 }, + { _MMIO(0x9888), 0x03e58000 }, + { _MMIO(0x9888), 0x05e5c000 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x13900160 }, + { _MMIO(0x9888), 0x21900161 }, + { _MMIO(0x9888), 0x23900162 }, + { _MMIO(0x9888), 0x25900163 }, + { _MMIO(0x9888), 0x27900164 }, + { _MMIO(0x9888), 0x29900165 }, + { _MMIO(0x9888), 0x2b900166 }, + { _MMIO(0x9888), 0x2d900167 }, + { _MMIO(0x9888), 0x2f900150 }, + { _MMIO(0x9888), 0x31900105 }, + { _MMIO(0x9888), 0x15900103 }, + { _MMIO(0x9888), 0x17900101 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c60 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900c00 }, + { _MMIO(0x9888), 0x47900c63 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900c63 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900063 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = 
ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { _MMIO(0x2778), 0x0007fc6a }, + { _MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { _MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_extended[] = { + { _MMIO(0x9888), 0x106c00e0 }, + { _MMIO(0x9888), 0x141c8160 }, + { _MMIO(0x9888), 0x161c8015 }, + { _MMIO(0x9888), 0x181c0120 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4eaaa0 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0e6c0b01 }, + { _MMIO(0x9888), 0x006c0200 }, + { _MMIO(0x9888), 0x026c000c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x001c0041 
}, + { _MMIO(0x9888), 0x061c4200 }, + { _MMIO(0x9888), 0x081c4443 }, + { _MMIO(0x9888), 0x0a1c4645 }, + { _MMIO(0x9888), 0x0c1c7647 }, + { _MMIO(0x9888), 0x041c7357 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x101c0000 }, + { _MMIO(0x9888), 0x1a1c0000 }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4caa2a }, + { _MMIO(0x9888), 0x0c4c02aa }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x000da000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x0c0f5400 }, + { _MMIO(0x9888), 0x0e0f5515 }, + { _MMIO(0x9888), 0x100f0155 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x11907fff }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900040 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900802 }, + { _MMIO(0x9888), 0x47900842 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900842 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x43900800 }, + { _MMIO(0x9888), 0x53900000 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extended; + lens[n] = ARRAY_SIZE(mux_config_compute_extended); + n++; + + return n; +} + +static const struct i915_oa_reg 
b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { _MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { _MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x166c0760 }, + { _MMIO(0x9888), 0x1593001e }, + { _MMIO(0x9888), 0x3f900003 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x184e8000 }, + { _MMIO(0x9888), 0x1a4e8020 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x006c0051 }, + { _MMIO(0x9888), 0x066c5000 }, + { _MMIO(0x9888), 0x086c5c5d }, + { _MMIO(0x9888), 0x0e6c5e5f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x186c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x061b8000 }, + { _MMIO(0x9888), 0x081bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, + { _MMIO(0x9888), 0x101c8000 }, + { _MMIO(0x9888), 0x1a1ce000 }, + { _MMIO(0x9888), 0x1c1c0030 }, + { _MMIO(0x9888), 0x004c8000 }, + { _MMIO(0x9888), 0x0a4c2a00 }, + { _MMIO(0x9888), 0x0c4c0280 }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f1500 }, + { _MMIO(0x9888), 0x100f0140 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 
0x0e2c8000 }, + { _MMIO(0x9888), 0x162c0a00 }, + { _MMIO(0x9888), 0x182c00a0 }, + { _MMIO(0x9888), 0x03933300 }, + { _MMIO(0x9888), 0x05930032 }, + { _MMIO(0x9888), 0x11930000 }, + { _MMIO(0x9888), 0x1b930000 }, + { _MMIO(0x9888), 0x1d900157 }, + { _MMIO(0x9888), 0x1f900158 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1190030f }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900021 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x53904444 }, + { _MMIO(0x9888), 0x43900000 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x104f0232 }, + { _MMIO(0x9888), 
0x124f4640 }, + { _MMIO(0x9888), 0x106c0232 }, + { _MMIO(0x9888), 0x11834400 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x004f1880 }, + { _MMIO(0x9888), 0x024f08bb }, + { _MMIO(0x9888), 0x044f001b }, + { _MMIO(0x9888), 0x046c0100 }, + { _MMIO(0x9888), 0x066c000b }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x041b8000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x005b8000 }, + { _MMIO(0x9888), 0x025bc000 }, + { _MMIO(0x9888), 0x045b4000 }, + { _MMIO(0x9888), 0x125c8000 }, + { _MMIO(0x9888), 0x145c8000 }, + { _MMIO(0x9888), 0x165c8000 }, + { _MMIO(0x9888), 0x185c8000 }, + { _MMIO(0x9888), 0x0a4c00a0 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x022cc000 }, + { _MMIO(0x9888), 0x042cc000 }, + { _MMIO(0x9888), 0x062cc000 }, + { _MMIO(0x9888), 0x082cc000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x0f8305c0 }, + { _MMIO(0x9888), 0x09830000 }, + { _MMIO(0x9888), 0x07830000 }, + { _MMIO(0x9888), 0x1d950080 }, + { _MMIO(0x9888), 0x13928000 }, + { _MMIO(0x9888), 0x0f988000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900040 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = 
mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x126c7b40 }, + { _MMIO(0x9888), 0x166c0020 }, + { _MMIO(0x9888), 0x0a603444 }, + { _MMIO(0x9888), 0x0a613400 }, + { _MMIO(0x9888), 0x1a4ea800 }, + { _MMIO(0x9888), 0x1c4e0002 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1e6c0000 }, + { _MMIO(0x9888), 0x1a6c0800 }, + { _MMIO(0x9888), 0x0c1bc000 }, + { _MMIO(0x9888), 0x0e1bc000 }, 
+ { _MMIO(0x9888), 0x001b8000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x1c1c003c }, + { _MMIO(0x9888), 0x121c8000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x10600000 }, + { _MMIO(0x9888), 0x04600000 }, + { _MMIO(0x9888), 0x0c610044 }, + { _MMIO(0x9888), 0x10610000 }, + { _MMIO(0x9888), 0x06610000 }, + { _MMIO(0x9888), 0x0c4c02a8 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0154 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x182c00aa }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { _MMIO(0x9888), 0x1190ffc0 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900420 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900021 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900400 }, + { _MMIO(0x9888), 0x43900421 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_2[] = { + { _MMIO(0x2740), 
0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_2[] = { + { _MMIO(0x9888), 0x126c02e0 }, + { _MMIO(0x9888), 0x146c0001 }, + { _MMIO(0x9888), 0x0a623400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x064f4000 }, + { _MMIO(0x9888), 0x026c3324 }, + { _MMIO(0x9888), 0x046c3422 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x141c8000 }, + { _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c0800 }, + { _MMIO(0x9888), 0x065b4000 }, + { _MMIO(0x9888), 0x1a5c1000 }, + { _MMIO(0x9888), 0x06614000 }, + { _MMIO(0x9888), 0x0c620044 }, + { _MMIO(0x9888), 0x10620000 }, + { _MMIO(0x9888), 0x06620000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c002a }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2cc000 }, + { 
_MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_2; + lens[n] = ARRAY_SIZE(mux_config_l3_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_3[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00028002 }, + { _MMIO(0x277c), 0x000087ff }, + { _MMIO(0x2780), 0x00020002 }, + { _MMIO(0x2784), 0x00008fff }, + { _MMIO(0x2788), 0x00008002 }, + { _MMIO(0x278c), 0x0000a7ff }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_3[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_3[] = { + { _MMIO(0x9888), 0x126c4e80 }, + { _MMIO(0x9888), 0x146c0000 }, + { _MMIO(0x9888), 0x0a633400 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e8000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x026c3321 }, + { _MMIO(0x9888), 0x046c342f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1a6c2000 }, + { _MMIO(0x9888), 0x021bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x061b4000 }, + { _MMIO(0x9888), 0x141c8000 }, + 
{ _MMIO(0x9888), 0x161c8000 }, + { _MMIO(0x9888), 0x181c8000 }, + { _MMIO(0x9888), 0x1a1c1800 }, + { _MMIO(0x9888), 0x06604000 }, + { _MMIO(0x9888), 0x0c630044 }, + { _MMIO(0x9888), 0x10630000 }, + { _MMIO(0x9888), 0x06630000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c00aa }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f4000 }, + { _MMIO(0x9888), 0x0e0f0055 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190f800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900002 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_l3_3_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_3; + lens[n] = ARRAY_SIZE(mux_config_l3_3); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000efff }, + { _MMIO(0x2778), 0x00006000 }, + { _MMIO(0x277c), 0x0000f3ff }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg 
mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x102f3800 }, + { _MMIO(0x9888), 0x144d0500 }, + { _MMIO(0x9888), 0x120d03c0 }, + { _MMIO(0x9888), 0x140d03cf }, + { _MMIO(0x9888), 0x0c0f0004 }, + { _MMIO(0x9888), 0x0c4e4000 }, + { _MMIO(0x9888), 0x042f0480 }, + { _MMIO(0x9888), 0x082f0000 }, + { _MMIO(0x9888), 0x022f0000 }, + { _MMIO(0x9888), 0x0a4c0090 }, + { _MMIO(0x9888), 0x064d0027 }, + { _MMIO(0x9888), 0x004d0000 }, + { _MMIO(0x9888), 0x000d0d40 }, + { _MMIO(0x9888), 0x020d803f }, + { _MMIO(0x9888), 0x040d8023 }, + { _MMIO(0x9888), 0x100d0000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x020f0010 }, + { _MMIO(0x9888), 0x000f0000 }, + { _MMIO(0x9888), 0x0e0f0050 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41901400 }, + { _MMIO(0x9888), 0x43901485 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler[] = { 
+ { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler[] = { + { _MMIO(0x9888), 0x14152c00 }, + { _MMIO(0x9888), 0x16150005 }, + { _MMIO(0x9888), 0x121600a0 }, + { _MMIO(0x9888), 0x14352c00 }, + { _MMIO(0x9888), 0x16350005 }, + { _MMIO(0x9888), 0x123600a0 }, + { _MMIO(0x9888), 0x14552c00 }, + { _MMIO(0x9888), 0x16550005 }, + { _MMIO(0x9888), 0x125600a0 }, + { _MMIO(0x9888), 0x062f6000 }, + { _MMIO(0x9888), 0x022f2000 }, + { _MMIO(0x9888), 0x0c4c0050 }, + { _MMIO(0x9888), 0x0a4c0010 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f0350 }, + { _MMIO(0x9888), 0x0c0fb000 }, + { _MMIO(0x9888), 0x0e0f00da }, + { _MMIO(0x9888), 0x182c0028 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x022dc000 }, + { _MMIO(0x9888), 0x042d4000 }, + { _MMIO(0x9888), 0x0c138000 }, + { _MMIO(0x9888), 0x0e132000 }, + { _MMIO(0x9888), 0x0413c000 }, + { _MMIO(0x9888), 0x1c140018 }, + { _MMIO(0x9888), 0x0c157000 }, + { _MMIO(0x9888), 0x0e150078 }, + { _MMIO(0x9888), 0x10150000 }, + { _MMIO(0x9888), 0x04162180 }, + { _MMIO(0x9888), 0x02160000 }, + { _MMIO(0x9888), 0x04174000 }, + { _MMIO(0x9888), 0x0233a000 }, + { _MMIO(0x9888), 0x04333000 }, + { _MMIO(0x9888), 0x14348000 }, + { _MMIO(0x9888), 0x16348000 }, + { _MMIO(0x9888), 0x02357870 }, + { _MMIO(0x9888), 0x10350000 }, + { _MMIO(0x9888), 0x04360043 }, + { _MMIO(0x9888), 0x02360000 }, + { _MMIO(0x9888), 0x04371000 }, + { _MMIO(0x9888), 0x0e538000 }, + { _MMIO(0x9888), 0x00538000 }, + { _MMIO(0x9888), 0x06533000 }, + { _MMIO(0x9888), 0x1c540020 }, + { _MMIO(0x9888), 0x12548000 }, + { _MMIO(0x9888), 0x0e557000 }, + { _MMIO(0x9888), 
0x00557800 }, + { _MMIO(0x9888), 0x10550000 }, + { _MMIO(0x9888), 0x06560043 }, + { _MMIO(0x9888), 0x02560000 }, + { _MMIO(0x9888), 0x06571000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900060 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900842 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900060 }, +}; + +static int +get_sampler_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler; + lens[n] = ARRAY_SIZE(mux_config_sampler); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x00007fff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x00009fff }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000efff }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000f3ff }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fdff }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x12120000 }, + { _MMIO(0x9888), 0x12320000 }, + { 
_MMIO(0x9888), 0x12520000 }, + { _MMIO(0x9888), 0x002f8000 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0015 }, + { _MMIO(0x9888), 0x0c0d8000 }, + { _MMIO(0x9888), 0x0e0da000 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x100f03a0 }, + { _MMIO(0x9888), 0x0c0ff000 }, + { _MMIO(0x9888), 0x0e0f0095 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x0c2d8000 }, + { _MMIO(0x9888), 0x0e2d4000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x02108000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x02118000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x02121880 }, + { _MMIO(0x9888), 0x041219b5 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x02134000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x0c308000 }, + { _MMIO(0x9888), 0x0e304000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x0c318000 }, + { _MMIO(0x9888), 0x0e314000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x0c321a80 }, + { _MMIO(0x9888), 0x0e320033 }, + { _MMIO(0x9888), 0x06320031 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x0c334000 }, + { _MMIO(0x9888), 0x0e331000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0e508000 }, + { _MMIO(0x9888), 0x00508000 }, + { _MMIO(0x9888), 0x02504000 }, + { _MMIO(0x9888), 0x0e518000 }, + { _MMIO(0x9888), 0x00518000 }, + { _MMIO(0x9888), 0x02514000 }, + { _MMIO(0x9888), 0x0e521880 }, + { _MMIO(0x9888), 0x00521a80 }, + { _MMIO(0x9888), 0x02520033 }, + { _MMIO(0x9888), 0x0e534000 }, + { _MMIO(0x9888), 0x00534000 }, + { _MMIO(0x9888), 0x02531000 }, + { _MMIO(0x9888), 0x1190ff80 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900800 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4b900062 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 
0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900003 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x12124d60 }, + { _MMIO(0x9888), 0x12322e60 }, + { _MMIO(0x9888), 0x12524d60 }, + { _MMIO(0x9888), 0x022f3000 }, + { _MMIO(0x9888), 0x0a4c0014 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0fe000 }, + { _MMIO(0x9888), 0x0e0f0097 }, + { _MMIO(0x9888), 0x082c8000 }, + { _MMIO(0x9888), 0x0a2c8000 }, + { _MMIO(0x9888), 0x002d8000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x0410c000 }, + { _MMIO(0x9888), 0x0411c000 }, + { _MMIO(0x9888), 0x04121fb7 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x04135000 }, + { _MMIO(0x9888), 0x00308000 }, + { _MMIO(0x9888), 0x06304000 }, + { _MMIO(0x9888), 0x00318000 }, + { _MMIO(0x9888), 0x06314000 }, + { _MMIO(0x9888), 0x00321b80 }, + { _MMIO(0x9888), 0x0632003f }, + { 
_MMIO(0x9888), 0x00334000 }, + { _MMIO(0x9888), 0x06331000 }, + { _MMIO(0x9888), 0x0250c000 }, + { _MMIO(0x9888), 0x0251c000 }, + { _MMIO(0x9888), 0x02521fb7 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x02535000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900800 }, + { _MMIO(0x9888), 0x43900063 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900040 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { +}; + +static const struct i915_oa_reg mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x121203e0 }, + { _MMIO(0x9888), 0x123203e0 }, + { _MMIO(0x9888), 0x125203e0 }, + { _MMIO(0x9888), 0x129203e0 }, + { _MMIO(0x9888), 0x12b203e0 }, + { _MMIO(0x9888), 0x12d203e0 }, + { _MMIO(0x9888), 0x024ec000 }, + { _MMIO(0x9888), 0x044ec000 }, + { _MMIO(0x9888), 0x064ec000 }, + { _MMIO(0x9888), 0x022f4000 }, + { _MMIO(0x9888), 0x084ca000 }, + { _MMIO(0x9888), 0x0a4c0042 }, + { _MMIO(0x9888), 0x000d8000 }, + { _MMIO(0x9888), 0x020da000 }, + { _MMIO(0x9888), 0x040da000 }, + { _MMIO(0x9888), 0x060d2000 }, + { _MMIO(0x9888), 0x0c0f5000 }, + { _MMIO(0x9888), 0x0e0f006d }, + { _MMIO(0x9888), 0x022c8000 }, + { _MMIO(0x9888), 0x042c8000 }, + { _MMIO(0x9888), 0x062c8000 }, + { _MMIO(0x9888), 0x0c2c8000 }, + { _MMIO(0x9888), 0x042d8000 }, + { _MMIO(0x9888), 0x06104000 }, + { _MMIO(0x9888), 0x06114000 }, + { _MMIO(0x9888), 0x06120033 }, + { _MMIO(0x9888), 0x00120000 }, + { _MMIO(0x9888), 0x06131000 }, + { 
_MMIO(0x9888), 0x04308000 }, + { _MMIO(0x9888), 0x04318000 }, + { _MMIO(0x9888), 0x04321980 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x04334000 }, + { _MMIO(0x9888), 0x04504000 }, + { _MMIO(0x9888), 0x04514000 }, + { _MMIO(0x9888), 0x04520033 }, + { _MMIO(0x9888), 0x00520000 }, + { _MMIO(0x9888), 0x04531000 }, + { _MMIO(0x9888), 0x00af8000 }, + { _MMIO(0x9888), 0x0acc0001 }, + { _MMIO(0x9888), 0x008d8000 }, + { _MMIO(0x9888), 0x028da000 }, + { _MMIO(0x9888), 0x0c8fb000 }, + { _MMIO(0x9888), 0x0e8f0001 }, + { _MMIO(0x9888), 0x06ac8000 }, + { _MMIO(0x9888), 0x02ad4000 }, + { _MMIO(0x9888), 0x02908000 }, + { _MMIO(0x9888), 0x02918000 }, + { _MMIO(0x9888), 0x02921980 }, + { _MMIO(0x9888), 0x00920000 }, + { _MMIO(0x9888), 0x02934000 }, + { _MMIO(0x9888), 0x02b04000 }, + { _MMIO(0x9888), 0x02b14000 }, + { _MMIO(0x9888), 0x02b20033 }, + { _MMIO(0x9888), 0x00b20000 }, + { _MMIO(0x9888), 0x02b31000 }, + { _MMIO(0x9888), 0x00d08000 }, + { _MMIO(0x9888), 0x00d18000 }, + { _MMIO(0x9888), 0x00d21980 }, + { _MMIO(0x9888), 0x00d34000 }, + { _MMIO(0x9888), 0x1190fc00 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x51900000 }, + { _MMIO(0x9888), 0x41900c00 }, + { _MMIO(0x9888), 0x43900002 }, + { _MMIO(0x9888), 0x53900420 }, + { _MMIO(0x9888), 0x459000a1 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_vme_pipe[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00100030 }, + { _MMIO(0x2774), 0x0000fff9 
}, + { _MMIO(0x2778), 0x00000002 }, + { _MMIO(0x277c), 0x0000fffc }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000fff3 }, + { _MMIO(0x2788), 0x00100180 }, + { _MMIO(0x278c), 0x0000ffcf }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00000002 }, + { _MMIO(0x279c), 0x0000ff3f }, +}; + +static const struct i915_oa_reg flex_eu_config_vme_pipe[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00008003 }, +}; + +static const struct i915_oa_reg mux_config_vme_pipe[] = { + { _MMIO(0x9888), 0x141a5800 }, + { _MMIO(0x9888), 0x161a00c0 }, + { _MMIO(0x9888), 0x12180240 }, + { _MMIO(0x9888), 0x14180002 }, + { _MMIO(0x9888), 0x149a5800 }, + { _MMIO(0x9888), 0x169a00c0 }, + { _MMIO(0x9888), 0x12980240 }, + { _MMIO(0x9888), 0x14980002 }, + { _MMIO(0x9888), 0x1a4e3fc0 }, + { _MMIO(0x9888), 0x002f1000 }, + { _MMIO(0x9888), 0x022f8000 }, + { _MMIO(0x9888), 0x042f3000 }, + { _MMIO(0x9888), 0x004c4000 }, + { _MMIO(0x9888), 0x0a4c9500 }, + { _MMIO(0x9888), 0x0c4c002a }, + { _MMIO(0x9888), 0x000d2000 }, + { _MMIO(0x9888), 0x060d8000 }, + { _MMIO(0x9888), 0x080da000 }, + { _MMIO(0x9888), 0x0a0da000 }, + { _MMIO(0x9888), 0x0c0da000 }, + { _MMIO(0x9888), 0x0c0f0400 }, + { _MMIO(0x9888), 0x0e0f5500 }, + { _MMIO(0x9888), 0x100f0015 }, + { _MMIO(0x9888), 0x002c8000 }, + { _MMIO(0x9888), 0x0e2c8000 }, + { _MMIO(0x9888), 0x162caa00 }, + { _MMIO(0x9888), 0x182c000a }, + { _MMIO(0x9888), 0x04193000 }, + { _MMIO(0x9888), 0x081a28c1 }, + { _MMIO(0x9888), 0x001a0000 }, + { _MMIO(0x9888), 0x00133000 }, + { _MMIO(0x9888), 0x0613c000 }, + { _MMIO(0x9888), 0x0813f000 }, + { _MMIO(0x9888), 0x00172000 }, + { _MMIO(0x9888), 0x06178000 }, + { _MMIO(0x9888), 0x0817a000 }, + { _MMIO(0x9888), 0x00180037 }, + { _MMIO(0x9888), 0x06180940 }, + { _MMIO(0x9888), 0x08180000 }, + { _MMIO(0x9888), 0x02180000 }, + { _MMIO(0x9888), 0x04183000 }, + { _MMIO(0x9888), 0x04afc000 }, + { _MMIO(0x9888), 0x06af3000 }, + { _MMIO(0x9888), 0x0acc4000 }, + { 
_MMIO(0x9888), 0x0ccc0015 }, + { _MMIO(0x9888), 0x0a8da000 }, + { _MMIO(0x9888), 0x0c8da000 }, + { _MMIO(0x9888), 0x0e8f4000 }, + { _MMIO(0x9888), 0x108f0015 }, + { _MMIO(0x9888), 0x16aca000 }, + { _MMIO(0x9888), 0x18ac000a }, + { _MMIO(0x9888), 0x06993000 }, + { _MMIO(0x9888), 0x0c9a28c1 }, + { _MMIO(0x9888), 0x009a0000 }, + { _MMIO(0x9888), 0x0a93f000 }, + { _MMIO(0x9888), 0x0c93f000 }, + { _MMIO(0x9888), 0x0a97a000 }, + { _MMIO(0x9888), 0x0c97a000 }, + { _MMIO(0x9888), 0x0a980977 }, + { _MMIO(0x9888), 0x08980000 }, + { _MMIO(0x9888), 0x04980000 }, + { _MMIO(0x9888), 0x06983000 }, + { _MMIO(0x9888), 0x119000ff }, + { _MMIO(0x9888), 0x51900040 }, + { _MMIO(0x9888), 0x41900020 }, + { _MMIO(0x9888), 0x55900004 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x479008a5 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900002 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_vme_pipe_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_vme_pipe; + lens[n] = ARRAY_SIZE(mux_config_vme_pipe); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { 
_MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810013 }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + +int i915_oa_select_metric_set_kblgt3(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = 
+ flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if 
(dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + 
b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + 
get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_L3_2: + dev_priv->perf.oa.n_mux_configs = + get_l3_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_2); + + return 0; + case METRIC_SET_ID_L3_3: + dev_priv->perf.oa.n_mux_configs = + get_l3_3_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_3\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_3; + dev_priv->perf.oa.b_counter_regs_len = 
+ ARRAY_SIZE(b_counter_config_l3_3); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_3; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_3); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER: + dev_priv->perf.oa.n_mux_configs = + get_sampler_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + 
dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + dev_priv->perf.oa.b_counter_regs_len = + 
ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_VME_PIPE: + dev_priv->perf.oa.n_mux_configs = + get_vme_pipe_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"VME_PIPE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_vme_pipe; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_vme_pipe); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_vme_pipe; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_vme_pipe); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute 
dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "0286c920-2f6d-493b-b22d-7a5280df43de", + .attrs = attrs_render_basic, +}; + +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "9823aaa1-b06f-40ce-884b-cd798c79f0c2", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "c7c735f3-ce58-45cf-aa04-30b183f1faff", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_memory_reads_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static 
struct attribute_group group_memory_reads = { + .name = "96ec2219-040b-428a-856a-6bc03363a057", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "03372b64-4996-4d3b-aa18-790e75eeb9c2", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "31b4ce5a-bd61-4c1f-bb5d-f2e731412150", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, +}; + +static struct attribute_group group_compute_l3_cache = { + .name = "2ce0911a-27fc-4887-96f0-11084fa807c3", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char 
*buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "546c4c1d-99b8-42fb-a107-5aaabb5314a8", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "4e93d156-9b39-4268-8544-a8e0480806d7", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_l3_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_2); +} + +static struct device_attribute dev_attr_l3_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_2_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_2[] = { + &dev_attr_l3_2_id.attr, + NULL, +}; + +static struct attribute_group group_l3_2 = { + .name = "de1bec86-ca92-4b43-89fa-147653221cc0", + .attrs = attrs_l3_2, +}; + +static ssize_t +show_l3_3_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_3); +} + +static struct device_attribute dev_attr_l3_3_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_3_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_3[] = { + &dev_attr_l3_3_id.attr, + NULL, +}; + +static struct attribute_group group_l3_3 = { + .name = "e63537bb-10be-4d4a-92c4-c6b0c65e02ef", + .attrs = attrs_l3_3, +}; + +static ssize_t 
+show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "7a03a9f8-ec5e-46bb-8b67-1f0ff1476281", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER); +} + +static struct device_attribute dev_attr_sampler_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler[] = { + &dev_attr_sampler_id.attr, + NULL, +}; + +static struct attribute_group group_sampler = { + .name = "b25d2ebf-a6e0-4b29-96be-a9b010edeeda", + .attrs = attrs_sampler, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_1 = { + .name = "469a05e5-e299-46f7-9598-7b05f3c34991", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = 
NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "52f925c6-786a-4ec6-86ce-cba85c83453a", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + +static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "efc497ac-884e-4ee4-a4a8-15fba22aaf21", + .attrs = attrs_compute_extra, +}; + +static ssize_t +show_vme_pipe_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_VME_PIPE); +} + +static struct device_attribute dev_attr_vme_pipe_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_vme_pipe_id, + .store = NULL, +}; + +static struct attribute *attrs_vme_pipe[] = { + &dev_attr_vme_pipe_id.attr, + NULL, +}; + +static struct attribute_group group_vme_pipe = { + .name = "bfd9764d-2c5b-4c16-bfc1-89de3ca10917", + .attrs = attrs_vme_pipe, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "f1792f32-6db2-4b50-b4b2-557128f1688d", + .attrs = attrs_test_oa, +}; + +int +i915_perf_register_sysfs_kblgt3(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg 
*mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (ret) + goto error_l3_2; + } + 
if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (ret) + goto error_l3_3; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (ret) + goto error_sampler; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (ret) + goto error_vme_pipe; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } + + return 0; + +error_test_oa: + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); +error_vme_pipe: + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); +error_sampler: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); +error_l3_3: + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); +error_l3_2: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if 
(get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_kblgt3(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_l3_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_2); + if (get_l3_3_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_3); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_vme_pipe_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_vme_pipe); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); +} diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt3.h b/drivers/gpu/drm/i915/i915_oa_kblgt3.h new file mode 100644 index 000000000000..b0ca7f3114d3 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_kblgt3.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_KBLGT3_H__ +#define __I915_OA_KBLGT3_H__ + +extern int i915_oa_n_builtin_metric_sets_kblgt3; + +extern int i915_oa_select_metric_set_kblgt3(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_kblgt3(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_kblgt3(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1980f9597b19..e8fca43eea75 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -202,6 +202,8 @@ #include "i915_oa_sklgt3.h" #include "i915_oa_sklgt4.h" #include "i915_oa_bxt.h" +#include "i915_oa_kblgt2.h" +#include "i915_oa_kblgt3.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1802,7 +1804,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) * be read back from automatically triggered reports, as part of the * RPT_ID field. 
*/ - if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || + IS_KABYLAKE(dev_priv)) { I915_WRITE(GEN8_OA_DEBUG, _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); @@ -2887,6 +2890,15 @@ void i915_perf_register(struct drm_i915_private *dev_priv) } else if (IS_BROXTON(dev_priv)) { if (i915_perf_register_sysfs_bxt(dev_priv)) goto sysfs_error; + } else if (IS_KABYLAKE(dev_priv)) { + if (IS_KBL_GT2(dev_priv)) { + if (i915_perf_register_sysfs_kblgt2(dev_priv)) + goto sysfs_error; + } else if (IS_KBL_GT3(dev_priv)) { + if (i915_perf_register_sysfs_kblgt3(dev_priv)) + goto sysfs_error; + } else + goto sysfs_error; } goto exit; @@ -2928,6 +2940,12 @@ void i915_perf_unregister(struct drm_i915_private *dev_priv) i915_perf_unregister_sysfs_sklgt4(dev_priv); } else if (IS_BROXTON(dev_priv)) i915_perf_unregister_sysfs_bxt(dev_priv); + else if (IS_KABYLAKE(dev_priv)) { + if (IS_KBL_GT2(dev_priv)) + i915_perf_unregister_sysfs_kblgt2(dev_priv); + else if (IS_KBL_GT3(dev_priv)) + i915_perf_unregister_sysfs_kblgt3(dev_priv); + } kobject_put(dev_priv->perf.metrics_kobj); dev_priv->perf.metrics_kobj = NULL; @@ -3061,6 +3079,16 @@ void i915_perf_init(struct drm_i915_private *dev_priv) i915_oa_n_builtin_metric_sets_bxt; dev_priv->perf.oa.ops.select_metric_set = i915_oa_select_metric_set_bxt; + } else if (IS_KBL_GT2(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_kblgt2; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_kblgt2; + } else if (IS_KBL_GT3(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_kblgt3; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_kblgt3; } } From 28c7ef9ecca5984cbb416497f300826a870d9cf3 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 13 Jun 2017 12:23:09 +0100 Subject: [PATCH 170/341] drm/i915/perf: add GLK support Add OA support for Geminilake 
(pretty much identical to Broxton), and also add the associated OA configurations. Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Signed-off-by: Ben Widawsky Link: http://patchwork.freedesktop.org/patch/msgid/20170613112309.4088-2-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_oa_glk.c | 2602 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_oa_glk.h | 40 + drivers/gpu/drm/i915/i915_perf.c | 17 +- 4 files changed, 2659 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_oa_glk.c create mode 100644 drivers/gpu/drm/i915/i915_oa_glk.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 033a2df01dbe..f8227318dcaf 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -137,7 +137,8 @@ i915-y += i915_perf.o \ i915_oa_sklgt4.o \ i915_oa_bxt.o \ i915_oa_kblgt2.o \ - i915_oa_kblgt3.o + i915_oa_kblgt3.o \ + i915_oa_glk.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o diff --git a/drivers/gpu/drm/i915/i915_oa_glk.c b/drivers/gpu/drm/i915/i915_oa_glk.c new file mode 100644 index 000000000000..2f356d51bff8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_glk.c @@ -0,0 +1,2602 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_glk.h" + +enum metric_set_id { + METRIC_SET_ID_RENDER_BASIC = 1, + METRIC_SET_ID_COMPUTE_BASIC, + METRIC_SET_ID_RENDER_PIPE_PROFILE, + METRIC_SET_ID_MEMORY_READS, + METRIC_SET_ID_MEMORY_WRITES, + METRIC_SET_ID_COMPUTE_EXTENDED, + METRIC_SET_ID_COMPUTE_L3_CACHE, + METRIC_SET_ID_HDC_AND_SF, + METRIC_SET_ID_L3_1, + METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND, + METRIC_SET_ID_SAMPLER, + METRIC_SET_ID_TDL_1, + METRIC_SET_ID_TDL_2, + METRIC_SET_ID_COMPUTE_EXTRA, + METRIC_SET_ID_TEST_OA, +}; + +int i915_oa_n_builtin_metric_sets_glk = 15; + +static const struct i915_oa_reg b_counter_config_render_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_render_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_render_basic[] = { + { _MMIO(0x9888), 0x166c00f0 }, + { _MMIO(0x9888), 0x12120280 }, + { _MMIO(0x9888), 0x12320280 }, + { _MMIO(0x9888), 0x11930317 }, + { _MMIO(0x9888), 0x159303df }, + { _MMIO(0x9888), 0x3f900c00 }, + { _MMIO(0x9888), 0x419000a0 }, + { _MMIO(0x9888), 0x002d1000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d5000 }, + { _MMIO(0x9888), 0x0a2d1000 }, + { _MMIO(0x9888), 0x0c2e0800 }, + { _MMIO(0x9888), 0x0e2e5900 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4c8000 }, + { _MMIO(0x9888), 0x0e4c4000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e2000 }, + { _MMIO(0x9888), 0x1c4f0010 }, + { _MMIO(0x9888), 0x0a6c0053 }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1a0fcc00 
}, + { _MMIO(0x9888), 0x1c0f0002 }, + { _MMIO(0x9888), 0x1c2c0040 }, + { _MMIO(0x9888), 0x00101000 }, + { _MMIO(0x9888), 0x04101000 }, + { _MMIO(0x9888), 0x00114000 }, + { _MMIO(0x9888), 0x08114000 }, + { _MMIO(0x9888), 0x00120020 }, + { _MMIO(0x9888), 0x08120021 }, + { _MMIO(0x9888), 0x00141000 }, + { _MMIO(0x9888), 0x08141000 }, + { _MMIO(0x9888), 0x02308000 }, + { _MMIO(0x9888), 0x04302000 }, + { _MMIO(0x9888), 0x06318000 }, + { _MMIO(0x9888), 0x08318000 }, + { _MMIO(0x9888), 0x06320800 }, + { _MMIO(0x9888), 0x08320840 }, + { _MMIO(0x9888), 0x00320000 }, + { _MMIO(0x9888), 0x06344000 }, + { _MMIO(0x9888), 0x08344000 }, + { _MMIO(0x9888), 0x0d931831 }, + { _MMIO(0x9888), 0x0f939f3f }, + { _MMIO(0x9888), 0x01939e80 }, + { _MMIO(0x9888), 0x039303bc }, + { _MMIO(0x9888), 0x0593000e }, + { _MMIO(0x9888), 0x1993002a }, + { _MMIO(0x9888), 0x07930000 }, + { _MMIO(0x9888), 0x09930000 }, + { _MMIO(0x9888), 0x1d900177 }, + { _MMIO(0x9888), 0x1f900187 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x53901110 }, + { _MMIO(0x9888), 0x43900423 }, + { _MMIO(0x9888), 0x55900111 }, + { _MMIO(0x9888), 0x47900c02 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900020 }, + { _MMIO(0x9888), 0x59901111 }, + { _MMIO(0x9888), 0x4b900421 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x45900821 }, +}; + +static int +get_render_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_basic; + lens[n] = ARRAY_SIZE(mux_config_render_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_basic[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2740), 0x00000000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_basic[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg mux_config_compute_basic[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x124f1c00 }, + { _MMIO(0x9888), 0x39900340 }, + { _MMIO(0x9888), 0x3f900c00 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x002d5000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d4000 }, + { _MMIO(0x9888), 0x0a2d1000 }, + { _MMIO(0x9888), 0x0c2d5000 }, + { _MMIO(0x9888), 0x0e2d4000 }, + { _MMIO(0x9888), 0x0c2e1400 }, + { _MMIO(0x9888), 0x0e2e5100 }, + { _MMIO(0x9888), 0x102e0114 }, + { _MMIO(0x9888), 0x044cc000 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4c8000 }, + { _MMIO(0x9888), 0x0e4c4000 }, + { _MMIO(0x9888), 0x104c8000 }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x004ea000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084e8000 }, + { _MMIO(0x9888), 0x0a4e2000 }, + { _MMIO(0x9888), 0x0c4ea000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x004f6b42 }, + { _MMIO(0x9888), 0x064f6200 }, + { _MMIO(0x9888), 0x084f4100 }, + { _MMIO(0x9888), 0x0a4f0061 }, + { _MMIO(0x9888), 0x0c4f6c4c }, + { _MMIO(0x9888), 0x0e4f4b00 }, + { _MMIO(0x9888), 0x1a4f0000 }, + { _MMIO(0x9888), 0x1c4f0000 
}, + { _MMIO(0x9888), 0x180f5000 }, + { _MMIO(0x9888), 0x1a0f8800 }, + { _MMIO(0x9888), 0x1c0f08a2 }, + { _MMIO(0x9888), 0x182c4000 }, + { _MMIO(0x9888), 0x1c2c1451 }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x1a2c0010 }, + { _MMIO(0x9888), 0x01938000 }, + { _MMIO(0x9888), 0x0f938000 }, + { _MMIO(0x9888), 0x19938a28 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x19900177 }, + { _MMIO(0x9888), 0x1b900178 }, + { _MMIO(0x9888), 0x1d900125 }, + { _MMIO(0x9888), 0x1f900123 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x53901000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x55900111 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 0x45900000 }, +}; + +static int +get_compute_basic_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_basic; + lens[n] = ARRAY_SIZE(mux_config_compute_basic); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_render_pipe_profile[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007ffea }, + { _MMIO(0x2774), 0x00007ffc }, + { _MMIO(0x2778), 0x0007affa }, + { _MMIO(0x277c), 0x0000f5fd }, + { _MMIO(0x2780), 0x00079ffa }, + { 
_MMIO(0x2784), 0x0000f3fb }, + { _MMIO(0x2788), 0x0007bf7a }, + { _MMIO(0x278c), 0x0000f7e7 }, + { _MMIO(0x2790), 0x0007fefa }, + { _MMIO(0x2794), 0x0000f7cf }, + { _MMIO(0x2798), 0x00077ffa }, + { _MMIO(0x279c), 0x0000efdf }, + { _MMIO(0x27a0), 0x0006fffa }, + { _MMIO(0x27a4), 0x0000cfbf }, + { _MMIO(0x27a8), 0x0003fffa }, + { _MMIO(0x27ac), 0x00005f7f }, +}; + +static const struct i915_oa_reg flex_eu_config_render_pipe_profile[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_render_pipe_profile[] = { + { _MMIO(0x9888), 0x0c2e001f }, + { _MMIO(0x9888), 0x0a2f0000 }, + { _MMIO(0x9888), 0x10186800 }, + { _MMIO(0x9888), 0x11810019 }, + { _MMIO(0x9888), 0x15810013 }, + { _MMIO(0x9888), 0x13820020 }, + { _MMIO(0x9888), 0x11830020 }, + { _MMIO(0x9888), 0x17840000 }, + { _MMIO(0x9888), 0x11860007 }, + { _MMIO(0x9888), 0x21860000 }, + { _MMIO(0x9888), 0x178703e0 }, + { _MMIO(0x9888), 0x0c2d8000 }, + { _MMIO(0x9888), 0x042d4000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x022e5400 }, + { _MMIO(0x9888), 0x002e0000 }, + { _MMIO(0x9888), 0x0e2e0080 }, + { _MMIO(0x9888), 0x082f0040 }, + { _MMIO(0x9888), 0x002f0000 }, + { _MMIO(0x9888), 0x06143000 }, + { _MMIO(0x9888), 0x06174000 }, + { _MMIO(0x9888), 0x06180012 }, + { _MMIO(0x9888), 0x00180000 }, + { _MMIO(0x9888), 0x0d804000 }, + { _MMIO(0x9888), 0x0f804000 }, + { _MMIO(0x9888), 0x05804000 }, + { _MMIO(0x9888), 0x09810200 }, + { _MMIO(0x9888), 0x0b810030 }, + { _MMIO(0x9888), 0x03810003 }, + { _MMIO(0x9888), 0x21819140 }, + { _MMIO(0x9888), 0x23819050 }, + { _MMIO(0x9888), 0x25810018 }, + { _MMIO(0x9888), 0x0b820980 }, + { _MMIO(0x9888), 0x03820d80 }, + { _MMIO(0x9888), 0x11820000 }, + { _MMIO(0x9888), 0x0182c000 }, + { _MMIO(0x9888), 0x07828000 }, + { _MMIO(0x9888), 0x09824000 }, 
+ { _MMIO(0x9888), 0x0f828000 }, + { _MMIO(0x9888), 0x0d830004 }, + { _MMIO(0x9888), 0x0583000c }, + { _MMIO(0x9888), 0x0f831000 }, + { _MMIO(0x9888), 0x01848072 }, + { _MMIO(0x9888), 0x11840000 }, + { _MMIO(0x9888), 0x07848000 }, + { _MMIO(0x9888), 0x09844000 }, + { _MMIO(0x9888), 0x0f848000 }, + { _MMIO(0x9888), 0x07860000 }, + { _MMIO(0x9888), 0x09860092 }, + { _MMIO(0x9888), 0x0f860400 }, + { _MMIO(0x9888), 0x01869100 }, + { _MMIO(0x9888), 0x0f870065 }, + { _MMIO(0x9888), 0x01870000 }, + { _MMIO(0x9888), 0x19930800 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x1b952000 }, + { _MMIO(0x9888), 0x1d955055 }, + { _MMIO(0x9888), 0x1f951455 }, + { _MMIO(0x9888), 0x0992a000 }, + { _MMIO(0x9888), 0x0f928000 }, + { _MMIO(0x9888), 0x1192a800 }, + { _MMIO(0x9888), 0x1392028a }, + { _MMIO(0x9888), 0x0b92a000 }, + { _MMIO(0x9888), 0x0d922000 }, + { _MMIO(0x9888), 0x13908000 }, + { _MMIO(0x9888), 0x21908000 }, + { _MMIO(0x9888), 0x23908000 }, + { _MMIO(0x9888), 0x25908000 }, + { _MMIO(0x9888), 0x27908000 }, + { _MMIO(0x9888), 0x29908000 }, + { _MMIO(0x9888), 0x2b908000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f908000 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x15908000 }, + { _MMIO(0x9888), 0x17908000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900c01 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900863 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900061 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 0x45900c22 }, +}; + +static int +get_render_pipe_profile_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + 
BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_render_pipe_profile; + lens[n] = ARRAY_SIZE(mux_config_render_pipe_profile); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_reads[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f872 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_reads[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_reads[] = { + { _MMIO(0x9888), 0x19800343 }, + { _MMIO(0x9888), 0x39900340 }, + { _MMIO(0x9888), 0x3f901000 }, + { _MMIO(0x9888), 0x41900003 }, + { _MMIO(0x9888), 0x03803180 }, + { _MMIO(0x9888), 0x058035e2 }, + { _MMIO(0x9888), 0x0780006a }, + { _MMIO(0x9888), 
0x11800000 }, + { _MMIO(0x9888), 0x2181a000 }, + { _MMIO(0x9888), 0x2381000a }, + { _MMIO(0x9888), 0x1d950550 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d92a000 }, + { _MMIO(0x9888), 0x0f922000 }, + { _MMIO(0x9888), 0x13900170 }, + { _MMIO(0x9888), 0x21900171 }, + { _MMIO(0x9888), 0x23900172 }, + { _MMIO(0x9888), 0x25900173 }, + { _MMIO(0x9888), 0x27900174 }, + { _MMIO(0x9888), 0x29900175 }, + { _MMIO(0x9888), 0x2b900176 }, + { _MMIO(0x9888), 0x2d900177 }, + { _MMIO(0x9888), 0x2f90017f }, + { _MMIO(0x9888), 0x31900125 }, + { _MMIO(0x9888), 0x15900123 }, + { _MMIO(0x9888), 0x17900121 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { _MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43901084 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47901080 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49901084 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b901084 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900004 }, + { _MMIO(0x9888), 0x45900000 }, +}; + +static int +get_memory_reads_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_reads; + lens[n] = ARRAY_SIZE(mux_config_memory_reads); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_memory_writes[] = { + { _MMIO(0x272c), 0xffffffff }, + { _MMIO(0x2728), 0xffffffff }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x271c), 0xffffffff }, + { _MMIO(0x2718), 0xffffffff }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x274c), 0x86543210 }, + { _MMIO(0x2748), 0x86543210 }, + { _MMIO(0x2744), 0x00006667 }, + { 
_MMIO(0x2740), 0x00000000 }, + { _MMIO(0x275c), 0x86543210 }, + { _MMIO(0x2758), 0x86543210 }, + { _MMIO(0x2754), 0x00006465 }, + { _MMIO(0x2750), 0x00000000 }, + { _MMIO(0x2770), 0x0007f81a }, + { _MMIO(0x2774), 0x0000fe00 }, + { _MMIO(0x2778), 0x0007f82a }, + { _MMIO(0x277c), 0x0000fe00 }, + { _MMIO(0x2780), 0x0007f822 }, + { _MMIO(0x2784), 0x0000fe00 }, + { _MMIO(0x2788), 0x0007f8ba }, + { _MMIO(0x278c), 0x0000fe00 }, + { _MMIO(0x2790), 0x0007f87a }, + { _MMIO(0x2794), 0x0000fe00 }, + { _MMIO(0x2798), 0x0007f8ea }, + { _MMIO(0x279c), 0x0000fe00 }, + { _MMIO(0x27a0), 0x0007f8e2 }, + { _MMIO(0x27a4), 0x0000fe00 }, + { _MMIO(0x27a8), 0x0007f8f2 }, + { _MMIO(0x27ac), 0x0000fe00 }, +}; + +static const struct i915_oa_reg flex_eu_config_memory_writes[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00015014 }, + { _MMIO(0xe658), 0x00025024 }, + { _MMIO(0xe758), 0x00035034 }, + { _MMIO(0xe45c), 0x00045044 }, + { _MMIO(0xe55c), 0x00055054 }, + { _MMIO(0xe65c), 0x00065064 }, +}; + +static const struct i915_oa_reg mux_config_memory_writes[] = { + { _MMIO(0x9888), 0x19800343 }, + { _MMIO(0x9888), 0x39900340 }, + { _MMIO(0x9888), 0x3f900000 }, + { _MMIO(0x9888), 0x41900080 }, + { _MMIO(0x9888), 0x03803180 }, + { _MMIO(0x9888), 0x058035e2 }, + { _MMIO(0x9888), 0x0780006a }, + { _MMIO(0x9888), 0x11800000 }, + { _MMIO(0x9888), 0x2181a000 }, + { _MMIO(0x9888), 0x2381000a }, + { _MMIO(0x9888), 0x1d950550 }, + { _MMIO(0x9888), 0x0b928000 }, + { _MMIO(0x9888), 0x0d92a000 }, + { _MMIO(0x9888), 0x0f922000 }, + { _MMIO(0x9888), 0x13900180 }, + { _MMIO(0x9888), 0x21900181 }, + { _MMIO(0x9888), 0x23900182 }, + { _MMIO(0x9888), 0x25900183 }, + { _MMIO(0x9888), 0x27900184 }, + { _MMIO(0x9888), 0x29900185 }, + { _MMIO(0x9888), 0x2b900186 }, + { _MMIO(0x9888), 0x2d900187 }, + { _MMIO(0x9888), 0x2f900170 }, + { _MMIO(0x9888), 0x31900125 }, + { _MMIO(0x9888), 0x15900123 }, + { _MMIO(0x9888), 0x17900121 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x19908000 }, + { 
_MMIO(0x9888), 0x1b908000 }, + { _MMIO(0x9888), 0x1d908000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43901084 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47901080 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49901084 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b901084 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900004 }, + { _MMIO(0x9888), 0x45900000 }, +}; + +static int +get_memory_writes_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_memory_writes; + lens[n] = ARRAY_SIZE(mux_config_memory_writes); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extended[] = { + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fc2a }, + { _MMIO(0x2774), 0x0000bf00 }, + { _MMIO(0x2778), 0x0007fc6a }, + { _MMIO(0x277c), 0x0000bf00 }, + { _MMIO(0x2780), 0x0007fc92 }, + { _MMIO(0x2784), 0x0000bf00 }, + { _MMIO(0x2788), 0x0007fca2 }, + { _MMIO(0x278c), 0x0000bf00 }, + { _MMIO(0x2790), 0x0007fc32 }, + { _MMIO(0x2794), 0x0000bf00 }, + { _MMIO(0x2798), 0x0007fc9a }, + { _MMIO(0x279c), 0x0000bf00 }, + { _MMIO(0x27a0), 0x0007fe6a }, + { _MMIO(0x27a4), 0x0000bf00 }, + { _MMIO(0x27a8), 0x0007fe7a }, + { _MMIO(0x27ac), 0x0000bf00 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extended[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00778008 }, + { _MMIO(0xe45c), 0x00088078 }, + { _MMIO(0xe55c), 0x00808708 }, + { _MMIO(0xe65c), 0x00a08908 }, +}; + +static const struct i915_oa_reg 
mux_config_compute_extended[] = { + { _MMIO(0x9888), 0x104f00e0 }, + { _MMIO(0x9888), 0x141c0160 }, + { _MMIO(0x9888), 0x161c0015 }, + { _MMIO(0x9888), 0x181c0120 }, + { _MMIO(0x9888), 0x002d5000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d5000 }, + { _MMIO(0x9888), 0x0a2d5000 }, + { _MMIO(0x9888), 0x0c2d5000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x0c2e5400 }, + { _MMIO(0x9888), 0x0e2e5515 }, + { _MMIO(0x9888), 0x102e0155 }, + { _MMIO(0x9888), 0x044cc000 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4cc000 }, + { _MMIO(0x9888), 0x0e4cc000 }, + { _MMIO(0x9888), 0x104c8000 }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x004ea000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084ea000 }, + { _MMIO(0x9888), 0x0a4ea000 }, + { _MMIO(0x9888), 0x0c4ea000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x0e4f4b41 }, + { _MMIO(0x9888), 0x004f4200 }, + { _MMIO(0x9888), 0x024f404c }, + { _MMIO(0x9888), 0x1c4f0000 }, + { _MMIO(0x9888), 0x1a4f0000 }, + { _MMIO(0x9888), 0x001b4000 }, + { _MMIO(0x9888), 0x061b8000 }, + { _MMIO(0x9888), 0x081bc000 }, + { _MMIO(0x9888), 0x0a1bc000 }, + { _MMIO(0x9888), 0x0c1bc000 }, + { _MMIO(0x9888), 0x041bc000 }, + { _MMIO(0x9888), 0x001c0031 }, + { _MMIO(0x9888), 0x061c1900 }, + { _MMIO(0x9888), 0x081c1a33 }, + { _MMIO(0x9888), 0x0a1c1b35 }, + { _MMIO(0x9888), 0x0c1c3337 }, + { _MMIO(0x9888), 0x041c31c7 }, + { _MMIO(0x9888), 0x180f5000 }, + { _MMIO(0x9888), 0x1a0fa8aa }, + { _MMIO(0x9888), 0x1c0f0aaa }, + { _MMIO(0x9888), 0x182c8000 }, + { _MMIO(0x9888), 0x1c2c6aaa }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x1a2c2950 }, + { _MMIO(0x9888), 0x01938000 }, + { _MMIO(0x9888), 0x0f938000 }, + { _MMIO(0x9888), 
0x1993aaaa }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 0x23904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x27904000 }, + { _MMIO(0x9888), 0x29904000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900420 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900400 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x45900001 }, +}; + +static int +get_compute_extended_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extended; + lens[n] = ARRAY_SIZE(mux_config_compute_extended); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_l3_cache[] = { + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2770), 0x0007fffa }, + { _MMIO(0x2774), 0x0000fefe }, + { _MMIO(0x2778), 0x0007fffa }, + { _MMIO(0x277c), 0x0000fefd }, + { _MMIO(0x2790), 0x0007fffa }, + { _MMIO(0x2794), 0x0000fbef }, + { _MMIO(0x2798), 0x0007fffa }, + { _MMIO(0x279c), 0x0000fbdf }, +}; + +static const struct 
i915_oa_reg flex_eu_config_compute_l3_cache[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00000003 }, + { _MMIO(0xe658), 0x00002001 }, + { _MMIO(0xe758), 0x00101100 }, + { _MMIO(0xe45c), 0x00201200 }, + { _MMIO(0xe55c), 0x00301300 }, + { _MMIO(0xe65c), 0x00401400 }, +}; + +static const struct i915_oa_reg mux_config_compute_l3_cache[] = { + { _MMIO(0x9888), 0x166c03b0 }, + { _MMIO(0x9888), 0x1593001e }, + { _MMIO(0x9888), 0x3f900c00 }, + { _MMIO(0x9888), 0x41900000 }, + { _MMIO(0x9888), 0x002d1000 }, + { _MMIO(0x9888), 0x062d4000 }, + { _MMIO(0x9888), 0x082d5000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x0c2e0400 }, + { _MMIO(0x9888), 0x0e2e1500 }, + { _MMIO(0x9888), 0x102e0140 }, + { _MMIO(0x9888), 0x044c4000 }, + { _MMIO(0x9888), 0x0a4c8000 }, + { _MMIO(0x9888), 0x0c4cc000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x004e2000 }, + { _MMIO(0x9888), 0x064e8000 }, + { _MMIO(0x9888), 0x084ea000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x1a4f4001 }, + { _MMIO(0x9888), 0x1c4f5005 }, + { _MMIO(0x9888), 0x006c0051 }, + { _MMIO(0x9888), 0x066c5000 }, + { _MMIO(0x9888), 0x086c5c5d }, + { _MMIO(0x9888), 0x0e6c5e5f }, + { _MMIO(0x9888), 0x106c0000 }, + { _MMIO(0x9888), 0x146c0000 }, + { _MMIO(0x9888), 0x1a6c0000 }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x180f1000 }, + { _MMIO(0x9888), 0x1a0fa800 }, + { _MMIO(0x9888), 0x1c0f0a00 }, + { _MMIO(0x9888), 0x182c4000 }, + { _MMIO(0x9888), 0x1c2c4015 }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x03931980 }, + { _MMIO(0x9888), 0x05930032 }, + { _MMIO(0x9888), 0x11930000 }, + { _MMIO(0x9888), 0x01938000 }, + { _MMIO(0x9888), 0x0f938000 }, + { _MMIO(0x9888), 0x1993a00a }, + { _MMIO(0x9888), 0x07930000 }, + { _MMIO(0x9888), 0x09930000 }, + { _MMIO(0x9888), 0x1d900177 }, + { _MMIO(0x9888), 0x1f900178 }, + { _MMIO(0x9888), 0x35900000 }, + { _MMIO(0x9888), 0x13904000 }, + { _MMIO(0x9888), 0x21904000 }, + { _MMIO(0x9888), 
0x23904000 }, + { _MMIO(0x9888), 0x25904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x53901000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x55900111 }, + { _MMIO(0x9888), 0x47900001 }, + { _MMIO(0x9888), 0x57900000 }, + { _MMIO(0x9888), 0x49900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 0x45900400 }, +}; + +static int +get_compute_l3_cache_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_l3_cache; + lens[n] = ARRAY_SIZE(mux_config_compute_l3_cache); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_hdc_and_sf[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x10800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000fdff }, +}; + +static const struct i915_oa_reg flex_eu_config_hdc_and_sf[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_hdc_and_sf[] = { + { _MMIO(0x9888), 0x104f0232 }, + { _MMIO(0x9888), 0x124f4640 }, + { _MMIO(0x9888), 0x11834400 }, + { _MMIO(0x9888), 0x022d4000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0e2e0055 }, + { _MMIO(0x9888), 0x064c8000 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 
0x0a4c4000 }, + { _MMIO(0x9888), 0x024e8000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x024f6100 }, + { _MMIO(0x9888), 0x044f416b }, + { _MMIO(0x9888), 0x064f004b }, + { _MMIO(0x9888), 0x1a4f0000 }, + { _MMIO(0x9888), 0x1a0f02a8 }, + { _MMIO(0x9888), 0x1a2c5500 }, + { _MMIO(0x9888), 0x0f808000 }, + { _MMIO(0x9888), 0x25810020 }, + { _MMIO(0x9888), 0x0f8305c0 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x1f951000 }, + { _MMIO(0x9888), 0x13920200 }, + { _MMIO(0x9888), 0x31908000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4d900003 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_hdc_and_sf_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_hdc_and_sf; + lens[n] = ARRAY_SIZE(mux_config_hdc_and_sf); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_l3_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00100070 }, + { _MMIO(0x2774), 0x0000fff1 }, + { _MMIO(0x2778), 0x00014002 }, + { _MMIO(0x277c), 0x0000c3ff }, + { _MMIO(0x2780), 0x00010002 }, + { _MMIO(0x2784), 0x0000c7ff }, + { _MMIO(0x2788), 0x00004002 }, + { _MMIO(0x278c), 0x0000d3ff }, + { _MMIO(0x2790), 0x00100700 }, + { _MMIO(0x2794), 
0x0000ff1f }, + { _MMIO(0x2798), 0x00001402 }, + { _MMIO(0x279c), 0x0000fc3f }, + { _MMIO(0x27a0), 0x00001002 }, + { _MMIO(0x27a4), 0x0000fc7f }, + { _MMIO(0x27a8), 0x00000402 }, + { _MMIO(0x27ac), 0x0000fd3f }, +}; + +static const struct i915_oa_reg flex_eu_config_l3_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_l3_1[] = { + { _MMIO(0x9888), 0x12643400 }, + { _MMIO(0x9888), 0x12653400 }, + { _MMIO(0x9888), 0x106c6800 }, + { _MMIO(0x9888), 0x126c001e }, + { _MMIO(0x9888), 0x166c0010 }, + { _MMIO(0x9888), 0x0c2d5000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x102e0154 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e0055 }, + { _MMIO(0x9888), 0x104c8000 }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x0c4ea000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x1c4f5500 }, + { _MMIO(0x9888), 0x1a4f1554 }, + { _MMIO(0x9888), 0x0a640024 }, + { _MMIO(0x9888), 0x10640000 }, + { _MMIO(0x9888), 0x04640000 }, + { _MMIO(0x9888), 0x0c650024 }, + { _MMIO(0x9888), 0x10650000 }, + { _MMIO(0x9888), 0x06650000 }, + { _MMIO(0x9888), 0x0c6c5327 }, + { _MMIO(0x9888), 0x0e6c5425 }, + { _MMIO(0x9888), 0x006c2a00 }, + { _MMIO(0x9888), 0x026c285b }, + { _MMIO(0x9888), 0x046c005c }, + { _MMIO(0x9888), 0x1c6c0000 }, + { _MMIO(0x9888), 0x1a6c0900 }, + { 
_MMIO(0x9888), 0x1c0f0aa0 }, + { _MMIO(0x9888), 0x180f4000 }, + { _MMIO(0x9888), 0x1a0f02aa }, + { _MMIO(0x9888), 0x1c2c5400 }, + { _MMIO(0x9888), 0x1e2c0001 }, + { _MMIO(0x9888), 0x1a2c5550 }, + { _MMIO(0x9888), 0x1993aa00 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x2b904000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900421 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900420 }, + { _MMIO(0x9888), 0x45900021 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, +}; + +static int +get_l3_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_l3_1; + lens[n] = ARRAY_SIZE(mux_config_l3_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x30800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x0000efff }, + { _MMIO(0x2778), 0x00006000 }, + { _MMIO(0x277c), 0x0000f3ff }, +}; + +static const struct i915_oa_reg flex_eu_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0xe458), 0x00005004 }, + { 
_MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_rasterizer_and_pixel_backend[] = { + { _MMIO(0x9888), 0x102d7800 }, + { _MMIO(0x9888), 0x122d79e0 }, + { _MMIO(0x9888), 0x0c2f0004 }, + { _MMIO(0x9888), 0x100e3800 }, + { _MMIO(0x9888), 0x180f0005 }, + { _MMIO(0x9888), 0x002d0940 }, + { _MMIO(0x9888), 0x022d802f }, + { _MMIO(0x9888), 0x042d4013 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0e2e0050 }, + { _MMIO(0x9888), 0x022f0010 }, + { _MMIO(0x9888), 0x002f0000 }, + { _MMIO(0x9888), 0x084c8000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x044e8000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x040e0480 }, + { _MMIO(0x9888), 0x000e0000 }, + { _MMIO(0x9888), 0x060f0027 }, + { _MMIO(0x9888), 0x100f0000 }, + { _MMIO(0x9888), 0x1a0f0040 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x439014a0 }, + { _MMIO(0x9888), 0x459000a4 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_rasterizer_and_pixel_backend_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_rasterizer_and_pixel_backend; + lens[n] = 
ARRAY_SIZE(mux_config_rasterizer_and_pixel_backend); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_sampler[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x70800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, + { _MMIO(0x2770), 0x0000c000 }, + { _MMIO(0x2774), 0x0000e7ff }, + { _MMIO(0x2778), 0x00003000 }, + { _MMIO(0x277c), 0x0000f9ff }, + { _MMIO(0x2780), 0x00000c00 }, + { _MMIO(0x2784), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_sampler[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_sampler[] = { + { _MMIO(0x9888), 0x121300a0 }, + { _MMIO(0x9888), 0x141600ab }, + { _MMIO(0x9888), 0x123300a0 }, + { _MMIO(0x9888), 0x143600ab }, + { _MMIO(0x9888), 0x125300a0 }, + { _MMIO(0x9888), 0x145600ab }, + { _MMIO(0x9888), 0x0c2d4000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x102e01a0 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e0065 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x084c4000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x0e4e8000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x044e2000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x1c0f0800 }, + { _MMIO(0x9888), 0x180f4000 }, + { _MMIO(0x9888), 0x1a0f023f }, + { _MMIO(0x9888), 0x1e2c0003 }, + { _MMIO(0x9888), 0x1a2cc030 }, + { _MMIO(0x9888), 0x04132180 }, + { _MMIO(0x9888), 0x02130000 }, + { _MMIO(0x9888), 0x0c148000 }, + { 
_MMIO(0x9888), 0x0e142000 }, + { _MMIO(0x9888), 0x04148000 }, + { _MMIO(0x9888), 0x1e150140 }, + { _MMIO(0x9888), 0x1c150040 }, + { _MMIO(0x9888), 0x0c163000 }, + { _MMIO(0x9888), 0x0e160068 }, + { _MMIO(0x9888), 0x10160000 }, + { _MMIO(0x9888), 0x18160000 }, + { _MMIO(0x9888), 0x0a164000 }, + { _MMIO(0x9888), 0x04330043 }, + { _MMIO(0x9888), 0x02330000 }, + { _MMIO(0x9888), 0x0234a000 }, + { _MMIO(0x9888), 0x04342000 }, + { _MMIO(0x9888), 0x1c350015 }, + { _MMIO(0x9888), 0x02363460 }, + { _MMIO(0x9888), 0x10360000 }, + { _MMIO(0x9888), 0x04360000 }, + { _MMIO(0x9888), 0x06360000 }, + { _MMIO(0x9888), 0x08364000 }, + { _MMIO(0x9888), 0x06530043 }, + { _MMIO(0x9888), 0x02530000 }, + { _MMIO(0x9888), 0x0e548000 }, + { _MMIO(0x9888), 0x00548000 }, + { _MMIO(0x9888), 0x06542000 }, + { _MMIO(0x9888), 0x1e550400 }, + { _MMIO(0x9888), 0x1a552000 }, + { _MMIO(0x9888), 0x1c550100 }, + { _MMIO(0x9888), 0x0e563000 }, + { _MMIO(0x9888), 0x00563400 }, + { _MMIO(0x9888), 0x10560000 }, + { _MMIO(0x9888), 0x18560000 }, + { _MMIO(0x9888), 0x02560000 }, + { _MMIO(0x9888), 0x0c564000 }, + { _MMIO(0x9888), 0x1993a800 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b9014a0 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900001 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900820 }, + { _MMIO(0x9888), 0x45901022 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, +}; + +static int +get_sampler_mux_config(struct 
drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_sampler; + lens[n] = ARRAY_SIZE(mux_config_sampler); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_1[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x30800000 }, + { _MMIO(0x2770), 0x00000002 }, + { _MMIO(0x2774), 0x00007fff }, + { _MMIO(0x2778), 0x00000000 }, + { _MMIO(0x277c), 0x00009fff }, + { _MMIO(0x2780), 0x00000002 }, + { _MMIO(0x2784), 0x0000efff }, + { _MMIO(0x2788), 0x00000000 }, + { _MMIO(0x278c), 0x0000f3ff }, + { _MMIO(0x2790), 0x00000002 }, + { _MMIO(0x2794), 0x0000fdff }, + { _MMIO(0x2798), 0x00000000 }, + { _MMIO(0x279c), 0x0000fe7f }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_1[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_1[] = { + { _MMIO(0x9888), 0x141a0000 }, + { _MMIO(0x9888), 0x143a0000 }, + { _MMIO(0x9888), 0x145a0000 }, + { _MMIO(0x9888), 0x0c2d4000 }, + { _MMIO(0x9888), 0x0e2d5000 }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x102e0150 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e006a }, + { _MMIO(0x9888), 0x124c8000 }, + { _MMIO(0x9888), 0x144c8000 }, + { _MMIO(0x9888), 0x164c2000 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064c4000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x0c4e8000 }, + { _MMIO(0x9888), 0x0e4ea000 }, + 
{ _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024e2000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x1c0f0bc0 }, + { _MMIO(0x9888), 0x180f4000 }, + { _MMIO(0x9888), 0x1a0f0302 }, + { _MMIO(0x9888), 0x1e2c0003 }, + { _MMIO(0x9888), 0x1a2c00f0 }, + { _MMIO(0x9888), 0x021a3080 }, + { _MMIO(0x9888), 0x041a31e5 }, + { _MMIO(0x9888), 0x02148000 }, + { _MMIO(0x9888), 0x0414a000 }, + { _MMIO(0x9888), 0x1c150054 }, + { _MMIO(0x9888), 0x06168000 }, + { _MMIO(0x9888), 0x08168000 }, + { _MMIO(0x9888), 0x0a168000 }, + { _MMIO(0x9888), 0x0c3a3280 }, + { _MMIO(0x9888), 0x0e3a0063 }, + { _MMIO(0x9888), 0x063a0061 }, + { _MMIO(0x9888), 0x023a0000 }, + { _MMIO(0x9888), 0x0c348000 }, + { _MMIO(0x9888), 0x0e342000 }, + { _MMIO(0x9888), 0x06342000 }, + { _MMIO(0x9888), 0x1e350140 }, + { _MMIO(0x9888), 0x1c350100 }, + { _MMIO(0x9888), 0x18360028 }, + { _MMIO(0x9888), 0x0c368000 }, + { _MMIO(0x9888), 0x0e5a3080 }, + { _MMIO(0x9888), 0x005a3280 }, + { _MMIO(0x9888), 0x025a0063 }, + { _MMIO(0x9888), 0x0e548000 }, + { _MMIO(0x9888), 0x00548000 }, + { _MMIO(0x9888), 0x02542000 }, + { _MMIO(0x9888), 0x1e550400 }, + { _MMIO(0x9888), 0x1a552000 }, + { _MMIO(0x9888), 0x1c550001 }, + { _MMIO(0x9888), 0x18560080 }, + { _MMIO(0x9888), 0x02568000 }, + { _MMIO(0x9888), 0x04568000 }, + { _MMIO(0x9888), 0x1993a800 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x2d904000 }, + { _MMIO(0x9888), 0x2f904000 }, + { _MMIO(0x9888), 0x31904000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x59900000 }, + { _MMIO(0x9888), 0x4b900420 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x33900000 }, + { _MMIO(0x9888), 0x4d900000 }, + { _MMIO(0x9888), 
0x53900000 }, + { _MMIO(0x9888), 0x43900000 }, + { _MMIO(0x9888), 0x45901084 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900001 }, +}; + +static int +get_tdl_1_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_1; + lens[n] = ARRAY_SIZE(mux_config_tdl_1); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_tdl_2[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_tdl_2[] = { + { _MMIO(0xe458), 0x00005004 }, + { _MMIO(0xe558), 0x00010003 }, + { _MMIO(0xe658), 0x00012011 }, + { _MMIO(0xe758), 0x00015014 }, + { _MMIO(0xe45c), 0x00051050 }, + { _MMIO(0xe55c), 0x00053052 }, + { _MMIO(0xe65c), 0x00055054 }, +}; + +static const struct i915_oa_reg mux_config_tdl_2[] = { + { _MMIO(0x9888), 0x141a026b }, + { _MMIO(0x9888), 0x143a0173 }, + { _MMIO(0x9888), 0x145a026b }, + { _MMIO(0x9888), 0x002d4000 }, + { _MMIO(0x9888), 0x022d5000 }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0c2e5000 }, + { _MMIO(0x9888), 0x0e2e0069 }, + { _MMIO(0x9888), 0x044c8000 }, + { _MMIO(0x9888), 0x064cc000 }, + { _MMIO(0x9888), 0x0a4c4000 }, + { _MMIO(0x9888), 0x004e8000 }, + { _MMIO(0x9888), 0x024ea000 }, + { _MMIO(0x9888), 0x064e2000 }, + { _MMIO(0x9888), 0x180f6000 }, + { _MMIO(0x9888), 0x1a0f030a }, + { _MMIO(0x9888), 0x1a2c03c0 }, + { _MMIO(0x9888), 0x041a37e7 }, + { _MMIO(0x9888), 0x021a0000 }, + { _MMIO(0x9888), 0x0414a000 }, + { _MMIO(0x9888), 0x1c150050 }, + { _MMIO(0x9888), 0x08168000 }, + { _MMIO(0x9888), 0x0a168000 }, + { _MMIO(0x9888), 0x003a3380 }, + { _MMIO(0x9888), 0x063a006f }, + { 
_MMIO(0x9888), 0x023a0000 }, + { _MMIO(0x9888), 0x00348000 }, + { _MMIO(0x9888), 0x06342000 }, + { _MMIO(0x9888), 0x1a352000 }, + { _MMIO(0x9888), 0x1c350100 }, + { _MMIO(0x9888), 0x02368000 }, + { _MMIO(0x9888), 0x0c368000 }, + { _MMIO(0x9888), 0x025a37e7 }, + { _MMIO(0x9888), 0x0254a000 }, + { _MMIO(0x9888), 0x1c550005 }, + { _MMIO(0x9888), 0x04568000 }, + { _MMIO(0x9888), 0x06568000 }, + { _MMIO(0x9888), 0x03938000 }, + { _MMIO(0x9888), 0x05938000 }, + { _MMIO(0x9888), 0x07938000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x15904000 }, + { _MMIO(0x9888), 0x17904000 }, + { _MMIO(0x9888), 0x19904000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x43900020 }, + { _MMIO(0x9888), 0x45901080 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900001 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_tdl_2_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_tdl_2; + lens[n] = ARRAY_SIZE(mux_config_tdl_2); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_compute_extra[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0x00800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0x00800000 }, +}; + +static const struct i915_oa_reg flex_eu_config_compute_extra[] = { + { _MMIO(0xe458), 0x00001000 }, + { _MMIO(0xe558), 0x00003002 }, + { _MMIO(0xe658), 0x00005004 }, + { _MMIO(0xe758), 0x00011010 }, + { _MMIO(0xe45c), 0x00050012 }, + { _MMIO(0xe55c), 0x00052051 }, + { _MMIO(0xe65c), 0x00000008 }, +}; + +static const struct i915_oa_reg 
mux_config_compute_extra[] = { + { _MMIO(0x9888), 0x141a001f }, + { _MMIO(0x9888), 0x143a001f }, + { _MMIO(0x9888), 0x145a001f }, + { _MMIO(0x9888), 0x042d5000 }, + { _MMIO(0x9888), 0x062d1000 }, + { _MMIO(0x9888), 0x0e2e0094 }, + { _MMIO(0x9888), 0x084cc000 }, + { _MMIO(0x9888), 0x044ea000 }, + { _MMIO(0x9888), 0x1a0f00e0 }, + { _MMIO(0x9888), 0x1a2c0c00 }, + { _MMIO(0x9888), 0x061a0063 }, + { _MMIO(0x9888), 0x021a0000 }, + { _MMIO(0x9888), 0x06142000 }, + { _MMIO(0x9888), 0x1c150100 }, + { _MMIO(0x9888), 0x0c168000 }, + { _MMIO(0x9888), 0x043a3180 }, + { _MMIO(0x9888), 0x023a0000 }, + { _MMIO(0x9888), 0x04348000 }, + { _MMIO(0x9888), 0x1c350040 }, + { _MMIO(0x9888), 0x0a368000 }, + { _MMIO(0x9888), 0x045a0063 }, + { _MMIO(0x9888), 0x025a0000 }, + { _MMIO(0x9888), 0x04542000 }, + { _MMIO(0x9888), 0x1c550010 }, + { _MMIO(0x9888), 0x08568000 }, + { _MMIO(0x9888), 0x09938000 }, + { _MMIO(0x9888), 0x0b938000 }, + { _MMIO(0x9888), 0x0d938000 }, + { _MMIO(0x9888), 0x1b904000 }, + { _MMIO(0x9888), 0x1d904000 }, + { _MMIO(0x9888), 0x1f904000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x45900400 }, + { _MMIO(0x9888), 0x47900004 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_compute_extra_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_compute_extra; + lens[n] = ARRAY_SIZE(mux_config_compute_extra); + n++; + + return n; +} + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 
}, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9888), 0x19800000 }, + { _MMIO(0x9888), 0x07800063 }, + { _MMIO(0x9888), 0x11800000 }, + { _MMIO(0x9888), 0x23810008 }, + { _MMIO(0x9888), 0x1d950400 }, + { _MMIO(0x9888), 0x0f922000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x55900000 }, + { _MMIO(0x9888), 0x47900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static int +get_test_oa_mux_config(struct drm_i915_private *dev_priv, + const struct i915_oa_reg **regs, + int *lens) +{ + int n = 0; + + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs) < 1); + BUILD_BUG_ON(ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens) < 1); + + regs[n] = mux_config_test_oa; + lens[n] = ARRAY_SIZE(mux_config_test_oa); + n++; + + return n; +} + +int i915_oa_select_metric_set_glk(struct drm_i915_private *dev_priv) +{ + dev_priv->perf.oa.n_mux_configs = 0; + dev_priv->perf.oa.b_counter_regs = NULL; + dev_priv->perf.oa.b_counter_regs_len = 0; + dev_priv->perf.oa.flex_regs = NULL; + dev_priv->perf.oa.flex_regs_len = 0; + + switch (dev_priv->perf.oa.metrics_set) { + case METRIC_SET_ID_RENDER_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_render_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to 
userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_render_basic); + + return 0; + case METRIC_SET_ID_COMPUTE_BASIC: + dev_priv->perf.oa.n_mux_configs = + get_compute_basic_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_BASIC\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_basic; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_basic); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_basic; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_basic); + + return 0; + case METRIC_SET_ID_RENDER_PIPE_PROFILE: + dev_priv->perf.oa.n_mux_configs = + get_render_pipe_profile_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RENDER_PIPE_PROFILE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_render_pipe_profile; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_render_pipe_profile); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_render_pipe_profile; + dev_priv->perf.oa.flex_regs_len = + 
ARRAY_SIZE(flex_eu_config_render_pipe_profile); + + return 0; + case METRIC_SET_ID_MEMORY_READS: + dev_priv->perf.oa.n_mux_configs = + get_memory_reads_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_READS\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_reads; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_reads); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_reads; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_reads); + + return 0; + case METRIC_SET_ID_MEMORY_WRITES: + dev_priv->perf.oa.n_mux_configs = + get_memory_writes_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"MEMORY_WRITES\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_memory_writes; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_memory_writes); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_memory_writes; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_memory_writes); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTENDED: + dev_priv->perf.oa.n_mux_configs = + get_compute_extended_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTENDED\" metric set\n"); + + /* 
EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extended; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extended); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extended; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extended); + + return 0; + case METRIC_SET_ID_COMPUTE_L3_CACHE: + dev_priv->perf.oa.n_mux_configs = + get_compute_l3_cache_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_L3_CACHE\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_l3_cache; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_l3_cache); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_l3_cache; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_l3_cache); + + return 0; + case METRIC_SET_ID_HDC_AND_SF: + dev_priv->perf.oa.n_mux_configs = + get_hdc_and_sf_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"HDC_AND_SF\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_hdc_and_sf; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_hdc_and_sf); + + 
dev_priv->perf.oa.flex_regs = + flex_eu_config_hdc_and_sf; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_hdc_and_sf); + + return 0; + case METRIC_SET_ID_L3_1: + dev_priv->perf.oa.n_mux_configs = + get_l3_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"L3_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_l3_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_l3_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_l3_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_l3_1); + + return 0; + case METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND: + dev_priv->perf.oa.n_mux_configs = + get_rasterizer_and_pixel_backend_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"RASTERIZER_AND_PIXEL_BACKEND\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_rasterizer_and_pixel_backend); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_rasterizer_and_pixel_backend; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_rasterizer_and_pixel_backend); + + return 0; + case METRIC_SET_ID_SAMPLER: + dev_priv->perf.oa.n_mux_configs = + get_sampler_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if 
(dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"SAMPLER\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_sampler; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_sampler); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_sampler; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_sampler); + + return 0; + case METRIC_SET_ID_TDL_1: + dev_priv->perf.oa.n_mux_configs = + get_tdl_1_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_1\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_1; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_1); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_tdl_1; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_1); + + return 0; + case METRIC_SET_ID_TDL_2: + dev_priv->perf.oa.n_mux_configs = + get_tdl_2_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TDL_2\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_tdl_2; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_tdl_2); + + dev_priv->perf.oa.flex_regs = + 
flex_eu_config_tdl_2; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_tdl_2); + + return 0; + case METRIC_SET_ID_COMPUTE_EXTRA: + dev_priv->perf.oa.n_mux_configs = + get_compute_extra_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"COMPUTE_EXTRA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_compute_extra; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_compute_extra); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_compute_extra; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_compute_extra); + + return 0; + case METRIC_SET_ID_TEST_OA: + dev_priv->perf.oa.n_mux_configs = + get_test_oa_mux_config(dev_priv, + dev_priv->perf.oa.mux_regs, + dev_priv->perf.oa.mux_regs_lens); + if (dev_priv->perf.oa.n_mux_configs == 0) { + DRM_DEBUG_DRIVER("No suitable MUX config for \"TEST_OA\" metric set\n"); + + /* EINVAL because *_register_sysfs already checked this + * and so it wouldn't have been advertised to userspace and + * so shouldn't have been requested + */ + return -EINVAL; + } + + dev_priv->perf.oa.b_counter_regs = + b_counter_config_test_oa; + dev_priv->perf.oa.b_counter_regs_len = + ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.flex_regs = + flex_eu_config_test_oa; + dev_priv->perf.oa.flex_regs_len = + ARRAY_SIZE(flex_eu_config_test_oa); + + return 0; + default: + return -ENODEV; + } +} + +static ssize_t +show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_BASIC); +} + +static struct device_attribute dev_attr_render_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + 
.show = show_render_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_render_basic[] = { + &dev_attr_render_basic_id.attr, + NULL, +}; + +static struct attribute_group group_render_basic = { + .name = "d72df5c7-5b4a-4274-a43f-00b0fd51fc68", + .attrs = attrs_render_basic, +}; + +static ssize_t +show_compute_basic_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_BASIC); +} + +static struct device_attribute dev_attr_compute_basic_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_basic_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_basic[] = { + &dev_attr_compute_basic_id.attr, + NULL, +}; + +static struct attribute_group group_compute_basic = { + .name = "814285f6-354d-41d2-ba49-e24e622714a0", + .attrs = attrs_compute_basic, +}; + +static ssize_t +show_render_pipe_profile_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RENDER_PIPE_PROFILE); +} + +static struct device_attribute dev_attr_render_pipe_profile_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_render_pipe_profile_id, + .store = NULL, +}; + +static struct attribute *attrs_render_pipe_profile[] = { + &dev_attr_render_pipe_profile_id.attr, + NULL, +}; + +static struct attribute_group group_render_pipe_profile = { + .name = "07d397a6-b3e6-49f6-9433-a4f293d55978", + .attrs = attrs_render_pipe_profile, +}; + +static ssize_t +show_memory_reads_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_READS); +} + +static struct device_attribute dev_attr_memory_reads_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_reads_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_reads[] = { + &dev_attr_memory_reads_id.attr, + NULL, +}; + +static struct attribute_group group_memory_reads = { + .name = 
"1a356946-5428-450b-a2f0-89f8783a302d", + .attrs = attrs_memory_reads, +}; + +static ssize_t +show_memory_writes_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_MEMORY_WRITES); +} + +static struct device_attribute dev_attr_memory_writes_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_memory_writes_id, + .store = NULL, +}; + +static struct attribute *attrs_memory_writes[] = { + &dev_attr_memory_writes_id.attr, + NULL, +}; + +static struct attribute_group group_memory_writes = { + .name = "5299be9d-7a61-4c99-9f81-f87e6c5aaca9", + .attrs = attrs_memory_writes, +}; + +static ssize_t +show_compute_extended_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTENDED); +} + +static struct device_attribute dev_attr_compute_extended_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extended_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extended[] = { + &dev_attr_compute_extended_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extended = { + .name = "bc9bcff2-459a-4cbc-986d-a84b077153f3", + .attrs = attrs_compute_extended, +}; + +static ssize_t +show_compute_l3_cache_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_L3_CACHE); +} + +static struct device_attribute dev_attr_compute_l3_cache_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_l3_cache_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_l3_cache[] = { + &dev_attr_compute_l3_cache_id.attr, + NULL, +}; + +static struct attribute_group group_compute_l3_cache = { + .name = "88ec931f-5b4a-453a-9db6-a61232b6143d", + .attrs = attrs_compute_l3_cache, +}; + +static ssize_t +show_hdc_and_sf_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 
METRIC_SET_ID_HDC_AND_SF); +} + +static struct device_attribute dev_attr_hdc_and_sf_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_hdc_and_sf_id, + .store = NULL, +}; + +static struct attribute *attrs_hdc_and_sf[] = { + &dev_attr_hdc_and_sf_id.attr, + NULL, +}; + +static struct attribute_group group_hdc_and_sf = { + .name = "530d176d-2a18-4014-adf8-1500c6c60835", + .attrs = attrs_hdc_and_sf, +}; + +static ssize_t +show_l3_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_L3_1); +} + +static struct device_attribute dev_attr_l3_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_l3_1_id, + .store = NULL, +}; + +static struct attribute *attrs_l3_1[] = { + &dev_attr_l3_1_id.attr, + NULL, +}; + +static struct attribute_group group_l3_1 = { + .name = "fdee5a5a-f23c-43d1-aa73-f6257c71671d", + .attrs = attrs_l3_1, +}; + +static ssize_t +show_rasterizer_and_pixel_backend_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_RASTERIZER_AND_PIXEL_BACKEND); +} + +static struct device_attribute dev_attr_rasterizer_and_pixel_backend_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_rasterizer_and_pixel_backend_id, + .store = NULL, +}; + +static struct attribute *attrs_rasterizer_and_pixel_backend[] = { + &dev_attr_rasterizer_and_pixel_backend_id.attr, + NULL, +}; + +static struct attribute_group group_rasterizer_and_pixel_backend = { + .name = "6617623e-ca73-4791-b2b7-ddedd0846a0c", + .attrs = attrs_rasterizer_and_pixel_backend, +}; + +static ssize_t +show_sampler_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_SAMPLER); +} + +static struct device_attribute dev_attr_sampler_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_sampler_id, + .store = NULL, +}; + +static struct attribute *attrs_sampler[] = { + &dev_attr_sampler_id.attr, + 
NULL, +}; + +static struct attribute_group group_sampler = { + .name = "f3b2ea63-e82e-4234-b418-44dd20dd34d0", + .attrs = attrs_sampler, +}; + +static ssize_t +show_tdl_1_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_1); +} + +static struct device_attribute dev_attr_tdl_1_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_1_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_1[] = { + &dev_attr_tdl_1_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_1 = { + .name = "14411d35-cbf6-4f5e-b68b-190faf9a1a83", + .attrs = attrs_tdl_1, +}; + +static ssize_t +show_tdl_2_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TDL_2); +} + +static struct device_attribute dev_attr_tdl_2_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_tdl_2_id, + .store = NULL, +}; + +static struct attribute *attrs_tdl_2[] = { + &dev_attr_tdl_2_id.attr, + NULL, +}; + +static struct attribute_group group_tdl_2 = { + .name = "ffa3f263-0478-4724-8c9f-c911c5ec0f1d", + .attrs = attrs_tdl_2, +}; + +static ssize_t +show_compute_extra_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_COMPUTE_EXTRA); +} + +static struct device_attribute dev_attr_compute_extra_id = { + .attr = { .name = "id", .mode = 0444 }, + .show = show_compute_extra_id, + .store = NULL, +}; + +static struct attribute *attrs_compute_extra[] = { + &dev_attr_compute_extra_id.attr, + NULL, +}; + +static struct attribute_group group_compute_extra = { + .name = "15274c82-27d2-4819-876a-7cb1a2c59ba4", + .attrs = attrs_compute_extra, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", METRIC_SET_ID_TEST_OA); +} + +static struct device_attribute dev_attr_test_oa_id = { + .attr = { .name = "id", .mode = 0444 }, + 
.show = show_test_oa_id, + .store = NULL, +}; + +static struct attribute *attrs_test_oa[] = { + &dev_attr_test_oa_id.attr, + NULL, +}; + +static struct attribute_group group_test_oa = { + .name = "dd3fd789-e783-4204-8cd0-b671bbccb0cf", + .attrs = attrs_test_oa, +}; + +int +i915_perf_register_sysfs_glk(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + int ret = 0; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (ret) + goto error_render_basic; + } + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (ret) + goto error_compute_basic; + } + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (ret) + goto error_render_pipe_profile; + } + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (ret) + goto error_memory_reads; + } + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (ret) + goto error_memory_writes; + } + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (ret) + goto error_compute_extended; + } + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (ret) + goto error_compute_l3_cache; + } + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + 
if (ret) + goto error_hdc_and_sf; + } + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (ret) + goto error_l3_1; + } + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (ret) + goto error_rasterizer_and_pixel_backend; + } + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (ret) + goto error_sampler; + } + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (ret) + goto error_tdl_1; + } + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (ret) + goto error_tdl_2; + } + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (ret) + goto error_compute_extra; + } + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) { + ret = sysfs_create_group(dev_priv->perf.metrics_kobj, &group_test_oa); + if (ret) + goto error_test_oa; + } + + return 0; + +error_test_oa: + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); +error_compute_extra: + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); +error_tdl_2: + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); +error_tdl_1: + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); +error_sampler: + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + 
sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); +error_rasterizer_and_pixel_backend: + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); +error_l3_1: + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); +error_hdc_and_sf: + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); +error_compute_l3_cache: + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); +error_compute_extended: + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); +error_memory_writes: + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); +error_memory_reads: + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); +error_render_pipe_profile: + if (get_compute_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); +error_compute_basic: + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); +error_render_basic: + return ret; +} + +void +i915_perf_unregister_sysfs_glk(struct drm_i915_private *dev_priv) +{ + const struct i915_oa_reg *mux_regs[ARRAY_SIZE(dev_priv->perf.oa.mux_regs)]; + int mux_lens[ARRAY_SIZE(dev_priv->perf.oa.mux_regs_lens)]; + + if (get_render_basic_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_basic); + if (get_compute_basic_mux_config(dev_priv, mux_regs, 
mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_basic); + if (get_render_pipe_profile_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_render_pipe_profile); + if (get_memory_reads_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_reads); + if (get_memory_writes_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_memory_writes); + if (get_compute_extended_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extended); + if (get_compute_l3_cache_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_l3_cache); + if (get_hdc_and_sf_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_hdc_and_sf); + if (get_l3_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_l3_1); + if (get_rasterizer_and_pixel_backend_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_rasterizer_and_pixel_backend); + if (get_sampler_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_sampler); + if (get_tdl_1_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_1); + if (get_tdl_2_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_tdl_2); + if (get_compute_extra_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_compute_extra); + if (get_test_oa_mux_config(dev_priv, mux_regs, mux_lens)) + sysfs_remove_group(dev_priv->perf.metrics_kobj, &group_test_oa); +} diff --git a/drivers/gpu/drm/i915/i915_oa_glk.h b/drivers/gpu/drm/i915/i915_oa_glk.h new file mode 100644 index 
000000000000..5511bb1cecf7 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_glk.h @@ -0,0 +1,40 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __I915_OA_GLK_H__ +#define __I915_OA_GLK_H__ + +extern int i915_oa_n_builtin_metric_sets_glk; + +extern int i915_oa_select_metric_set_glk(struct drm_i915_private *dev_priv); + +extern int i915_perf_register_sysfs_glk(struct drm_i915_private *dev_priv); + +extern void i915_perf_unregister_sysfs_glk(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e8fca43eea75..38c44407bafc 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -204,6 +204,7 @@ #include "i915_oa_bxt.h" #include "i915_oa_kblgt2.h" #include "i915_oa_kblgt3.h" +#include "i915_oa_glk.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1805,7 +1806,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) * RPT_ID field. */ if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || - IS_KABYLAKE(dev_priv)) { + IS_KABYLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { I915_WRITE(GEN8_OA_DEBUG, _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); @@ -2899,6 +2900,9 @@ void i915_perf_register(struct drm_i915_private *dev_priv) goto sysfs_error; } else goto sysfs_error; + } else if (IS_GEMINILAKE(dev_priv)) { + if (i915_perf_register_sysfs_glk(dev_priv)) + goto sysfs_error; } goto exit; @@ -2945,7 +2949,9 @@ void i915_perf_unregister(struct drm_i915_private *dev_priv) i915_perf_unregister_sysfs_kblgt2(dev_priv); else if (IS_KBL_GT3(dev_priv)) i915_perf_unregister_sysfs_kblgt3(dev_priv); - } + } else if (IS_GEMINILAKE(dev_priv)) + i915_perf_unregister_sysfs_glk(dev_priv); + kobject_put(dev_priv->perf.metrics_kobj); dev_priv->perf.metrics_kobj = NULL; @@ -3089,6 +3095,13 @@ void i915_perf_init(struct drm_i915_private *dev_priv) i915_oa_n_builtin_metric_sets_kblgt3; dev_priv->perf.oa.ops.select_metric_set = 
i915_oa_select_metric_set_kblgt3; + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->perf.oa.timestamp_frequency = 19200000; + + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_glk; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_glk; } } From 4c3bb4ccd074e1a0552078c0bf94c662367a1658 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 15 Jun 2017 15:43:17 +1000 Subject: [PATCH 171/341] KVM: PPC: Book3S HV: Restore critical SPRs to host values on guest exit This restores several special-purpose registers (SPRs) to sane values on guest exit that were missed before. TAR and VRSAVE are readable and writable by userspace, and we need to save and restore them to prevent the guest from potentially affecting userspace execution (not that TAR or VRSAVE are used by any known program that uses the KVM_RUN ioctl). We save/restore these in kvmppc_vcpu_run_hv() rather than on every guest entry/exit. FSCR affects userspace execution in that it can prohibit access to certain facilities by userspace. We restore it to the normal value for the task on exit from the KVM_RUN ioctl. IAMR is normally 0, and is restored to 0 on guest exit. However, with a radix host on POWER9, it is set to a value that prevents the kernel from executing user-accessible memory. On POWER9, we save IAMR on guest entry and restore it on guest exit to the saved value rather than 0. On POWER8 we continue to set it to 0 on guest exit. PSPB is normally 0. We restore it to 0 on guest exit to prevent userspace taking advantage of the guest having set it non-zero (which would allow userspace to set its SMT priority to high). UAMOR is normally 0. We restore it to 0 on guest exit to prevent the AMR from being used as a covert channel between userspace processes, since the AMR is not context-switched at present. 
Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 11 +++++++++-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 9 ++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 400a5992b121..a963762a031f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2908,6 +2908,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) int r; int srcu_idx; unsigned long ebb_regs[3] = {}; /* shut up GCC */ + unsigned long user_tar = 0; + unsigned int user_vrsave; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -2935,12 +2937,14 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_all_to_thread(current); - /* Save userspace EBB register values */ + /* Save userspace EBB and other register values */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { ebb_regs[0] = mfspr(SPRN_EBBHR); ebb_regs[1] = mfspr(SPRN_EBBRR); ebb_regs[2] = mfspr(SPRN_BESCR); + user_tar = mfspr(SPRN_TAR); } + user_vrsave = mfspr(SPRN_VRSAVE); vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; @@ -2968,12 +2972,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); - /* Restore userspace EBB register values */ + /* Restore userspace EBB and other register values */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { mtspr(SPRN_EBBHR, ebb_regs[0]); mtspr(SPRN_EBBRR, ebb_regs[1]); mtspr(SPRN_BESCR, ebb_regs[2]); + mtspr(SPRN_TAR, user_tar); + mtspr(SPRN_FSCR, current->thread.fscr); } + mtspr(SPRN_VRSAVE, user_vrsave); out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e390b383b4d6..4e4390564276 100644 --- 
a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -558,6 +558,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) #define STACK_SLOT_TID (112-16) #define STACK_SLOT_PSSCR (112-24) #define STACK_SLOT_PID (112-32) +#define STACK_SLOT_IAMR (112-40) .global kvmppc_hv_entry kvmppc_hv_entry: @@ -758,9 +759,11 @@ BEGIN_FTR_SECTION mfspr r5, SPRN_TIDR mfspr r6, SPRN_PSSCR mfspr r7, SPRN_PID + mfspr r8, SPRN_IAMR std r5, STACK_SLOT_TID(r1) std r6, STACK_SLOT_PSSCR(r1) std r7, STACK_SLOT_PID(r1) + std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION @@ -1515,11 +1518,12 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) * set by the guest could disrupt the host. */ li r0, 0 - mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 mtspr SPRN_WORT, r0 BEGIN_FTR_SECTION + mtspr SPRN_IAMR, r0 mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 @@ -1535,6 +1539,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 + mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ mfspr r8, SPRN_DSCR @@ -1683,9 +1688,11 @@ BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) ld r7, STACK_SLOT_PID(r1) + ld r8, STACK_SLOT_IAMR(r1) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 mtspr SPRN_PID, r7 + mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT From 46a704f8409f79fd66567ad3f8a7304830a84293 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 15 Jun 2017 16:10:27 +1000 Subject: [PATCH 172/341] KVM: PPC: Book3S HV: Preserve userspace HTM state properly If userspace attempts to call the KVM_RUN ioctl when it has hardware transactional memory (HTM) enabled, the values that it has put in the HTM-related SPRs TFHAR, TFIAR and TEXASR will get overwritten by guest values. 
To fix this, we detect this condition and save those SPR values in the thread struct, and disable HTM for the task. If userspace goes to access those SPRs or the HTM facility in future, a TM-unavailable interrupt will occur and the handler will reload those SPRs and re-enable HTM. If userspace has started a transaction and suspended it, we would currently lose the transactional state in the guest entry path and would almost certainly get a "TM Bad Thing" interrupt, which would cause the host to crash. To avoid this, we detect this case and return from the KVM_RUN ioctl with an EINVAL error, with the KVM exit reason set to KVM_EXIT_FAIL_ENTRY. Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a963762a031f..fd4d978d5257 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2916,6 +2916,27 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) return -EINVAL; } + /* + * Don't allow entry with a suspended transaction, because + * the guest entry/exit code will lose it. + * If userspace has TM enabled, save away its TM-related SPRs + * (they will get restored by the TM unavailable interrupt). 
+ */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && + (current->thread.regs->msr & MSR_TM)) { + if (MSR_TM_ACTIVE(current->thread.regs->msr)) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason = 0; + return -EINVAL; + } + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); + current->thread.tm_texasr = mfspr(SPRN_TEXASR); + current->thread.regs->msr &= ~MSR_TM; + } +#endif + kvmppc_core_prepare_to_enter(vcpu); /* No need to go into the guest when all we'll do is come back out */ From 650bc63568e4218508f206c14af92b5a3f77504f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Jun 2017 09:14:33 +0100 Subject: [PATCH 173/341] drm/i915: Amalgamate execbuffer parameter structures Combine the two slightly overlapping parameter structures we pass around the execbuffer routines into one. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170615081435.17699-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 549 +++++++++------------ 1 file changed, 232 insertions(+), 317 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 04211c970b9f..1c5a6a63a767 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -50,70 +50,77 @@ #define BATCH_OFFSET_BIAS (256*1024) -struct i915_execbuffer_params { - struct drm_device *dev; - struct drm_file *file; - struct i915_vma *batch; - u32 dispatch_flags; - u32 args_batch_start_offset; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct drm_i915_gem_request *request; -}; +#define __I915_EXEC_ILLEGAL_FLAGS \ + (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK) -struct eb_vmas { +struct i915_execbuffer { struct drm_i915_private *i915; + struct drm_file *file; + struct 
drm_i915_gem_execbuffer2 *args; + struct drm_i915_gem_exec_object2 *exec; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct i915_address_space *vm; + struct i915_vma *batch; + struct drm_i915_gem_request *request; + u32 batch_start_offset; + u32 batch_len; + unsigned int dispatch_flags; + struct drm_i915_gem_exec_object2 shadow_exec_entry; + bool need_relocs; struct list_head vmas; + struct reloc_cache { + struct drm_mm_node node; + unsigned long vaddr; + unsigned int page; + bool use_64bit_reloc : 1; + } reloc_cache; int and; union { - struct i915_vma *lut[0]; - struct hlist_head buckets[0]; + struct i915_vma **lut; + struct hlist_head *buckets; }; }; -static struct eb_vmas * -eb_create(struct drm_i915_private *i915, - struct drm_i915_gem_execbuffer2 *args) +static int eb_create(struct i915_execbuffer *eb) { - struct eb_vmas *eb = NULL; - - if (args->flags & I915_EXEC_HANDLE_LUT) { - unsigned size = args->buffer_count; + eb->lut = NULL; + if (eb->args->flags & I915_EXEC_HANDLE_LUT) { + unsigned int size = eb->args->buffer_count; size *= sizeof(struct i915_vma *); - size += sizeof(struct eb_vmas); - eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + eb->lut = kmalloc(size, + GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); } - if (eb == NULL) { - unsigned size = args->buffer_count; - unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2; + if (!eb->lut) { + unsigned int size = eb->args->buffer_count; + unsigned int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head)); while (count > 2*size) count >>= 1; - eb = kzalloc(count*sizeof(struct hlist_head) + - sizeof(struct eb_vmas), - GFP_TEMPORARY); - if (eb == NULL) - return eb; + eb->lut = kzalloc(count * sizeof(struct hlist_head), + GFP_TEMPORARY); + if (!eb->lut) + return -ENOMEM; eb->and = count - 1; - } else - eb->and = -args->buffer_count; + } else { + eb->and = -eb->args->buffer_count; + } - eb->i915 = 
i915; INIT_LIST_HEAD(&eb->vmas); - return eb; + return 0; } static void -eb_reset(struct eb_vmas *eb) +eb_reset(struct i915_execbuffer *eb) { if (eb->and >= 0) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } static struct i915_vma * -eb_get_batch(struct eb_vmas *eb) +eb_get_batch(struct i915_execbuffer *eb) { struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); @@ -133,34 +140,30 @@ eb_get_batch(struct eb_vmas *eb) } static int -eb_lookup_vmas(struct eb_vmas *eb, - struct drm_i915_gem_exec_object2 *exec, - const struct drm_i915_gem_execbuffer2 *args, - struct i915_address_space *vm, - struct drm_file *file) +eb_lookup_vmas(struct i915_execbuffer *eb) { struct drm_i915_gem_object *obj; struct list_head objects; int i, ret; INIT_LIST_HEAD(&objects); - spin_lock(&file->table_lock); + spin_lock(&eb->file->table_lock); /* Grab a reference to the object and release the lock so we can lookup * or create the VMA without using GFP_ATOMIC */ - for (i = 0; i < args->buffer_count; i++) { - obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); + for (i = 0; i < eb->args->buffer_count; i++) { + obj = to_intel_bo(idr_find(&eb->file->object_idr, eb->exec[i].handle)); if (obj == NULL) { - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); DRM_DEBUG("Invalid object handle %d at index %d\n", - exec[i].handle, i); + eb->exec[i].handle, i); ret = -ENOENT; goto err; } if (!list_empty(&obj->obj_exec_link)) { - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", - obj, exec[i].handle, i); + obj, eb->exec[i].handle, i); ret = -EINVAL; goto err; } @@ -168,7 +171,7 @@ eb_lookup_vmas(struct eb_vmas *eb, i915_gem_object_get(obj); list_add_tail(&obj->obj_exec_link, &objects); } - spin_unlock(&file->table_lock); + spin_unlock(&eb->file->table_lock); i = 0; while (!list_empty(&objects)) { @@ -186,7 +189,7 @@ 
eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, eb->vm, NULL); if (unlikely(IS_ERR(vma))) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -197,11 +200,13 @@ eb_lookup_vmas(struct eb_vmas *eb, list_add_tail(&vma->exec_list, &eb->vmas); list_del_init(&obj->obj_exec_link); - vma->exec_entry = &exec[i]; + vma->exec_entry = &eb->exec[i]; if (eb->and < 0) { eb->lut[i] = vma; } else { - uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; + u32 handle = + eb->args->flags & I915_EXEC_HANDLE_LUT ? + i : eb->exec[i].handle; vma->exec_handle = handle; hlist_add_head(&vma->exec_node, &eb->buckets[handle & eb->and]); @@ -228,7 +233,7 @@ err: return ret; } -static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) +static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) { if (eb->and < 0) { if (handle >= -eb->and) @@ -248,7 +253,7 @@ static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) } static void -i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) +eb_unreserve_vma(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry; @@ -266,8 +271,10 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } -static void eb_destroy(struct eb_vmas *eb) +static void eb_destroy(struct i915_execbuffer *eb) { + i915_gem_context_put(eb->ctx); + while (!list_empty(&eb->vmas)) { struct i915_vma *vma; @@ -275,11 +282,10 @@ static void eb_destroy(struct eb_vmas *eb) struct i915_vma, exec_list); list_del_init(&vma->exec_list); - i915_gem_execbuffer_unreserve_vma(vma); + eb_unreserve_vma(vma); vma->exec_entry = NULL; i915_vma_put(vma); } - kfree(eb); } static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) @@ -320,20 +326,11 @@ relocation_target(const 
struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target_offset); } -struct reloc_cache { - struct drm_i915_private *i915; - struct drm_mm_node node; - unsigned long vaddr; - unsigned int page; - bool use_64bit_reloc; -}; - static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { cache->page = -1; cache->vaddr = 0; - cache->i915 = i915; /* Must be a variable in the struct to allow GCC to unroll. */ cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); cache->node.allocated = false; @@ -351,7 +348,14 @@ static inline unsigned int unmask_flags(unsigned long p) #define KMAP 0x4 /* after CLFLUSH_FLAGS */ -static void reloc_cache_fini(struct reloc_cache *cache) +static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) +{ + struct drm_i915_private *i915 = + container_of(cache, struct i915_execbuffer, reloc_cache)->i915; + return &i915->ggtt; +} + +static void reloc_cache_reset(struct reloc_cache *cache) { void *vaddr; @@ -369,7 +373,7 @@ static void reloc_cache_fini(struct reloc_cache *cache) wmb(); io_mapping_unmap_atomic((void __iomem *)vaddr); if (cache->node.allocated) { - struct i915_ggtt *ggtt = &cache->i915->ggtt; + struct i915_ggtt *ggtt = cache_to_ggtt(cache); ggtt->base.clear_range(&ggtt->base, cache->node.start, @@ -379,6 +383,9 @@ static void reloc_cache_fini(struct reloc_cache *cache) i915_vma_unpin((struct i915_vma *)cache->node.mm); } } + + cache->vaddr = 0; + cache->page = -1; } static void *reloc_kmap(struct drm_i915_gem_object *obj, @@ -417,7 +424,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, struct reloc_cache *cache, int page) { - struct i915_ggtt *ggtt = &cache->i915->ggtt; + struct i915_ggtt *ggtt = cache_to_ggtt(cache); unsigned long offset; void *vaddr; @@ -467,7 +474,8 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset += page << PAGE_SHIFT; } - vaddr = (void __force *) io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, 
offset); + vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->mappable, + offset); cache->page = page; cache->vaddr = (unsigned long)vaddr; @@ -546,12 +554,10 @@ repeat: } static int -i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, - struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *reloc, - struct reloc_cache *cache) +eb_relocate_entry(struct drm_i915_gem_object *obj, + struct i915_execbuffer *eb, + struct drm_i915_gem_relocation_entry *reloc) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_i915_obj; struct i915_vma *target_vma; @@ -570,8 +576,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and * pipe_control writes because the gpu doesn't properly redirect them * through the ppgtt for non_secure batchbuffers. */ - if (unlikely(IS_GEN6(dev_priv) && - reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { + if (unlikely(IS_GEN6(eb->i915) && + reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { ret = i915_vma_bind(target_vma, target_i915_obj->cache_level, PIN_GLOBAL); if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) @@ -612,7 +618,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) { + obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 
8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, @@ -628,7 +634,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EINVAL; } - ret = relocate_entry(obj, reloc, cache, target_offset); + ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset); if (ret) return ret; @@ -637,19 +643,15 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return 0; } -static int -i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, - struct eb_vmas *eb) +static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct reloc_cache cache; int remain, ret = 0; user_relocs = u64_to_user_ptr(entry->relocs_ptr); - reloc_cache_init(&cache, eb->i915); remain = entry->relocation_count; while (remain) { @@ -678,7 +680,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache); + ret = eb_relocate_entry(vma->obj, eb, r); if (ret) goto out; @@ -710,39 +712,35 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, } out: - reloc_cache_fini(&cache); + reloc_cache_reset(&eb->reloc_cache); return ret; #undef N_RELOC } static int -i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, - struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *relocs) +eb_relocate_vma_slow(struct i915_vma *vma, + struct i915_execbuffer *eb, + struct drm_i915_gem_relocation_entry *relocs) { const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct reloc_cache cache; int i, ret = 0; - reloc_cache_init(&cache, eb->i915); for (i = 0; i < entry->relocation_count; i++) { - ret = 
i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache); + ret = eb_relocate_entry(vma->obj, eb, &relocs[i]); if (ret) break; } - reloc_cache_fini(&cache); - + reloc_cache_reset(&eb->reloc_cache); return ret; } -static int -i915_gem_execbuffer_relocate(struct eb_vmas *eb) +static int eb_relocate(struct i915_execbuffer *eb) { struct i915_vma *vma; int ret = 0; list_for_each_entry(vma, &eb->vmas, exec_list) { - ret = i915_gem_execbuffer_relocate_vma(vma, eb); + ret = eb_relocate_vma(vma, eb); if (ret) break; } @@ -757,9 +755,9 @@ static bool only_mappable_for_reloc(unsigned int flags) } static int -i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, - struct intel_engine_cs *engine, - bool *need_reloc) +eb_reserve_vma(struct i915_vma *vma, + struct intel_engine_cs *engine, + bool *need_reloc) { struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; @@ -878,34 +876,27 @@ eb_vma_misplaced(struct i915_vma *vma) return false; } -static int -i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, - struct list_head *vmas, - struct i915_gem_context *ctx, - bool *need_relocs) +static int eb_reserve(struct i915_execbuffer *eb) { + const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4; + const bool needs_unfenced_map = INTEL_INFO(eb->i915)->unfenced_needs_alignment; struct drm_i915_gem_object *obj; struct i915_vma *vma; - struct i915_address_space *vm; struct list_head ordered_vmas; struct list_head pinned_vmas; - bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; - bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment; int retry; - vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; - INIT_LIST_HEAD(&ordered_vmas); INIT_LIST_HEAD(&pinned_vmas); - while (!list_empty(vmas)) { + while (!list_empty(&eb->vmas)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; - vma = list_first_entry(vmas, struct i915_vma, exec_list); + vma = 
list_first_entry(&eb->vmas, struct i915_vma, exec_list); obj = vma->obj; entry = vma->exec_entry; - if (ctx->flags & CONTEXT_NO_ZEROMAP) + if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; if (!has_fenced_gpu_access) @@ -927,8 +918,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; } - list_splice(&ordered_vmas, vmas); - list_splice(&pinned_vmas, vmas); + list_splice(&ordered_vmas, &eb->vmas); + list_splice(&pinned_vmas, &eb->vmas); /* Attempt to pin all of the buffers into the GTT. * This is done in 3 phases: @@ -947,27 +938,24 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, int ret = 0; /* Unbind any ill-fitting objects or pin. */ - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { if (!drm_mm_node_allocated(&vma->node)) continue; if (eb_vma_misplaced(vma)) ret = i915_vma_unbind(vma); else - ret = i915_gem_execbuffer_reserve_vma(vma, - engine, - need_relocs); + ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); if (ret) goto err; } /* Bind fresh objects */ - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { if (drm_mm_node_allocated(&vma->node)) continue; - ret = i915_gem_execbuffer_reserve_vma(vma, engine, - need_relocs); + ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); if (ret) goto err; } @@ -977,39 +965,30 @@ err: return ret; /* Decrement pin count for bound objects */ - list_for_each_entry(vma, vmas, exec_list) - i915_gem_execbuffer_unreserve_vma(vma); + list_for_each_entry(vma, &eb->vmas, exec_list) + eb_unreserve_vma(vma); - ret = i915_gem_evict_vm(vm, true); + ret = i915_gem_evict_vm(eb->vm, true); if (ret) return ret; } while (1); } static int -i915_gem_execbuffer_relocate_slow(struct drm_device *dev, - struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file, - 
struct intel_engine_cs *engine, - struct eb_vmas *eb, - struct drm_i915_gem_exec_object2 *exec, - struct i915_gem_context *ctx) +eb_relocate_slow(struct i915_execbuffer *eb) { + const unsigned int count = eb->args->buffer_count; + struct drm_device *dev = &eb->i915->drm; struct drm_i915_gem_relocation_entry *reloc; - struct i915_address_space *vm; struct i915_vma *vma; - bool need_relocs; int *reloc_offset; int i, total, ret; - unsigned count = args->buffer_count; - - vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; /* We may process another execbuffer during the unlock... */ while (!list_empty(&eb->vmas)) { vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); - i915_gem_execbuffer_unreserve_vma(vma); + eb_unreserve_vma(vma); i915_vma_put(vma); } @@ -1017,7 +996,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, total = 0; for (i = 0; i < count; i++) - total += exec[i].relocation_count; + total += eb->exec[i].relocation_count; reloc_offset = kvmalloc_array(count, sizeof(*reloc_offset), GFP_KERNEL); reloc = kvmalloc_array(total, sizeof(*reloc), GFP_KERNEL); @@ -1034,10 +1013,10 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, u64 invalid_offset = (u64)-1; int j; - user_relocs = u64_to_user_ptr(exec[i].relocs_ptr); + user_relocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); if (copy_from_user(reloc+total, user_relocs, - exec[i].relocation_count * sizeof(*reloc))) { + eb->exec[i].relocation_count * sizeof(*reloc))) { ret = -EFAULT; mutex_lock(&dev->struct_mutex); goto err; @@ -1052,7 +1031,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, * happened we would make the mistake of assuming that the * relocations were valid. 
*/ - for (j = 0; j < exec[i].relocation_count; j++) { + for (j = 0; j < eb->exec[i].relocation_count; j++) { if (__copy_to_user(&user_relocs[j].presumed_offset, &invalid_offset, sizeof(invalid_offset))) { @@ -1063,7 +1042,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, } reloc_offset[i] = total; - total += exec[i].relocation_count; + total += eb->exec[i].relocation_count; } ret = i915_mutex_lock_interruptible(dev); @@ -1074,20 +1053,18 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, /* reacquire the objects */ eb_reset(eb); - ret = eb_lookup_vmas(eb, exec, args, vm, file); + ret = eb_lookup_vmas(eb); if (ret) goto err; - need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx, - &need_relocs); + ret = eb_reserve(eb); if (ret) goto err; list_for_each_entry(vma, &eb->vmas, exec_list) { - int offset = vma->exec_entry - exec; - ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb, - reloc + reloc_offset[offset]); + int idx = vma->exec_entry - eb->exec; + + ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[idx]); if (ret) goto err; } @@ -1105,13 +1082,12 @@ err: } static int -i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, - struct list_head *vmas) +eb_move_to_gpu(struct i915_execbuffer *eb) { struct i915_vma *vma; int ret; - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { @@ -1121,9 +1097,9 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (unlikely(!capture)) return -ENOMEM; - capture->next = req->capture_list; + capture->next = eb->request->capture_list; capture->vma = vma; - req->capture_list = capture; + eb->request->capture_list = capture; } if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) @@ -1135,22 +1111,22 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, } ret = 
i915_gem_request_await_object - (req, obj, obj->base.pending_write_domain); + (eb->request, obj, obj->base.pending_write_domain); if (ret) return ret; } /* Unconditionally flush any chipset caches (for streaming writes). */ - i915_gem_chipset_flush(req->engine->i915); + i915_gem_chipset_flush(eb->i915); /* Unconditionally invalidate GPU caches and TLBs. */ - return req->engine->emit_flush(req, EMIT_INVALIDATE); + return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); } static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) { - if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS) + if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) return false; /* Kernel clipping was a DRI1 misfeature */ @@ -1247,22 +1223,24 @@ validate_exec_list(struct drm_device *dev, return 0; } -static struct i915_gem_context * -i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *engine, const u32 ctx_id) +static int eb_select_context(struct i915_execbuffer *eb) { + unsigned int ctx_id = i915_execbuffer2_get_context_id(*eb->args); struct i915_gem_context *ctx; - ctx = i915_gem_context_lookup(file->driver_priv, ctx_id); - if (IS_ERR(ctx)) - return ctx; + ctx = i915_gem_context_lookup(eb->file->driver_priv, ctx_id); + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); - if (i915_gem_context_is_banned(ctx)) { + if (unlikely(i915_gem_context_is_banned(ctx))) { DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); - return ERR_PTR(-EIO); + return -EIO; } - return ctx; + eb->ctx = i915_gem_context_get(ctx); + eb->vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &eb->i915->ggtt.base; + + return 0; } static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) @@ -1327,12 +1305,11 @@ static void eb_export_fence(struct drm_i915_gem_object *obj, } static void -i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct drm_i915_gem_request *req) +eb_move_to_active(struct i915_execbuffer *eb) { struct i915_vma *vma; - list_for_each_entry(vma, vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; obj->base.write_domain = obj->base.pending_write_domain; @@ -1342,8 +1319,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, req, vma->exec_entry->flags); - eb_export_fence(obj, req, vma->exec_entry->flags); + i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags); + eb_export_fence(obj, eb->request, vma->exec_entry->flags); } } @@ -1373,29 +1350,22 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return 0; } -static struct i915_vma * -i915_gem_execbuffer_parse(struct intel_engine_cs *engine, - struct drm_i915_gem_exec_object2 *shadow_exec_entry, - struct drm_i915_gem_object *batch_obj, - struct eb_vmas *eb, - u32 batch_start_offset, - u32 batch_len, - bool is_master) +static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; int ret; - shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, - PAGE_ALIGN(batch_len)); + shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, + PAGE_ALIGN(eb->batch_len)); if (IS_ERR(shadow_batch_obj)) return ERR_CAST(shadow_batch_obj); - ret = intel_engine_cmd_parser(engine, - batch_obj, + ret = intel_engine_cmd_parser(eb->engine, + eb->batch->obj, shadow_batch_obj, - batch_start_offset, - batch_len, + 
eb->batch_start_offset, + eb->batch_len, is_master); if (ret) { if (ret == -EACCES) /* unhandled chained batch */ @@ -1409,9 +1379,8 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, if (IS_ERR(vma)) goto out; - memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); - - vma->exec_entry = shadow_exec_entry; + vma->exec_entry = + memset(&eb->shadow_exec_entry, 0, sizeof(*vma->exec_entry)); vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; i915_gem_object_get(shadow_batch_obj); list_add_tail(&vma->exec_list, &eb->vmas); @@ -1430,46 +1399,33 @@ add_to_client(struct drm_i915_gem_request *req, } static int -execbuf_submit(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) +execbuf_submit(struct i915_execbuffer *eb) { - u64 exec_start, exec_len; int ret; - ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); + ret = eb_move_to_gpu(eb); if (ret) return ret; - ret = i915_switch_context(params->request); + ret = i915_switch_context(eb->request); if (ret) return ret; - if (args->flags & I915_EXEC_CONSTANTS_MASK) { - DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n"); - return -EINVAL; - } - - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(params->request); + if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { + ret = i915_reset_gen7_sol_offsets(eb->request); if (ret) return ret; } - exec_len = args->batch_len; - exec_start = params->batch->node.start + - params->args_batch_start_offset; - - if (exec_len == 0) - exec_len = params->batch->size - params->args_batch_start_offset; - - ret = params->engine->emit_bb_start(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = eb->engine->emit_bb_start(eb->request, + eb->batch->node.start + + eb->batch_start_offset, + eb->batch_len, + eb->dispatch_flags); if (ret) return ret; - i915_gem_execbuffer_move_to_active(vmas, params->request); + eb_move_to_active(eb); return 0; } @@ -1551,27 +1507,16 @@ 
eb_select_engine(struct drm_i915_private *dev_priv, } static int -i915_gem_do_execbuffer(struct drm_device *dev, void *data, +i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct drm_i915_gem_exec_object2 *exec) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct eb_vmas *eb; - struct drm_i915_gem_exec_object2 shadow_exec_entry; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct i915_execbuffer_params params_master; /* XXX: will be removed later */ - struct i915_execbuffer_params *params = &params_master; - const u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u32 dispatch_flags; + struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; int out_fence_fd = -1; int ret; - bool need_relocs; if (!i915_gem_check_execbuffer(args)) return -EINVAL; @@ -1580,37 +1525,42 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) return ret; - dispatch_flags = 0; + eb.i915 = to_i915(dev); + eb.file = file; + eb.args = args; + eb.exec = exec; + eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; + reloc_cache_init(&eb.reloc_cache, eb.i915); + + eb.batch_start_offset = args->batch_start_offset; + eb.batch_len = args->batch_len; + + eb.dispatch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) return -EPERM; - dispatch_flags |= I915_DISPATCH_SECURE; + eb.dispatch_flags |= I915_DISPATCH_SECURE; } if (args->flags & I915_EXEC_IS_PINNED) - dispatch_flags |= I915_DISPATCH_PINNED; + eb.dispatch_flags |= I915_DISPATCH_PINNED; - engine = eb_select_engine(dev_priv, file, args); - if (!engine) + eb.engine = eb_select_engine(eb.i915, file, args); + if (!eb.engine) return -EINVAL; - if (args->buffer_count < 1) { - DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); - return -EINVAL; - } -
if (args->flags & I915_EXEC_RESOURCE_STREAMER) { - if (!HAS_RESOURCE_STREAMER(dev_priv)) { + if (!HAS_RESOURCE_STREAMER(eb.i915)) { DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); return -EINVAL; } - if (engine->id != RCS) { + if (eb.engine->id != RCS) { DRM_DEBUG("RS is not available on %s\n", - engine->name); + eb.engine->name); return -EINVAL; } - dispatch_flags |= I915_DISPATCH_RS; + eb.dispatch_flags |= I915_DISPATCH_RS; } if (args->flags & I915_EXEC_FENCE_IN) { @@ -1633,59 +1583,44 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - intel_runtime_pm_get(dev_priv); + intel_runtime_pm_get(eb.i915); ret = i915_mutex_lock_interruptible(dev); if (ret) goto pre_mutex_err; - ctx = i915_gem_validate_context(dev, file, engine, ctx_id); - if (IS_ERR(ctx)) { + ret = eb_select_context(&eb); + if (ret) { mutex_unlock(&dev->struct_mutex); - ret = PTR_ERR(ctx); goto pre_mutex_err; } - i915_gem_context_get(ctx); - - if (ctx->ppgtt) - vm = &ctx->ppgtt->base; - else - vm = &ggtt->base; - - memset(&params_master, 0x00, sizeof(params_master)); - - eb = eb_create(dev_priv, args); - if (eb == NULL) { - i915_gem_context_put(ctx); + if (eb_create(&eb)) { + i915_gem_context_put(eb.ctx); mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; goto pre_mutex_err; } /* Look up object handles */ - ret = eb_lookup_vmas(eb, exec, args, vm, file); + ret = eb_lookup_vmas(&eb); if (ret) goto err; /* take note of the batch buffer before we might reorder the lists */ - params->batch = eb_get_batch(eb); + eb.batch = eb_get_batch(&eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ - need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; - ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx, - &need_relocs); + ret = eb_reserve(&eb); if (ret) goto err; /* The objects are in their final locations, apply the relocations.
*/ - if (need_relocs) - ret = i915_gem_execbuffer_relocate(eb); + if (eb.need_relocs) + ret = eb_relocate(&eb); if (ret) { if (ret == -EFAULT) { - ret = i915_gem_execbuffer_relocate_slow(dev, args, file, - engine, - eb, exec, ctx); + ret = eb_relocate_slow(&eb); BUG_ON(!mutex_is_locked(&dev->struct_mutex)); } if (ret) @@ -1693,28 +1628,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (params->batch->obj->base.pending_write_domain) { + if (eb.batch->obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } - if (args->batch_start_offset > params->batch->size || - args->batch_len > params->batch->size - args->batch_start_offset) { + if (eb.batch_start_offset > eb.batch->size || + eb.batch_len > eb.batch->size - eb.batch_start_offset) { DRM_DEBUG("Attempting to use out-of-bounds batch\n"); ret = -EINVAL; goto err; } - params->args_batch_start_offset = args->batch_start_offset; - if (engine->needs_cmd_parser && args->batch_len) { + if (eb.engine->needs_cmd_parser && eb.batch_len) { struct i915_vma *vma; - vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry, - params->batch->obj, - eb, - args->batch_start_offset, - args->batch_len, - drm_is_current_master(file)); + vma = eb_parse(&eb, drm_is_current_master(file)); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -1730,19 +1659,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * specifically don't want that set on batches the * command parser has accepted. 
*/ - dispatch_flags |= I915_DISPATCH_SECURE; - params->args_batch_start_offset = 0; - params->batch = vma; + eb.dispatch_flags |= I915_DISPATCH_SECURE; + eb.batch_start_offset = 0; + eb.batch = vma; } } - params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + eb.batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (dispatch_flags & I915_DISPATCH_SECURE) { - struct drm_i915_gem_object *obj = params->batch->obj; + if (eb.dispatch_flags & I915_DISPATCH_SECURE) { + struct drm_i915_gem_object *obj = eb.batch->obj; struct i915_vma *vma; /* @@ -1761,25 +1692,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - params->batch = vma; + eb.batch = vma; } /* Allocate a request for this batch buffer nice and early. */ - params->request = i915_gem_request_alloc(engine, ctx); - if (IS_ERR(params->request)) { - ret = PTR_ERR(params->request); + eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); + if (IS_ERR(eb.request)) { + ret = PTR_ERR(eb.request); goto err_batch_unpin; } if (in_fence) { - ret = i915_gem_request_await_dma_fence(params->request, - in_fence); + ret = i915_gem_request_await_dma_fence(eb.request, in_fence); if (ret < 0) goto err_request; } if (out_fence_fd != -1) { - out_fence = sync_file_create(&params->request->fence); + out_fence = sync_file_create(&eb.request->fence); if (!out_fence) { ret = -ENOMEM; goto err_request; @@ -1792,26 +1722,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here.
*/ - params->request->batch = params->batch; + eb.request->batch = eb.batch; - /* - * Save assorted stuff away to pass through to *_submission(). - * NB: This data should be 'persistent' and not local as it will - * kept around beyond the duration of the IOCTL once the GPU - * scheduler arrives. - */ - params->dev = dev; - params->file = file; - params->engine = engine; - params->dispatch_flags = dispatch_flags; - params->ctx = ctx; - - trace_i915_gem_request_queue(params->request, dispatch_flags); - - ret = execbuf_submit(params, args, &eb->vmas); + trace_i915_gem_request_queue(eb.request, eb.dispatch_flags); + ret = execbuf_submit(&eb); err_request: - __i915_add_request(params->request, ret == 0); - add_to_client(params->request, file); + __i915_add_request(eb.request, ret == 0); + add_to_client(eb.request, file); if (out_fence) { if (ret == 0) { @@ -1831,19 +1748,17 @@ err_batch_unpin: * needs to be adjusted to also track the ggtt batch vma properly as * active. */ - if (dispatch_flags & I915_DISPATCH_SECURE) - i915_vma_unpin(params->batch); + if (eb.dispatch_flags & I915_DISPATCH_SECURE) + i915_vma_unpin(eb.batch); err: /* the request owns the ref now */ - i915_gem_context_put(ctx); - eb_destroy(eb); - + eb_destroy(&eb); mutex_unlock(&dev->struct_mutex); pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. 
*/ - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(eb.i915); if (out_fence_fd != -1) put_unused_fd(out_fence_fd); err_in_fence: @@ -1914,7 +1829,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); + ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); if (!ret) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); @@ -1973,7 +1888,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EFAULT; } - ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); + ret = i915_gem_do_execbuffer(dev, file, args, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ struct drm_i915_gem_exec_object2 __user *user_exec_list = From d55495b4dcce2efb4656edfe211eb0bfb27c3387 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Jun 2017 09:14:34 +0100 Subject: [PATCH 174/341] drm/i915: Use vma->exec_entry as our double-entry placeholder This has the benefit of not requiring us to manipulate the vma->exec_link list when tearing down the execbuffer, and is a marginally cheaper test to detect the user error. 
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170615081435.17699-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_evict.c | 17 +---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 77 ++++++++++++---------- drivers/gpu/drm/i915/i915_vma.c | 1 - 3 files changed, 44 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 51e365f70464..891247d79299 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -59,9 +59,6 @@ mark_free(struct drm_mm_scan *scan, if (i915_vma_is_pinned(vma)) return false; - if (WARN_ON(!list_empty(&vma->exec_list))) - return false; - if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) return false; @@ -160,8 +157,6 @@ search_again: list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { ret = drm_mm_scan_remove_block(&scan, &vma->node); BUG_ON(ret); - - INIT_LIST_HEAD(&vma->exec_list); } /* Can we unpin some objects such as idle hw contents, @@ -209,17 +204,12 @@ found: if (drm_mm_scan_remove_block(&scan, &vma->node)) __i915_vma_pin(vma); else - list_del_init(&vma->exec_list); + list_del(&vma->exec_list); } /* Unbinding will emit any required flushes */ ret = 0; - while (!list_empty(&eviction_list)) { - vma = list_first_entry(&eviction_list, - struct i915_vma, - exec_list); - - list_del_init(&vma->exec_list); + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { __i915_vma_unpin(vma); if (ret == 0) ret = i915_vma_unbind(vma); @@ -315,7 +305,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, } /* Overlap of objects in the same batch? 
*/ - if (i915_vma_is_pinned(vma) || !list_empty(&vma->exec_list)) { + if (i915_vma_is_pinned(vma)) { ret = -ENOSPC; if (vma->exec_entry && vma->exec_entry->flags & EXEC_OBJECT_PINNED) @@ -336,7 +326,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, } list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { - list_del_init(&vma->exec_list); __i915_vma_unpin(vma); if (ret == 0) ret = i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1c5a6a63a767..a7aa21dcc553 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -108,13 +108,40 @@ static int eb_create(struct i915_execbuffer *eb) eb->and = -eb->args->buffer_count; } - INIT_LIST_HEAD(&eb->vmas); return 0; } +static inline void +__eb_unreserve_vma(struct i915_vma *vma, + const struct drm_i915_gem_exec_object2 *entry) +{ + if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE)) + i915_vma_unpin_fence(vma); + + if (entry->flags & __EXEC_OBJECT_HAS_PIN) + __i915_vma_unpin(vma); +} + +static void +eb_unreserve_vma(struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + + __eb_unreserve_vma(vma, entry); + entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); +} + static void eb_reset(struct i915_execbuffer *eb) { + struct i915_vma *vma; + + list_for_each_entry(vma, &eb->vmas, exec_list) { + eb_unreserve_vma(vma); + i915_vma_put(vma); + vma->exec_entry = NULL; + } + if (eb->and >= 0) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } @@ -146,6 +173,8 @@ eb_lookup_vmas(struct i915_execbuffer *eb) struct list_head objects; int i, ret; + INIT_LIST_HEAD(&eb->vmas); + INIT_LIST_HEAD(&objects); spin_lock(&eb->file->table_lock); /* Grab a reference to the object and release the lock so we can lookup @@ -252,40 +281,23 @@ static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long han } } -static void 
-eb_unreserve_vma(struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry; - - if (!drm_mm_node_allocated(&vma->node)) - return; - - entry = vma->exec_entry; - - if (entry->flags & __EXEC_OBJECT_HAS_FENCE) - i915_vma_unpin_fence(vma); - - if (entry->flags & __EXEC_OBJECT_HAS_PIN) - __i915_vma_unpin(vma); - - entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); -} - static void eb_destroy(struct i915_execbuffer *eb) { - i915_gem_context_put(eb->ctx); + struct i915_vma *vma; - while (!list_empty(&eb->vmas)) { - struct i915_vma *vma; + list_for_each_entry(vma, &eb->vmas, exec_list) { + if (!vma->exec_entry) + continue; - vma = list_first_entry(&eb->vmas, - struct i915_vma, - exec_list); - list_del_init(&vma->exec_list); - eb_unreserve_vma(vma); + __eb_unreserve_vma(vma, vma->exec_entry); vma->exec_entry = NULL; i915_vma_put(vma); } + + i915_gem_context_put(eb->ctx); + + if (eb->buckets) + kfree(eb->buckets); } static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) @@ -985,13 +997,7 @@ eb_relocate_slow(struct i915_execbuffer *eb) int i, total, ret; /* We may process another execbuffer during the unlock... 
*/ - while (!list_empty(&eb->vmas)) { - vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); - list_del_init(&vma->exec_list); - eb_unreserve_vma(vma); - i915_vma_put(vma); - } - + eb_reset(eb); mutex_unlock(&dev->struct_mutex); total = 0; @@ -1052,7 +1058,6 @@ eb_relocate_slow(struct i915_execbuffer *eb) } /* reacquire the objects */ - eb_reset(eb); ret = eb_lookup_vmas(eb); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 1aba47024656..6cf32da682ec 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -85,7 +85,6 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->exec_list); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) init_request_active(&vma->last_read[i], i915_vma_retire); init_request_active(&vma->last_fence, NULL); From 8c45cec48e5871f93e56650f7e476d4ea7174a0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Jun 2017 09:14:35 +0100 Subject: [PATCH 175/341] drm/i915: Split vma exec_link/evict_link Currently the vma has one link member that is used for both holding its place in the execbuf reservation list, and in any eviction list. This dual property is quite tricky and error prone. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170615081435.17699-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_evict.c | 14 +++++----- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 32 +++++++++++----------- drivers/gpu/drm/i915/i915_vma.h | 7 +++-- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 891247d79299..204a2d9288ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -62,7 +62,7 @@ mark_free(struct drm_mm_scan *scan, if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) return false; - list_add(&vma->exec_list, unwind); + list_add(&vma->evict_link, unwind); return drm_mm_scan_add_block(scan, &vma->node); } @@ -154,7 +154,7 @@ search_again: } while (*++phase); /* Nothing found, clean up and bail out! */ - list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { + list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { ret = drm_mm_scan_remove_block(&scan, &vma->node); BUG_ON(ret); } @@ -200,16 +200,16 @@ found: * calling unbind (which may remove the active reference * of any of our objects, thus corrupting the list). */ - list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { + list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { if (drm_mm_scan_remove_block(&scan, &vma->node)) __i915_vma_pin(vma); else - list_del(&vma->exec_list); + list_del(&vma->evict_link); } /* Unbinding will emit any required flushes */ ret = 0; - list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { + list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) ret = i915_vma_unbind(vma); @@ -322,10 +322,10 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * reference) another in our eviction list. 
*/ __i915_vma_pin(vma); - list_add(&vma->exec_list, &eviction_list); + list_add(&vma->evict_link, &eviction_list); } - list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { + list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) ret = i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a7aa21dcc553..96705171e397 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -136,7 +136,7 @@ eb_reset(struct i915_execbuffer *eb) { struct i915_vma *vma; - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { eb_unreserve_vma(vma); i915_vma_put(vma); vma->exec_entry = NULL; @@ -149,7 +149,7 @@ eb_reset(struct i915_execbuffer *eb) static struct i915_vma * eb_get_batch(struct i915_execbuffer *eb) { - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_link); /* * SNA is doing fancy tricks with compressing batch buffers, which leads @@ -226,7 +226,7 @@ eb_lookup_vmas(struct i915_execbuffer *eb) } /* Transfer ownership from the objects list to the vmas list. 
*/ - list_add_tail(&vma->exec_list, &eb->vmas); + list_add_tail(&vma->exec_link, &eb->vmas); list_del_init(&obj->obj_exec_link); vma->exec_entry = &eb->exec[i]; @@ -285,7 +285,7 @@ static void eb_destroy(struct i915_execbuffer *eb) { struct i915_vma *vma; - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { if (!vma->exec_entry) continue; @@ -751,7 +751,7 @@ static int eb_relocate(struct i915_execbuffer *eb) struct i915_vma *vma; int ret = 0; - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { ret = eb_relocate_vma(vma, eb); if (ret) break; @@ -904,7 +904,7 @@ static int eb_reserve(struct i915_execbuffer *eb) struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; - vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); + vma = list_first_entry(&eb->vmas, struct i915_vma, exec_link); obj = vma->obj; entry = vma->exec_entry; @@ -920,12 +920,12 @@ static int eb_reserve(struct i915_execbuffer *eb) need_mappable = need_fence || need_reloc_mappable(vma); if (entry->flags & EXEC_OBJECT_PINNED) - list_move_tail(&vma->exec_list, &pinned_vmas); + list_move_tail(&vma->exec_link, &pinned_vmas); else if (need_mappable) { entry->flags |= __EXEC_OBJECT_NEEDS_MAP; - list_move(&vma->exec_list, &ordered_vmas); + list_move(&vma->exec_link, &ordered_vmas); } else - list_move_tail(&vma->exec_list, &ordered_vmas); + list_move_tail(&vma->exec_link, &ordered_vmas); obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; @@ -950,7 +950,7 @@ static int eb_reserve(struct i915_execbuffer *eb) int ret = 0; /* Unbind any ill-fitting objects or pin. 
*/ - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -963,7 +963,7 @@ static int eb_reserve(struct i915_execbuffer *eb) } /* Bind fresh objects */ - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { if (drm_mm_node_allocated(&vma->node)) continue; @@ -977,7 +977,7 @@ err: return ret; /* Decrement pin count for bound objects */ - list_for_each_entry(vma, &eb->vmas, exec_list) + list_for_each_entry(vma, &eb->vmas, exec_link) eb_unreserve_vma(vma); ret = i915_gem_evict_vm(eb->vm, true); @@ -1066,7 +1066,7 @@ eb_relocate_slow(struct i915_execbuffer *eb) if (ret) goto err; - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { int idx = vma->exec_entry - eb->exec; ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[idx]); @@ -1092,7 +1092,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb) struct i915_vma *vma; int ret; - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { struct drm_i915_gem_object *obj = vma->obj; if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { @@ -1314,7 +1314,7 @@ eb_move_to_active(struct i915_execbuffer *eb) { struct i915_vma *vma; - list_for_each_entry(vma, &eb->vmas, exec_list) { + list_for_each_entry(vma, &eb->vmas, exec_link) { struct drm_i915_gem_object *obj = vma->obj; obj->base.write_domain = obj->base.pending_write_domain; @@ -1388,7 +1388,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) memset(&eb->shadow_exec_entry, 0, sizeof(*vma->exec_entry)); vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; i915_gem_object_get(shadow_batch_obj); - list_add_tail(&vma->exec_list, &eb->vmas); + list_add_tail(&vma->exec_link, &eb->vmas); out: i915_gem_object_unpin_pages(shadow_batch_obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 
2e03f81dddbe..4d827300d1a8 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -100,8 +100,11 @@ struct i915_vma { struct list_head obj_link; /* Link in the object's VMA list */ struct rb_node obj_node; - /** This vma's place in the batchbuffer or on the eviction list */ - struct list_head exec_list; + /** This vma's place in the execbuf reservation list */ + struct list_head exec_link; + + /** This vma's place in the eviction list */ + struct list_head evict_link; /** * Used for performing relocations during execbuffer insertion. From aecd36b8a16b2302b33f49ba3fa24c955f1e32f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:13 +0300 Subject: [PATCH 176/341] drm/i915: Fix deadlock witha the pipe A quirk during resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass down the correct acquire context to the pipe A quirk load detect hack during display resume. Avoids deadlocking the entire thing. 
Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-2-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 99a3bfa528d3..680659125faa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc, static void skylake_pfit_enable(struct intel_crtc *crtc); static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); -static void intel_modeset_setup_hw_state(struct drm_device *dev); +static void intel_modeset_setup_hw_state(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); struct intel_limit { @@ -3461,7 +3462,7 @@ __intel_display_resume(struct drm_device *dev, struct drm_crtc *crtc; int i, ret; - intel_modeset_setup_hw_state(dev); + intel_modeset_setup_hw_state(dev, ctx); i915_redisable_vga(to_i915(dev)); if (!state) @@ -15109,7 +15110,7 @@ int intel_modeset_init(struct drm_device *dev) intel_setup_outputs(dev_priv); drm_modeset_lock_all(dev); - intel_modeset_setup_hw_state(dev); + intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx); drm_modeset_unlock_all(dev); for_each_intel_crtc(dev, crtc) { @@ -15146,13 +15147,13 @@ int intel_modeset_init(struct drm_device *dev) return 0; } -static void intel_enable_pipe_a(struct drm_device *dev) +static void intel_enable_pipe_a(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; struct 
drm_connector *crt = NULL; struct intel_load_detect_pipe load_detect_temp; - struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx; int ret; /* We can't just switch on the pipe A, we need to set things up with a @@ -15224,7 +15225,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv, (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); } -static void intel_sanitize_crtc(struct intel_crtc *crtc) +static void intel_sanitize_crtc(struct intel_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -15280,7 +15282,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * resume. Force-enable the pipe to fix this, the update_dpms * call below we restore the pipe to the right state, but leave * the required bits on. */ - intel_enable_pipe_a(dev); + intel_enable_pipe_a(dev, ctx); } /* Adjust the state of the output pipe according to whether we @@ -15583,7 +15585,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) * and sanitizes it to the current state */ static void -intel_modeset_setup_hw_state(struct drm_device *dev) +intel_modeset_setup_hw_state(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; @@ -15603,7 +15606,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev) for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - intel_sanitize_crtc(crtc); + intel_sanitize_crtc(crtc, ctx); intel_dump_pipe_config(crtc, crtc->config, "[setup_hw_state]"); } From da1d0e265535634bba80d44510b864c620549bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:14 +0300 Subject: [PATCH 177/341] drm/i915: Plumb the correct acquire ctx into intel_crtc_disable_noatomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If intel_crtc_disable_noatomic() 
were to ever get called during resume we'd end up deadlocking since resume has its own acqcuire_ctx but intel_crtc_disable_noatomic() still tries to use the mode_config.acquire_ctx. Pass down the correct acquire ctx from the top. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-3-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 680659125faa..efd7c5de5b5a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5841,7 +5841,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_update_watermarks(intel_crtc); } -static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) +static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) { struct intel_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); @@ -5871,7 +5872,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) return; } - state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; + state->acquire_ctx = ctx; /* Everything's already locked, -EDEADLK can't happen. */ crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); @@ -15272,7 +15273,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, plane = crtc->plane; crtc->base.primary->state->visible = true; crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base); + intel_crtc_disable_noatomic(&crtc->base, ctx); crtc->plane = plane; } @@ -15288,7 +15289,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. 
*/ if (crtc->active && !intel_crtc_has_encoders(crtc)) - intel_crtc_disable_noatomic(&crtc->base); + intel_crtc_disable_noatomic(&crtc->base, ctx); if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) { /* From bb408dd2b2b040f7e483c21ed69353e80d890329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:15 +0300 Subject: [PATCH 178/341] drm/i915: Use a loop for the "three times for luck" DPLL procedure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The magic "enable the DPLL three times" sequence feels like it deserves a loop. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula Reviewed-by: Chris Wilson Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index efd7c5de5b5a..731800542159 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1550,6 +1550,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); i915_reg_t reg = DPLL(crtc->pipe); u32 dpll = crtc->config->dpll_hw_state.dpll; + int i; assert_pipe_disabled(dev_priv, crtc->pipe); @@ -1596,15 +1597,11 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) } /* We do this three times for luck */ - I915_WRITE(reg, dpll); - POSTING_READ(reg); - udelay(150); /* wait for warmup */ - I915_WRITE(reg, dpll); - POSTING_READ(reg); - udelay(150); /* wait for warmup */ - I915_WRITE(reg, dpll); - POSTING_READ(reg); - udelay(150); /* wait for warmup */ + for (i = 0; i < 3; i++) { + I915_WRITE(reg, dpll); + POSTING_READ(reg); + udelay(150); /* wait for warmup */ + } } /** From 2ee0da163196baf9baa05b7464594342788213f2 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:16 +0300 Subject: [PATCH 179/341] drm/i915: Add i830 "pipes power well" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 830 more or less requires both pipes and DPLLs to remain on as long as either pipe is needed. However, when neither pipe is actually needed, we can save a bit of power by turning everything off. To do that we add a new "power well" that turns both pipes and DPLLs on and off in the right order. Seems to save ~50mW on my Fujitsu-Siemens Lifebook S6010. This also avoids having to abuse the load detection to force pipe A on at init time. That was never very robust, and it only worked for one pipe, whereas 830 really needs both pipes enabled. As a bonus the 830 pipe quirk is now a bit more isolated from the rest of the mode setting infrastructure, which should mean that it's much less likely someone will accidentally break it in the future. The extra cost is of course slight code duplication, but that seems like a worthwhile tradeoff here.
v2; s/BIT/BIT_ULL/ Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-5-ville.syrjala@linux.intel.com Acked-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 92 ++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_runtime_pm.c | 64 +++++++++++++++++ 3 files changed, 157 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 731800542159..379679eb4366 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5836,6 +5836,10 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, if (!dev_priv->display.initial_watermarks) intel_update_watermarks(intel_crtc); + + /* clock the pipe down to 640x480@60 to potentially save power */ + if (IS_I830(dev_priv)) + i830_enable_pipe(dev_priv, pipe); } static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, @@ -15145,6 +15149,91 @@ int intel_modeset_init(struct drm_device *dev) return 0; } +void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + /* 640x480@60Hz, ~25175 kHz */ + struct dpll clock = { + .m1 = 18, + .m2 = 7, + .p1 = 13, + .p2 = 4, + .n = 2, + }; + u32 dpll, fp; + int i; + + WARN_ON(i9xx_calc_dpll_params(48000, &clock) != 25154); + + DRM_DEBUG_KMS("enabling pipe %c due to force quirk (vco=%d dot=%d)\n", + pipe_name(pipe), clock.vco, clock.dot); + + fp = i9xx_dpll_compute_fp(&clock); + dpll = (I915_READ(DPLL(pipe)) & DPLL_DVO_2X_MODE) | + DPLL_VGA_MODE_DIS | + ((clock.p1 - 2) << DPLL_FPA01_P1_POST_DIV_SHIFT) | + PLL_P2_DIVIDE_BY_4 | + PLL_REF_INPUT_DREFCLK | + DPLL_VCO_ENABLE; + + I915_WRITE(FP0(pipe), fp); + I915_WRITE(FP1(pipe), fp); + + I915_WRITE(HTOTAL(pipe), (640 - 1) | ((800 - 1) << 16)); + I915_WRITE(HBLANK(pipe), (640 - 1) | ((800 - 1) << 16)); + I915_WRITE(HSYNC(pipe), (656 - 1) | ((752 - 1) << 16)); + I915_WRITE(VTOTAL(pipe), (480 - 1) | ((525 - 1) << 
16)); + I915_WRITE(VBLANK(pipe), (480 - 1) | ((525 - 1) << 16)); + I915_WRITE(VSYNC(pipe), (490 - 1) | ((492 - 1) << 16)); + I915_WRITE(PIPESRC(pipe), ((640 - 1) << 16) | (480 - 1)); + + /* + * Apparently we need to have VGA mode enabled prior to changing + * the P1/P2 dividers. Otherwise the DPLL will keep using the old + * dividers, even though the register value does change. + */ + I915_WRITE(DPLL(pipe), dpll & ~DPLL_VGA_MODE_DIS); + I915_WRITE(DPLL(pipe), dpll); + + /* Wait for the clocks to stabilize. */ + POSTING_READ(DPLL(pipe)); + udelay(150); + + /* The pixel multiplier can only be updated once the + * DPLL is enabled and the clocks are stable. + * + * So write it again. + */ + I915_WRITE(DPLL(pipe), dpll); + + /* We do this three times for luck */ + for (i = 0; i < 3 ; i++) { + I915_WRITE(DPLL(pipe), dpll); + POSTING_READ(DPLL(pipe)); + udelay(150); /* wait for warmup */ + } + + I915_WRITE(PIPECONF(pipe), PIPECONF_ENABLE | PIPECONF_PROGRESSIVE); + POSTING_READ(PIPECONF(pipe)); +} + +void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", + pipe_name(pipe)); + + assert_plane_disabled(dev_priv, PLANE_A); + assert_plane_disabled(dev_priv, PLANE_B); + + I915_WRITE(PIPECONF(pipe), 0); + POSTING_READ(PIPECONF(pipe)); + + if (wait_for(pipe_dsl_stopped(dev_priv, pipe), 100)) + DRM_ERROR("pipe %c off wait timed out\n", pipe_name(pipe)); + + I915_WRITE(DPLL(pipe), DPLL_VGA_MODE_DIS); + POSTING_READ(DPLL(pipe)); +} + static void intel_enable_pipe_a(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx) { @@ -15274,7 +15363,8 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, crtc->plane = plane; } - if (dev_priv->quirks & QUIRK_PIPEA_FORCE && + if (!IS_I830(dev_priv) && + dev_priv->quirks & QUIRK_PIPEA_FORCE && crtc->pipe == PIPE_A && !crtc->active) { /* BIOS forgot to enable pipe A, this mostly happens after * resume. 
Force-enable the pipe to fix this, the update_dpms diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2bc3326f4068..4d3982bb596a 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1332,6 +1332,8 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state); /* intel_display.c */ +void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); +void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); void intel_update_rawclk(struct drm_i915_private *dev_priv); int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 436ec7a7b843..efe80ed5fd4d 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1041,6 +1041,38 @@ static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, return true; } +static void i830_pipes_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + if ((I915_READ(PIPECONF(PIPE_A)) & PIPECONF_ENABLE) == 0) + i830_enable_pipe(dev_priv, PIPE_A); + if ((I915_READ(PIPECONF(PIPE_B)) & PIPECONF_ENABLE) == 0) + i830_enable_pipe(dev_priv, PIPE_B); +} + +static void i830_pipes_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + i830_disable_pipe(dev_priv, PIPE_B); + i830_disable_pipe(dev_priv, PIPE_A); +} + +static bool i830_pipes_power_well_enabled(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + return I915_READ(PIPECONF(PIPE_A)) & PIPECONF_ENABLE && + I915_READ(PIPECONF(PIPE_B)) & PIPECONF_ENABLE; +} + +static void i830_pipes_power_well_sync_hw(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + if (power_well->count > 0) + 
i830_pipes_power_well_enable(dev_priv, power_well); + else + i830_pipes_power_well_disable(dev_priv, power_well); +} + static void vlv_set_power_well(struct drm_i915_private *dev_priv, struct i915_power_well *power_well, bool enable) { @@ -1929,6 +1961,15 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_D) | \ BIT_ULL(POWER_DOMAIN_INIT)) +#define I830_PIPES_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, @@ -1959,6 +2000,27 @@ static struct i915_power_well i9xx_always_on_power_well[] = { }, }; +static const struct i915_power_well_ops i830_pipes_power_well_ops = { + .sync_hw = i830_pipes_power_well_sync_hw, + .enable = i830_pipes_power_well_enable, + .disable = i830_pipes_power_well_disable, + .is_enabled = i830_pipes_power_well_enabled, +}; + +static struct i915_power_well i830_power_wells[] = { + { + .name = "always-on", + .always_on = 1, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + }, + { + .name = "pipes", + .domains = I830_PIPES_POWER_DOMAINS, + .ops = &i830_pipes_power_well_ops, + }, +}; + static const struct i915_power_well_ops hsw_power_well_ops = { .sync_hw = hsw_power_well_sync_hw, .enable = hsw_power_well_enable, @@ -2504,6 +2566,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, chv_power_wells); } else if (IS_VALLEYVIEW(dev_priv)) { set_power_wells(power_domains, vlv_power_wells); + } else if (IS_I830(dev_priv)) { + set_power_wells(power_domains, i830_power_wells); } else { set_power_wells(power_domains, i9xx_always_on_power_well); } 
From dc453e336c33615dbfbf78940d4f599c09ad17a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:17 +0300 Subject: [PATCH 180/341] drm/i915: Drop pipe A quirk for Toshiba Protege R205-S209 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipe A force quirk shouldn't be needed except on 830. So let's nuke it for the Toshiba Protege R-205/S-209 945 machines. This quirk pre-dates KMS so its usefulness is doubtful at best now. Unfortunately the original bug report [1] isn't very helpful since it doesn't describe the symptoms. And the commit message in xf86-video-intel commit ecdb5963ef68 ("Add pipe A force enable quirk for Toshiba Portege R205-S209") is not much help either. However, if we assume the problem was the typical "closing the lid hangs the box" type of thing, we already nuked the quirk for another 945 machine in commit 736a69ca8c99 ("drm/i915: Drop PIPE-A quirk for 945GSE HP Mini") and so I hope we can drop this one as well.
[1] https://bugs.freedesktop.org/show_bug.cgi?id=14944 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-6-ville.syrjala@linux.intel.com Acked-by: Chris Wilson Acked-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 379679eb4366..f84691fb909d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14817,9 +14817,6 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = { }; static struct intel_quirk intel_quirks[] = { - /* Toshiba Protege R-205, S-209 needs pipe A force quirk */ - { 0x2592, 0x1179, 0x0001, quirk_pipea_force }, - /* ThinkPad T60 needs pipe A force quirk (bug #16494) */ { 0x2782, 0x17aa, 0x201a, quirk_pipea_force }, From b82a682d3234115dda6c8d05c31bafe98a66c885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:18 +0300 Subject: [PATCH 181/341] drm/i915: Drop pipe A quirk for Thinkapd T60 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipe A force quirk shouldn't be needed except on 830. So let's nuke it for the IBM Thinkpad T60 945 machines. This quirk pre-dates KMS so its usefulness is doubtful at best now. The original bug report [1] describes the symptoms as "system hang on closing T60 panel lid", and we already dropped a similar quirk for another 945 machine in commit 736a69ca8c99 ("drm/i915: Drop PIPE-A quirk for 945GSE HP Mini") so I'm hopeful we can drop this one as well. 
The quirk was added into xf86-video-intel in commit 08903abe4dc0 ("Add pipe a force enable quirk for Lenovo T60") [1] https://bugs.freedesktop.org/show_bug.cgi?id=16494 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-7-ville.syrjala@linux.intel.com Acked-by: Chris Wilson Acked-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f84691fb909d..c3d9c8e511c6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14817,9 +14817,6 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = { }; static struct intel_quirk intel_quirks[] = { - /* ThinkPad T60 needs pipe A force quirk (bug #16494) */ - { 0x2782, 0x17aa, 0x201a, quirk_pipea_force }, - /* 830 needs to leave pipe A & dpll A up */ { 0x3577, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force }, From e56134bc79569d5aa912c957f2fd4d92ea21fcc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:19 +0300 Subject: [PATCH 182/341] drm/i915: Remove pipe A quirk remnants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With 830 the only thing needing pipe quirks, we can just drop the quirk defines and replace the checks with IS_I830() checks. 
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-8-ville.syrjala@linux.intel.com Acked-by: Chris Wilson Acked-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/intel_display.c | 92 +++------------------------- drivers/gpu/drm/i915/intel_overlay.c | 1 - 3 files changed, 10 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5dbaf6b24494..a1b2672cfe56 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1161,11 +1161,9 @@ enum intel_sbi_destination { SBI_MPHY, }; -#define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) #define QUIRK_BACKLIGHT_PRESENT (1<<3) -#define QUIRK_PIPEB_FORCE (1<<4) #define QUIRK_PIN_SWIZZLED_PAGES (1<<5) struct intel_fbdev; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c3d9c8e511c6..4836e537c7f8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1193,9 +1193,8 @@ void assert_pipe(struct drm_i915_private *dev_priv, pipe); enum intel_display_power_domain power_domain; - /* if we need the pipe quirk it must be always on */ - if ((pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) || - (pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE)) + /* we keep both pipes enabled on 830 */ + if (IS_I830(dev_priv)) state = true; power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); @@ -1629,8 +1628,7 @@ static void i9xx_disable_pll(struct intel_crtc *crtc) } /* Don't disable pipe or pipe PLLs if needed */ - if ((pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) || - (pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE)) + if (IS_I830(dev_priv)) return; /* Make sure the pipe isn't still relying on us */ @@ -1913,8 +1911,8 @@ static void intel_enable_pipe(struct intel_crtc *crtc) reg = PIPECONF(cpu_transcoder); 
val = I915_READ(reg); if (val & PIPECONF_ENABLE) { - WARN_ON(!((pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) || - (pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE))); + /* we keep both pipes enabled on 830 */ + WARN_ON(!IS_I830(dev_priv)); return; } @@ -1974,8 +1972,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) val &= ~PIPECONF_DOUBLE_WIDE; /* Don't disable pipe or pipe PLLs if needed */ - if (!(pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) && - !(pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE)) + if (!IS_I830(dev_priv)) val &= ~PIPECONF_ENABLE; I915_WRITE(reg, val); @@ -7045,8 +7042,8 @@ static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc) pipeconf = 0; - if ((intel_crtc->pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) || - (intel_crtc->pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE)) + /* we keep both pipes enabled on 830 */ + if (IS_I830(dev_priv)) pipeconf |= I915_READ(PIPECONF(intel_crtc->pipe)) & PIPECONF_ENABLE; if (intel_crtc->config->double_wide) @@ -12231,9 +12228,8 @@ verify_crtc_state(struct drm_crtc *crtc, active = dev_priv->display.get_pipe_config(intel_crtc, pipe_config); - /* hw state is inconsistent with the pipe quirk */ - if ((intel_crtc->pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) || - (intel_crtc->pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE)) + /* we keep both pipes enabled on 830 */ + if (IS_I830(dev_priv)) active = new_crtc_state->active; I915_STATE_WARN(new_crtc_state->active != active, @@ -14731,27 +14727,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } } -/* - * Some BIOSes insist on assuming the GPU's pipe A is enabled at suspend, - * resume, or other times. This quirk makes sure that's the case for - * affected systems. 
- */ -static void quirk_pipea_force(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - dev_priv->quirks |= QUIRK_PIPEA_FORCE; - DRM_INFO("applying pipe a force quirk\n"); -} - -static void quirk_pipeb_force(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - dev_priv->quirks |= QUIRK_PIPEB_FORCE; - DRM_INFO("applying pipe b force quirk\n"); -} - /* * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason */ @@ -14817,12 +14792,6 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = { }; static struct intel_quirk intel_quirks[] = { - /* 830 needs to leave pipe A & dpll A up */ - { 0x3577, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force }, - - /* 830 needs to leave pipe B & dpll B up */ - { 0x3577, PCI_ANY_ID, PCI_ANY_ID, quirk_pipeb_force }, - /* Lenovo U160 cannot use SSC on LVDS */ { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, @@ -15228,37 +15197,6 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) POSTING_READ(DPLL(pipe)); } -static void intel_enable_pipe_a(struct drm_device *dev, - struct drm_modeset_acquire_ctx *ctx) -{ - struct intel_connector *connector; - struct drm_connector_list_iter conn_iter; - struct drm_connector *crt = NULL; - struct intel_load_detect_pipe load_detect_temp; - int ret; - - /* We can't just switch on the pipe A, we need to set things up with a - * proper mode and output configuration. As a gross hack, enable pipe A - * by enabling the load detect pipe once. 
*/ - drm_connector_list_iter_begin(dev, &conn_iter); - for_each_intel_connector_iter(connector, &conn_iter) { - if (connector->encoder->type == INTEL_OUTPUT_ANALOG) { - crt = &connector->base; - break; - } - } - drm_connector_list_iter_end(&conn_iter); - - if (!crt) - return; - - ret = intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, ctx); - WARN(ret < 0, "All modeset mutexes are locked, but intel_get_load_detect_pipe failed\n"); - - if (ret > 0) - intel_release_load_detect_pipe(crt, &load_detect_temp, ctx); -} - static bool intel_check_plane_mapping(struct intel_crtc *crtc) { @@ -15357,16 +15295,6 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, crtc->plane = plane; } - if (!IS_I830(dev_priv) && - dev_priv->quirks & QUIRK_PIPEA_FORCE && - crtc->pipe == PIPE_A && !crtc->active) { - /* BIOS forgot to enable pipe A, this mostly happens after - * resume. Force-enable the pipe to fix this, the update_dpms - * call below we restore the pipe to the right state, but leave - * the required bits on. */ - intel_enable_pipe_a(dev, ctx); - } - /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ if (crtc->active && !intel_crtc_has_encoders(crtc)) diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 2e0c56ed22bb..b96aed941b97 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -270,7 +270,6 @@ static int intel_overlay_on(struct intel_overlay *overlay) u32 *cs; WARN_ON(overlay->active); - WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); req = alloc_request(overlay); if (IS_ERR(req)) From cd15fb64ee56192760ad5c1e2ad97a65e735b18b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 15 Jun 2017 08:39:15 -0400 Subject: [PATCH 183/341] Revert "dm mirror: use all available legs on multiple failures" This reverts commit 12a7cf5ba6c776a2621d8972c7d42e8d3d959d20. 
This commit apparently attempted to fix an issue that didn't really exist, furthermore: this commit is the source of deadlocks and crashes seen in multiple cases related to failing the primary mirror dev while syncing. Reported-by: Jonathan Brassow Signed-off-by: Mike Snitzer --- drivers/md/dm-raid1.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index e61c45047c25..4da8858856fb 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) struct dm_raid1_bio_record { struct mirror *m; + /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; region_t write_region; }; @@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) struct dm_raid1_bio_record *bio_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + bio_record->details.bi_bdev = NULL; + if (rw == WRITE) { /* Save region for mirror_end_io() handler */ bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); @@ -1256,12 +1259,22 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) } if (error == -EOPNOTSUPP) - return error; + goto out; if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) - return error; + goto out; if (unlikely(error)) { + if (!bio_record->details.bi_bdev) { + /* + * There wasn't enough memory to record necessary + * information for a retry or there was no other + * mirror in-sync. + */ + DMERR_LIMIT("Mirror read failed."); + return -EIO; + } + m = bio_record->m; DMERR("Mirror read failed from %s. 
Trying alternative device.", @@ -1277,6 +1290,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) bd = &bio_record->details; dm_bio_restore(bd, bio); + bio_record->details.bi_bdev = NULL; bio->bi_error = 0; queue_bio(ms, bio, rw); @@ -1285,6 +1299,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) DMERR("All replicated volumes dead, failing I/O"); } +out: + bio_record->details.bi_bdev = NULL; + return error; } From 29962acaa07c151a7ddfd3ff56ba5844889681fb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 13 Jun 2017 14:47:51 +0100 Subject: [PATCH 184/341] drm/i915/cnl: make function cnl_ddi_dp_set_dpll_hw_state static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function cnl_ddi_dp_set_dpll_hw_state does not need to be in global scope, so make it static. Cleans up sparse warning: "symbol 'cnl_ddi_dp_set_dpll_hw_state' was not declared. Should it be static?" Signed-off-by: Colin Ian King Link: http://patchwork.freedesktop.org/patch/msgid/20170613134751.29196-1-colin.king@canonical.com Reviewed-by: Tvrtko Ursulin Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 8e669b6254ae..2f7b0e64f628 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2292,8 +2292,9 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, return true; } -bool cnl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +cnl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { uint32_t cfgcr0; From f6262bda462e81e959b80a96dac799bd9df27f73 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 13 Jun 2017 13:03:59 -0700 Subject: [PATCH 185/341] drm/i915: Don't enable backlight at setup 
time. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maarten and Ville noticed that we are enabling backlight via DP aux very early in the modeset_init path via the intel_dp_aux_setup_backlight() function, since commit e7156c833903 ("drm/i915: Add Backlight Control using DPCD for eDP connectors (v9)"). Looks like all we need to do during _setup_backlight() is read the current brightness state instead of modifying it. v2: Rewrote commit message. Cc: Ville Syrjala Cc: Maarten Lankhorst Cc: Jani Nikula Cc: Yetunde Adebisi Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Maarten Lankhorst Acked-by: Jani Nikula Tested-by: Puthikorn Voravootivat Fixes: e7156c833903 ("drm/i915: Add Backlight Control using DPCD for eDP connectors (v9)") Link: http://patchwork.freedesktop.org/patch/msgid/1497384239-2965-1-git-send-email-dhinakaran.pandiyan@intel.com Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 6cc62980d0da..228ca06d9f0b 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -80,10 +80,6 @@ static uint32_t intel_dp_aux_get_backlight(struct intel_connector *connector) static void intel_dp_aux_set_backlight(const struct drm_connector_state *conn_state, u32 level) { - /* - * conn_state->best_encoder is likely NULL when called from - * intel_dp_aux_setup_backlight() - */ struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); uint8_t vals[2] = { 0x0 }; @@ -106,10 +102,6 @@ static void intel_dp_aux_enable_backlight(const struct intel_crtc_state *crtc_st const struct drm_connector_state *conn_state) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - /* - * 
conn_state->best_encoder (and crtc_state) are NULL when called from - * intel_dp_aux_setup_backlight() - */ struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); uint8_t dpcd_buf = 0; uint8_t edp_backlight_mode = 0; @@ -156,8 +148,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); struct intel_panel *panel = &connector->panel; - intel_dp_aux_enable_backlight(NULL, connector->base.state); - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) panel->backlight.max = 0xFFFF; else From f8a894b218138888542a5058d0e902378fd0d4ec Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 15 Jun 2017 16:33:58 +0800 Subject: [PATCH 186/341] ipv6: fix calling in6_ifa_hold incorrectly for dad work Now when starting the dad work in addrconf_mod_dad_work, if the dad work is idle and queued, it needs to hold ifa. The problem is there's one gap in [1], during which the pending dad work may be removed elsewhere; it will then miss holding ifa, but the dad work is still idle and queued. if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); <--------------[1] mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); A use-after-free issue can be caused by this. Chen Wei found this issue when WARN_ON(!hlist_unhashed(&ifp->addr_lst)) in net6_ifa_finish_destroy was hit because of it. As Hannes suggested, this patch is to fix it by holding ifa first in addrconf_mod_dad_work, then calling mod_delayed_work and putting ifa if the dad_work is already in queue. Note that this patch did not choose to fix it with: if (!mod_delayed_work(delay)) in6_ifa_hold(ifp); As with it, when delay == 0, dad_work would be scheduled immediately, all addrconf_mod_dad_work(0) calls would have to be moved under ifp->lock. Reported-by: Wei Chen Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6a4fb1e629fb..686c92375e81 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -332,9 +332,9 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, unsigned long delay) { - if (!delayed_work_pending(&ifp->dad_work)) - in6_ifa_hold(ifp); - mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); + in6_ifa_hold(ifp); + if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay)) + in6_ifa_put(ifp); } static int snmp6_alloc_dev(struct inet6_dev *idev) From 6d0507a777fbc533f7f1bf5664a81982dd50dece Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 12 Jun 2017 12:47:32 +0100 Subject: [PATCH 187/341] brcmfmac: add parameter to pass error code in firmware callback Extend the parameters in the firmware callback so it can be called upon success and failure. This allows the caller to properly clear all resources in the failure path. Right now the error code is always zero, ie. success. 
Cc: stable@vger.kernel.org # 4.9.x- Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/firmware.c | 10 +++++----- .../broadcom/brcm80211/brcmfmac/firmware.h | 4 ++-- .../wireless/broadcom/brcm80211/brcmfmac/pcie.c | 17 ++++++++++++----- .../wireless/broadcom/brcm80211/brcmfmac/sdio.c | 17 +++++++++++------ .../wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 ++++-- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index c7c1e9906500..ae61a24202ac 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -442,7 +442,7 @@ struct brcmf_fw { const char *nvram_name; u16 domain_nr; u16 bus_nr; - void (*done)(struct device *dev, const struct firmware *fw, + void (*done)(struct device *dev, int err, const struct firmware *fw, void *nvram_image, u32 nvram_len); }; @@ -477,7 +477,7 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) if (!nvram && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL)) goto fail; - fwctx->done(fwctx->dev, fwctx->code, nvram, nvram_length); + fwctx->done(fwctx->dev, 0, fwctx->code, nvram, nvram_length); kfree(fwctx); return; @@ -499,7 +499,7 @@ static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx) /* only requested code so done here */ if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) { - fwctx->done(fwctx->dev, fw, NULL, 0); + fwctx->done(fwctx->dev, 0, fw, NULL, 0); kfree(fwctx); return; } @@ -522,7 +522,7 @@ fail: int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags, const char *code, const char *nvram, - void (*fw_cb)(struct device *dev, + void (*fw_cb)(struct device *dev, int err, const struct firmware *fw, void *nvram_image, u32 nvram_len), u16 
domain_nr, u16 bus_nr) @@ -555,7 +555,7 @@ int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags, int brcmf_fw_get_firmwares(struct device *dev, u16 flags, const char *code, const char *nvram, - void (*fw_cb)(struct device *dev, + void (*fw_cb)(struct device *dev, int err, const struct firmware *fw, void *nvram_image, u32 nvram_len)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h index d3c9f0d52ae3..8fa4b7e1ab3d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h @@ -73,13 +73,13 @@ void brcmf_fw_nvram_free(void *nvram); */ int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags, const char *code, const char *nvram, - void (*fw_cb)(struct device *dev, + void (*fw_cb)(struct device *dev, int err, const struct firmware *fw, void *nvram_image, u32 nvram_len), u16 domain_nr, u16 bus_nr); int brcmf_fw_get_firmwares(struct device *dev, u16 flags, const char *code, const char *nvram, - void (*fw_cb)(struct device *dev, + void (*fw_cb)(struct device *dev, int err, const struct firmware *fw, void *nvram_image, u32 nvram_len)); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index f36b96dc6acd..f878706613e6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -1650,16 +1650,23 @@ static const struct brcmf_buscore_ops brcmf_pcie_buscore_ops = { .write32 = brcmf_pcie_buscore_write32, }; -static void brcmf_pcie_setup(struct device *dev, const struct firmware *fw, +static void brcmf_pcie_setup(struct device *dev, int ret, + const struct firmware *fw, void *nvram, u32 nvram_len) { - struct brcmf_bus *bus = dev_get_drvdata(dev); - struct brcmf_pciedev *pcie_bus_dev = bus->bus_priv.pcie; - struct brcmf_pciedev_info *devinfo = 
pcie_bus_dev->devinfo; + struct brcmf_bus *bus; + struct brcmf_pciedev *pcie_bus_dev; + struct brcmf_pciedev_info *devinfo; struct brcmf_commonring **flowrings; - int ret; u32 i; + /* check firmware loading result */ + if (ret) + goto fail; + + bus = dev_get_drvdata(dev); + pcie_bus_dev = bus->bus_priv.pcie; + devinfo = pcie_bus_dev->devinfo; brcmf_pcie_attach(devinfo); /* Some of the firmwares have the size of the memory of the device diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index e03450059b06..6e1fcdcde11c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -3982,21 +3982,26 @@ static const struct brcmf_bus_ops brcmf_sdio_bus_ops = { .get_memdump = brcmf_sdio_bus_get_memdump, }; -static void brcmf_sdio_firmware_callback(struct device *dev, +static void brcmf_sdio_firmware_callback(struct device *dev, int err, const struct firmware *code, void *nvram, u32 nvram_len) { - struct brcmf_bus *bus_if = dev_get_drvdata(dev); - struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio; - struct brcmf_sdio *bus = sdiodev->bus; - int err = 0; + struct brcmf_bus *bus_if; + struct brcmf_sdio_dev *sdiodev; + struct brcmf_sdio *bus; u8 saveclk; - brcmf_dbg(TRACE, "Enter: dev=%s\n", dev_name(dev)); + brcmf_dbg(TRACE, "Enter: dev=%s, err=%d\n", dev_name(dev), err); + if (err) + goto fail; + bus_if = dev_get_drvdata(dev); if (!bus_if->drvr) return; + sdiodev = bus_if->bus_priv.sdio; + bus = sdiodev->bus; + /* try to download image and nvram to the dongle */ bus->alp_only = true; err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index e4d545f9edee..9ce3b55c3ffe 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1159,13 +1159,15 @@ fail: return ret; } -static void brcmf_usb_probe_phase2(struct device *dev, +static void brcmf_usb_probe_phase2(struct device *dev, int ret, const struct firmware *fw, void *nvram, u32 nvlen) { struct brcmf_bus *bus = dev_get_drvdata(dev); struct brcmf_usbdev_info *devinfo; - int ret; + + if (ret) + goto error; brcmf_dbg(USB, "Start fw downloading\n"); From 03fb0e8393fae8ebb6710a99387853ed0becbc8e Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 12 Jun 2017 12:47:33 +0100 Subject: [PATCH 188/341] brcmfmac: use firmware callback upon failure to load When firmware loading failed the code used to unbind the device provided by the calling code. However, for the sdio driver two devices are bound and both need to be released upon failure. The callback has been extended with parameter to pass error code so add that in this commit upon firmware loading failure. Cc: stable@vger.kernel.org # 4.9.x- Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/firmware.c | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index ae61a24202ac..d231042f19d6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -484,39 +484,38 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) fail: brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev)); release_firmware(fwctx->code); - device_release_driver(fwctx->dev); + fwctx->done(fwctx->dev, -ENOENT, NULL, NULL, 0); kfree(fwctx); } static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx) { struct brcmf_fw *fwctx = ctx; - int ret; + int ret = 
0; brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev)); - if (!fw) + if (!fw) { + ret = -ENOENT; goto fail; - - /* only requested code so done here */ - if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) { - fwctx->done(fwctx->dev, 0, fw, NULL, 0); - kfree(fwctx); - return; } + /* only requested code so done here */ + if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) + goto done; + fwctx->code = fw; ret = request_firmware_nowait(THIS_MODULE, true, fwctx->nvram_name, fwctx->dev, GFP_KERNEL, fwctx, brcmf_fw_request_nvram_done); - if (!ret) - return; - - brcmf_fw_request_nvram_done(NULL, fwctx); + /* pass NULL to nvram callback for bcm47xx fallback */ + if (ret) + brcmf_fw_request_nvram_done(NULL, fwctx); return; fail: brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev)); - device_release_driver(fwctx->dev); +done: + fwctx->done(fwctx->dev, ret, fw, NULL, 0); kfree(fwctx); } From 7a51461fc2da82a6c565a3ee65c41c197f28225d Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 12 Jun 2017 12:47:34 +0100 Subject: [PATCH 189/341] brcmfmac: unbind all devices upon failure in firmware callback When request firmware fails, brcmf_ops_sdio_remove is being called and brcmf_bus freed. In such circumstances if you do a suspend/resume cycle the kernel hangs on resume due to a NULL pointer dereference in the resume function. So in brcmf_sdio_firmware_callback() we need to unbind the driver from both sdio_func devices when firmware load failure is indicated. 
Cc: stable@vger.kernel.org # 4.9.x- Tested-by: Enric Balletbo i Serra Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 6e1fcdcde11c..5653d6dd38f6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -3992,14 +3992,14 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err, u8 saveclk; brcmf_dbg(TRACE, "Enter: dev=%s, err=%d\n", dev_name(dev), err); + bus_if = dev_get_drvdata(dev); + sdiodev = bus_if->bus_priv.sdio; if (err) goto fail; - bus_if = dev_get_drvdata(dev); if (!bus_if->drvr) return; - sdiodev = bus_if->bus_priv.sdio; bus = sdiodev->bus; /* try to download image and nvram to the dongle */ @@ -4088,6 +4088,7 @@ release: fail: brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err); device_release_driver(dev); + device_release_driver(&sdiodev->func[2]->dev); } struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) From a2b7a622d6292b693544d7c94bb5d11c3f4676f8 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 12 Jun 2017 12:56:35 +0100 Subject: [PATCH 190/341] brcmfmac: fix brcmf_fws_add_interface() for USB devices USB devices rely on queuing functionality provided by the fwsignal module regardless of the mode fwsignal is operating in. For this some data structure needs to be reserved which is tied to the interface, which is done by brcmf_fws_add_interface(). However, it checks the mode. Replace that by checking the result from brcmf_fws_queue_skbs(). Otherwise the driver will crash in a null pointer dereference when data is transmitted on the interface. 
Fixes: fc0471e3e884 ("brcmfmac: ignore interfaces when fwsignal is disabled") Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c index 72373e59308e..f59642b2c935 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c @@ -2145,7 +2145,7 @@ void brcmf_fws_add_interface(struct brcmf_if *ifp) struct brcmf_fws_info *fws = drvr_to_fws(ifp->drvr); struct brcmf_fws_mac_descriptor *entry; - if (!ifp->ndev || fws->fcmode == BRCMF_FWS_FCMODE_NONE) + if (!ifp->ndev || !brcmf_fws_queue_skbs(fws)) return; entry = &fws->desc.iface[ifp->ifidx]; From 5f2f97656ada8d811d3c1bef503ced266fcd53a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 15 Jun 2017 00:12:24 +0100 Subject: [PATCH 191/341] rxrpc: Fix several cases where a padded len isn't checked in ticket decode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes CVE-2017-7482. When a kerberos 5 ticket is being decoded so that it can be loaded into an rxrpc-type key, there are several places in which the length of a variable-length field is checked to make sure that it's not going to overrun the available data - but the data is padded to the nearest four-byte boundary and the code doesn't check for this extra. This could lead to the size-remaining variable wrapping and the data pointer going over the end of the buffer. Fix this by making the various variable-length data checks use the padded length. Reported-by: 石磊 Signed-off-by: David Howells Reviewed-by: Marc Dionne Reviewed-by: Dan Carpenter Signed-off-by: David S. 
Miller --- net/rxrpc/key.c | 64 ++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 0a4e28477ad9..54369225766e 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -217,7 +217,7 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, unsigned int *_toklen) { const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, n_parts, loop, tmp; + unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen; /* there must be at least one name, and at least #names+1 length * words */ @@ -247,16 +247,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, toklen -= 4; if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX) return -EINVAL; - if (tmp > toklen) + paddedlen = (tmp + 3) & ~3; + if (paddedlen > toklen) return -EINVAL; princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL); if (!princ->name_parts[loop]) return -ENOMEM; memcpy(princ->name_parts[loop], xdr, tmp); princ->name_parts[loop][tmp] = 0; - tmp = (tmp + 3) & ~3; - toklen -= tmp; - xdr += tmp >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; } if (toklen < 4) @@ -265,16 +265,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, toklen -= 4; if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX) return -EINVAL; - if (tmp > toklen) + paddedlen = (tmp + 3) & ~3; + if (paddedlen > toklen) return -EINVAL; princ->realm = kmalloc(tmp + 1, GFP_KERNEL); if (!princ->realm) return -ENOMEM; memcpy(princ->realm, xdr, tmp); princ->realm[tmp] = 0; - tmp = (tmp + 3) & ~3; - toklen -= tmp; - xdr += tmp >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; _debug("%s/...@%s", princ->name_parts[0], princ->realm); @@ -293,7 +293,7 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, unsigned int *_toklen) { const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len; + unsigned int toklen = *_toklen, len, paddedlen; /* there must be at least one tag and one length 
word */ if (toklen <= 8) @@ -307,15 +307,17 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, toklen -= 8; if (len > max_data_size) return -EINVAL; + paddedlen = (len + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; td->data_len = len; if (len > 0) { td->data = kmemdup(xdr, len, GFP_KERNEL); if (!td->data) return -ENOMEM; - len = (len + 3) & ~3; - toklen -= len; - xdr += len >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; } _debug("tag %x len %x", td->tag, td->data_len); @@ -387,7 +389,7 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, const __be32 **_xdr, unsigned int *_toklen) { const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len; + unsigned int toklen = *_toklen, len, paddedlen; /* there must be at least one length word */ if (toklen <= 4) @@ -399,6 +401,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, toklen -= 4; if (len > AFSTOKEN_K5_TIX_MAX) return -EINVAL; + paddedlen = (len + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; *_tktlen = len; _debug("ticket len %u", len); @@ -407,9 +412,8 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, *_ticket = kmemdup(xdr, len, GFP_KERNEL); if (!*_ticket) return -ENOMEM; - len = (len + 3) & ~3; - toklen -= len; - xdr += len >> 2; + toklen -= paddedlen; + xdr += paddedlen >> 2; } *_xdr = xdr; @@ -552,7 +556,7 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) { const __be32 *xdr = prep->data, *token; const char *cp; - unsigned int len, tmp, loop, ntoken, toklen, sec_ix; + unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix; size_t datalen = prep->datalen; int ret; @@ -578,22 +582,21 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) if (len < 1 || len > AFSTOKEN_CELL_MAX) goto not_xdr; datalen -= 4; - tmp = (len + 3) & ~3; - if (tmp > datalen) + paddedlen = (len + 3) & ~3; + if (paddedlen > datalen) goto not_xdr; cp = (const char *) xdr; for (loop = 0; loop < len; loop++) if 
(!isprint(cp[loop])) goto not_xdr; - if (len < tmp) - for (; loop < tmp; loop++) - if (cp[loop]) - goto not_xdr; + for (; loop < paddedlen; loop++) + if (cp[loop]) + goto not_xdr; _debug("cellname: [%u/%u] '%*.*s'", - len, tmp, len, len, (const char *) xdr); - datalen -= tmp; - xdr += tmp >> 2; + len, paddedlen, len, len, (const char *) xdr); + datalen -= paddedlen; + xdr += paddedlen >> 2; /* get the token count */ if (datalen < 12) @@ -614,10 +617,11 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) sec_ix = ntohl(*xdr); datalen -= 4; _debug("token: [%x/%zx] %x", toklen, datalen, sec_ix); - if (toklen < 20 || toklen > datalen) + paddedlen = (toklen + 3) & ~3; + if (toklen < 20 || toklen > datalen || paddedlen > datalen) goto not_xdr; - datalen -= (toklen + 3) & ~3; - xdr += (toklen + 3) >> 2; + datalen -= paddedlen; + xdr += paddedlen >> 2; } while (--loop > 0); From 988c7322116970696211e902b468aefec95b6ec4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 15 Jun 2017 17:49:08 +0800 Subject: [PATCH 192/341] sctp: return next obj by passing pos + 1 into sctp_transport_get_idx In sctp_for_each_transport, pos is used to save how many objs it has dumped. Now it gets the last obj by sctp_transport_get_idx, then gets the next obj by sctp_transport_get_next. The issue is that in the meanwhile if some objs in transport hashtable are removed and the objs nums are less than pos, sctp_transport_get_idx would return NULL and hti.walker.tbl is NULL as well. At this moment it should stop hti, instead of continue getting the next obj. Or it would cause a NULL pointer dereference in sctp_transport_get_next. This patch is to pass pos + 1 into sctp_transport_get_idx to get the next obj directly, even if pos > objs nums, it would return NULL and stop hti. Fixes: 626d16f50f39 ("sctp: export some apis or variables for sctp_diag and reuse some for proc") Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 30aa0a529215..3a8318e518f1 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4666,9 +4666,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), if (err) return err; - sctp_transport_get_idx(net, &hti, pos); - obj = sctp_transport_get_next(net, &hti); - for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) { + obj = sctp_transport_get_idx(net, &hti, pos + 1); + for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) { struct sctp_transport *transport = obj; if (!sctp_transport_hold(transport)) From 3b1bbafbfd14474fee61487552c9916ec1b25c58 Mon Sep 17 00:00:00 2001 From: "xypron.glpk@gmx.de" Date: Thu, 15 Jun 2017 20:59:57 +0200 Subject: [PATCH 193/341] Doc: net: dsa: b53: update location of referenced dsa.txt The referenced file dsa.txt is located at Documentation/devicetree/bindings/net/dsa/dsa.txt Reviewed-by: Florian Fainelli Signed-off-by: Heinrich Schuchardt Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/b53.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt index d6c6e41648d4..8ec2ca21adeb 100644 --- a/Documentation/devicetree/bindings/net/dsa/b53.txt +++ b/Documentation/devicetree/bindings/net/dsa/b53.txt @@ -34,7 +34,7 @@ Required properties: "brcm,bcm6328-switch" "brcm,bcm6368-switch" and the mandatory "brcm,bcm63xx-switch" -See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional +See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional required and optional properties. 
Examples: From 74e4ce6a78751f0a602dcbd00b53f710e312fcc5 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Tue, 13 Jun 2017 13:18:14 +0530 Subject: [PATCH 194/341] drm/i915/glk: Split GLK DSI device ready functionality This patch divides the glk_dsi_device_ready() function into two parts. The first part programs LP wake and MIPI DSI mode into the MIPI_CTRL register using the newly defined function glk_dsi_enable_io(), which is called from intel_dsi_pre_enable(). The second part performs the remaining device ready activities using the existing function glk_dsi_device_ready(). Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1497340095-5877-1-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 721f3f3adc1e..48a078f90cac 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -346,12 +346,12 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, return true; } -static void glk_dsi_device_ready(struct intel_encoder *encoder) +static void glk_dsi_enable_io(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; - u32 tmp, val; + u32 tmp; /* Set the MIPI mode * If MIPI_Mode is off, then writing to LP_Wake bit is not reflecting.
@@ -381,6 +381,14 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) GLK_MIPIIO_PORT_POWERED, 20)) DRM_ERROR("MIPIO port is powergated\n"); } +} + +static void glk_dsi_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; /* Wait for MIPI PHY status bit to set */ for_each_dsi_port(port, intel_dsi->ports) { @@ -391,8 +399,8 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) } /* Get IO out of reset */ - tmp = I915_READ(MIPI_CTRL(PORT_A)); - I915_WRITE(MIPI_CTRL(PORT_A), tmp | GLK_MIPIIO_RESET_RELEASED); + val = I915_READ(MIPI_CTRL(PORT_A)); + I915_WRITE(MIPI_CTRL(PORT_A), val | GLK_MIPIIO_RESET_RELEASED); /* Get IO out of Low power state*/ for_each_dsi_port(port, intel_dsi->ports) { @@ -427,9 +435,9 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) val |= (ULPS_STATE_NORMAL_OPERATION | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - tmp = I915_READ(MIPI_CTRL(port)); - tmp &= ~GLK_LP_WAKE; - I915_WRITE(MIPI_CTRL(port), tmp); + val = I915_READ(MIPI_CTRL(port)); + val &= ~GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), val); } /* Wait for Stop state */ @@ -811,6 +819,9 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* Deassert reset */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + if (IS_GEMINILAKE(dev_priv)) + glk_dsi_enable_io(encoder); + /* Put device in ready state (LP-11) */ intel_dsi_device_ready(encoder); From 8a1deb329ffbdb049f6a475cf535644a81e80b55 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Tue, 13 Jun 2017 13:18:15 +0530 Subject: [PATCH 195/341] drm/i915/glk: Add cold boot sequence for GLK DSI As per BSEPC, if device ready bit is '0' in enable IO sequence then its a cold boot/reset scenario eg: S3/S4 resume. If cold boot scenario detected in enable IO, then prepare port immediately. 
In normal boot scenario, prepare port after glk_dsi_device_ready(). Without cold boot sequence enabled, features like S3/S4 doesn't work. Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1497340095-5877-2-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 79 +++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 48a078f90cac..50ec836da8b1 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -346,12 +346,13 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, return true; } -static void glk_dsi_enable_io(struct intel_encoder *encoder) +static bool glk_dsi_enable_io(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; u32 tmp; + bool cold_boot = false; /* Set the MIPI mode * If MIPI_Mode is off, then writing to LP_Wake bit is not reflecting. 
@@ -370,7 +371,10 @@ static void glk_dsi_enable_io(struct intel_encoder *encoder) /* Program LP Wake */ for_each_dsi_port(port, intel_dsi->ports) { tmp = I915_READ(MIPI_CTRL(port)); - tmp |= GLK_LP_WAKE; + if (!(I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY)) + tmp &= ~GLK_LP_WAKE; + else + tmp |= GLK_LP_WAKE; I915_WRITE(MIPI_CTRL(port), tmp); } @@ -381,6 +385,14 @@ static void glk_dsi_enable_io(struct intel_encoder *encoder) GLK_MIPIIO_PORT_POWERED, 20)) DRM_ERROR("MIPIO port is powergated\n"); } + + /* Check for cold boot scenario */ + for_each_dsi_port(port, intel_dsi->ports) { + cold_boot |= !(I915_READ(MIPI_DEVICE_READY(port)) & + DEVICE_READY); + } + + return cold_boot; } static void glk_dsi_device_ready(struct intel_encoder *encoder) @@ -410,34 +422,34 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) val |= DEVICE_READY; I915_WRITE(MIPI_DEVICE_READY(port), val); usleep_range(10, 15); - } + } else { + /* Enter ULPS */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_ENTER | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); - /* Enter ULPS */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_ENTER | DEVICE_READY); - I915_WRITE(MIPI_DEVICE_READY(port), val); - - /* Wait for ULPS active */ - if (intel_wait_for_register(dev_priv, + /* Wait for ULPS active */ + if (intel_wait_for_register(dev_priv, MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20)) - DRM_ERROR("ULPS not active\n"); + DRM_ERROR("ULPS not active\n"); - /* Exit ULPS */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_EXIT | DEVICE_READY); - I915_WRITE(MIPI_DEVICE_READY(port), val); + /* Exit ULPS */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_EXIT | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); - /* Enter Normal Mode */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; 
- val |= (ULPS_STATE_NORMAL_OPERATION | DEVICE_READY); - I915_WRITE(MIPI_DEVICE_READY(port), val); + /* Enter Normal Mode */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_NORMAL_OPERATION | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); - val = I915_READ(MIPI_CTRL(port)); - val &= ~GLK_LP_WAKE; - I915_WRITE(MIPI_CTRL(port), val); + val = I915_READ(MIPI_CTRL(port)); + val &= ~GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), val); + } } /* Wait for Stop state */ @@ -778,6 +790,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; u32 val; + bool glk_cold_boot = false; DRM_DEBUG_KMS("\n"); @@ -808,7 +821,8 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } - intel_dsi_prepare(encoder, pipe_config); + if (!IS_GEMINILAKE(dev_priv)) + intel_dsi_prepare(encoder, pipe_config); /* Power on, try both CRC pmic gpio and VBT */ if (intel_dsi->gpio_panel) @@ -819,12 +833,21 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* Deassert reset */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); - if (IS_GEMINILAKE(dev_priv)) - glk_dsi_enable_io(encoder); + if (IS_GEMINILAKE(dev_priv)) { + glk_cold_boot = glk_dsi_enable_io(encoder); + + /* Prepare port in cold boot(s3/s4) scenario */ + if (glk_cold_boot) + intel_dsi_prepare(encoder, pipe_config); + } /* Put device in ready state (LP-11) */ intel_dsi_device_ready(encoder); + /* Prepare port in normal boot scenario */ + if (IS_GEMINILAKE(dev_priv) && !glk_cold_boot) + intel_dsi_prepare(encoder, pipe_config); + /* Send initialization commands in LP mode */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); From 6c780a0267b8a1075f40b39851132eeaefefcff5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Jun 2017 11:33:16 -0500 Subject: [PATCH 196/341] net/mlx5: Wait for FW readiness before 
initializing command interface Before attempting to initialize the command interface we must wait till the fw_initializing bit is clear. If we fail to meet this condition the hardware will drop our configuration, specifically the descriptors page address. This scenario can happen when the firmware is still executing an FLR flow and did not finish yet so the driver needs to wait for that to finish. Fixes: e3297246c2c8 ('net/mlx5_core: Wait for FW readiness on startup') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4f577a5abf88..13be264587f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -175,8 +175,9 @@ static struct mlx5_profile profile[] = { }, }; -#define FW_INIT_TIMEOUT_MILI 2000 -#define FW_INIT_WAIT_MS 2 +#define FW_INIT_TIMEOUT_MILI 2000 +#define FW_INIT_WAIT_MS 2 +#define FW_PRE_INIT_TIMEOUT_MILI 10000 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) { @@ -1013,6 +1014,15 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, */ dev->state = MLX5_DEVICE_STATE_UP; + /* wait for firmware to accept initialization segments configurations + */ + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", + FW_PRE_INIT_TIMEOUT_MILI); + goto out; + } + err = mlx5_cmd_init(dev); if (err) { dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); From f0b381178b01b831f9907d72f467d6443afdea67 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 18 May 2017 15:15:08 +0300 Subject: [PATCH 197/341] net/mlx5e: Fix timestamping capabilities reporting Misuse of (BIT) macro caused to report wrong flags for 
"Hardware Transmit Timestamp Modes" and "Hardware Receive Filter Modes" Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support') Signed-off-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 8209affa75c3..16486dff1493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1242,11 +1242,11 @@ static int mlx5e_get_ts_info(struct net_device *dev, SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; - info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) | - (BIT(1) << HWTSTAMP_TX_ON); + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); - info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) | - (BIT(1) << HWTSTAMP_FILTER_ALL); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); return 0; } From 5f195c2c5cba60241004146cd12d71451d6b0fc4 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 16 May 2017 07:07:11 -0400 Subject: [PATCH 198/341] net/mlx5e: Fix min inline value for VF rep SQs The offending commit only changed the code path for PF/VF, but it didn't take care of VF representors. As a result, since params->tx_min_inline_mode for VF representors is kzalloced to 0 (MLX5_INLINE_MODE_NONE), all VF reps SQs were set to that mode. This actually works on CX5 by default but broke CX4. Fix that by adding a call to query the min inline mode from the VF rep build up code. 
Fixes: a6f402e49901 ("net/mlx5e: Tx, no inline copy on ConnectX-5") Signed-off-by: Chris Mi Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 79462c0368a0..46984a52a94b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -791,6 +791,8 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->num_tc = 1; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); } static void mlx5e_build_rep_netdev(struct net_device *netdev) From 9d1cef196b2687e9338c4268a3aa0ca521686bc9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 4 Jun 2017 19:36:17 +0300 Subject: [PATCH 199/341] net/mlx5: Properly check applicability of devlink eswitch commands Currently we don't check that the link type is Eth and hence crash on IB ports when attempting to deref esw->xxx, fix that. To avoid repeating this check over and over, put the existing checks and the one on link type in a single helper.
Fixes: 7768d1971de6 ('net/mlx5: E-Switch, Add control for encapsulation') Signed-off-by: Or Gerlitz Reported-by: Mohamad Badarnah Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 77 ++++++++++--------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f991f669047e..a53e982a6863 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -906,21 +906,34 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +static int mlx5_devlink_eswitch_check(struct devlink *devlink) { - struct mlx5_core_dev *dev; - u16 cur_mlx5_mode, mlx5_mode = 0; + struct mlx5_core_dev *dev = devlink_priv(devlink); - dev = devlink_priv(devlink); + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EOPNOTSUPP; if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - cur_mlx5_mode = dev->priv.eswitch->mode; - - if (cur_mlx5_mode == SRIOV_NONE) + if (dev->priv.eswitch->mode == SRIOV_NONE) return -EOPNOTSUPP; + return 0; +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u16 cur_mlx5_mode, mlx5_mode = 0; + int err; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + cur_mlx5_mode = dev->priv.eswitch->mode; + if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; @@ -937,15 +950,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { - struct mlx5_core_dev *dev; + struct mlx5_core_dev *dev = devlink_priv(devlink); + int err; - dev = devlink_priv(devlink); - - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return 
-EOPNOTSUPP; - - if (dev->priv.eswitch->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); } @@ -954,15 +964,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; - int num_vports = esw->enabled_vports; int err, vport; u8 mlx5_mode; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: @@ -985,7 +992,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) if (err) goto out; - for (vport = 1; vport < num_vports; vport++) { + for (vport = 1; vport < esw->enabled_vports; vport++) { err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); if (err) { esw_warn(dev, "Failed to set min inline on vport %d\n", @@ -1010,12 +1017,11 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } @@ -1062,11 +1068,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && 
(!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || @@ -1105,12 +1109,11 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; *encap = esw->offloads.encap; return 0; From 9cfb4f719264f3eeb68122371ad70fd5bf2e10bb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 Jun 2017 19:32:12 +0300 Subject: [PATCH 200/341] net/mlx5e: Remove TC header re-write offloading of ip tos Currently the firmware API is partial and allows to offload only the dscp part of the tos, also, ipv6 support isn't there yet. As such, remove the offloading option of ipv4 dscp till the FW APIs are more comprehensive. Fixes: d79b6df6b10a ('net/mlx5e: Add parsing of TC pedit actions to HW format') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ec63158ab643..9df9fc0d26f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -895,7 +895,6 @@ static struct mlx5_fields fields[] = { {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])}, {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)}, - {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)}, {MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)}, {MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)}, {MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)}, From 
31ac93386d135a6c96de9c8bab406f5ccabf5a4d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 15 Jun 2017 20:08:32 +0300 Subject: [PATCH 201/341] net/mlx5e: Avoid doing a cleanup call if the profile doesn't have it The error flow of mlx5e_create_netdev calls the cleanup call of the given profile without checking if it exists, fix that. Currently the VF reps don't register that callback and we crash if getting into error -- can be reproduced by the user doing ctrl^C while attempting to change the sriov mode from legacy to switchdev. Fixes: 26e59d8077a3 ('net/mlx5e: Implement mlx5e interface attach/detach callbacks') Signed-off-by: Or Gerlitz Reported-by: Sabrina Dubroca Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 41cd22a223dc..277f4de30375 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4241,7 +4241,8 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, return netdev; err_cleanup_nic: - profile->cleanup(priv); + if (profile->cleanup) + profile->cleanup(priv); free_netdev(netdev); return NULL; From 9a30a26122c3fb249a4f509fb2253de259e58cd0 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 13 Jun 2017 10:52:30 -0700 Subject: [PATCH 202/341] Revert "drm/i915/skl: New ddb allocation algorithm" This reverts commit bb9d85f6e9de8fef5236c076530eab67a2f2431b. The new ddb allocation algorithm is a show stopper on my SKL system. Besides not being able to get external DP 4k@60 (through USB type C), it fully hangs my screen when unplugging the USB type C.
Bugzilla: https://patchwork.freedesktop.org/patch/161571/ Fixes: bb9d85f6e9de ("drm/i915/skl: New ddb allocation algorithm") Cc: Mahesh Kumar Cc: Maarten Lankhorst Cc: Matt Roper Signed-off-by: Rodrigo Vivi Reviewed-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1497376350-3400-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 257 ++++++++++++-------------------- 1 file changed, 98 insertions(+), 159 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0aed13dcedf0..48ea0fca1f72 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4140,41 +4140,13 @@ skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); } -static void -skl_enable_plane_wm_levels(const struct drm_i915_private *dev_priv, - uint16_t plane_ddb, - uint16_t max_level, - struct skl_plane_wm *wm) -{ - int level; - /* - * Now enable all levels in WM structure which can be enabled - * using current DDB allocation - */ - for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) { - struct skl_wm_level *level_wm = &wm->wm[level]; - - if (level > max_level || level_wm->plane_res_b == 0 - || level_wm->plane_res_l >= 31 - || level_wm->plane_res_b >= plane_ddb) { - level_wm->plane_en = false; - level_wm->plane_res_b = 0; - level_wm->plane_res_l = 0; - } else { - level_wm->plane_en = true; - } - } -} - static int skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, - struct skl_pipe_wm *pipe_wm, struct skl_ddb_allocation *ddb /* out */) { struct drm_atomic_state *state = cstate->base.state; struct drm_crtc *crtc = cstate->base.crtc; struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; @@ -4187,9 +4159,6 @@ skl_allocate_pipe_ddb(struct 
intel_crtc_state *cstate, unsigned plane_data_rate[I915_MAX_PLANES] = {}; unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; uint16_t total_min_blocks = 0; - uint16_t total_level_ddb; - uint16_t plane_blocks = 0; - int max_level, level; /* Clear the partitioning for disabled planes. */ memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); @@ -4228,48 +4197,10 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return -EINVAL; } - alloc_size -= minimum[PLANE_CURSOR]; - ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - - minimum[PLANE_CURSOR]; + alloc_size -= total_min_blocks; + ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; - for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) { - total_level_ddb = 0; - for_each_plane_id_on_crtc(intel_crtc, plane_id) { - /* - * TODO: We should calculate watermark values for Y/UV - * plane both in case of NV12 format and use both values - * for ddb calculation. NV12 is disabled as of now, So - * using only single/UV plane value here. - */ - struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; - uint16_t plane_res_b = wm->wm[level].plane_res_b; - uint16_t min = minimum[plane_id] + y_minimum[plane_id]; - - if (plane_id == PLANE_CURSOR) - continue; - - total_level_ddb += max(plane_res_b, min); - } - - /* - * If This level can successfully be enabled with the - * pipe's current DDB allocation, then all lower levels are - * guaranteed to succeed as well. - */ - if (total_level_ddb <= alloc_size) - break; - } - - if ((level < 0) || (total_min_blocks > alloc_size)) { - DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); - DRM_DEBUG_KMS("minimum required %d/%d\n", (level < 0) ? - total_level_ddb : total_min_blocks, alloc_size); - return -EINVAL; - } - max_level = level; - alloc_size -= total_level_ddb; - /* * 2. Distribute the remaining space in proportion to the amount of * data each plane needs to fetch from memory. 
@@ -4279,24 +4210,13 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, total_data_rate = skl_get_total_relative_data_rate(cstate, plane_data_rate, plane_y_data_rate); - /* - * PLANE_CURSOR data rate is not included in total_data_rate. - * If only cursor plane is enabled we have to enable its WM levels - * explicitly before returning. Cursor has fixed ddb allocation, - * So it's ok to always check cursor WM enabling before return. - */ - plane_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]); - skl_enable_plane_wm_levels(dev_priv, plane_blocks, max_level, - &pipe_wm->planes[PLANE_CURSOR]); if (total_data_rate == 0) return 0; start = alloc->start; for_each_plane_id_on_crtc(intel_crtc, plane_id) { unsigned int data_rate, y_data_rate; - uint16_t plane_blocks = 0, y_plane_blocks = 0; - struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; - uint16_t plane_res_b = wm->wm[max_level].plane_res_b; + uint16_t plane_blocks, y_plane_blocks = 0; if (plane_id == PLANE_CURSOR) continue; @@ -4308,36 +4228,33 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * promote the expression to 64 bits to avoid overflowing, the * result is < available as data_rate / total_data_rate < 1 */ + plane_blocks = minimum[plane_id]; + plane_blocks += div_u64((uint64_t)alloc_size * data_rate, + total_data_rate); /* Leave disabled planes at (0,0) */ if (data_rate) { - plane_blocks = max(minimum[plane_id], plane_res_b); - plane_blocks += div_u64((uint64_t)alloc_size * - data_rate, total_data_rate); ddb->plane[pipe][plane_id].start = start; ddb->plane[pipe][plane_id].end = start + plane_blocks; - start += plane_blocks; } + start += plane_blocks; + /* * allocation for y_plane part of planar format: - * TODO: Once we start calculating watermark values for Y/UV - * plane both consider it for initial allowed wm blocks. 
*/ y_data_rate = plane_y_data_rate[plane_id]; + y_plane_blocks = y_minimum[plane_id]; + y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, + total_data_rate); + if (y_data_rate) { - y_plane_blocks = y_minimum[plane_id]; - y_plane_blocks += div_u64((uint64_t)alloc_size * - y_data_rate, total_data_rate); ddb->y_plane[pipe][plane_id].start = start; ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks; - start += y_plane_blocks; } - skl_enable_plane_wm_levels(dev_priv, - plane_blocks, - max_level, - wm); + + start += y_plane_blocks; } return 0; @@ -4427,9 +4344,11 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, + uint16_t ddb_allocation, int level, uint16_t *out_blocks, /* out */ - uint8_t *out_lines /* out */) + uint8_t *out_lines, /* out */ + bool *enabled /* out */) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); const struct drm_plane_state *pstate = &intel_pstate->base; @@ -4452,8 +4371,10 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, bool y_tiled, x_tiled; if (latency == 0 || - !intel_wm_plane_visible(cstate, intel_pstate)) + !intel_wm_plane_visible(cstate, intel_pstate)) { + *enabled = false; return 0; + } y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || fb->modifier == I915_FORMAT_MOD_Yf_TILED; @@ -4542,6 +4463,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; + else if ((ddb_allocation && ddb_allocation / + fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) + selected_result = min_fixed_16_16(method1, method2); else if (latency >= linetime_us) selected_result = min_fixed_16_16(method1, method2); else @@ -4561,42 +4485,64 @@ static int 
skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } } - if (res_lines >= 31 && level == 0) { - struct drm_plane *plane = pstate->plane; + if (res_blocks >= ddb_allocation || res_lines > 31) { + *enabled = false; - DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); - DRM_DEBUG_KMS("[PLANE:%d:%s] lines required = %u/31\n", - plane->base.id, plane->name, res_lines); - return -EINVAL; + /* + * If there are no valid level 0 watermarks, then we can't + * support this display configuration. + */ + if (level) { + return 0; + } else { + struct drm_plane *plane = pstate->plane; + + DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); + DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n", + plane->base.id, plane->name, + res_blocks, ddb_allocation, res_lines); + return -EINVAL; + } } *out_blocks = res_blocks; *out_lines = res_lines; + *enabled = true; return 0; } static int skl_compute_wm_levels(const struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, struct skl_plane_wm *wm) { + struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); + struct drm_plane *plane = intel_pstate->base.plane; + struct intel_plane *intel_plane = to_intel_plane(plane); + uint16_t ddb_blocks; + enum pipe pipe = intel_crtc->pipe; int level, max_level = ilk_wm_max_level(dev_priv); int ret; if (WARN_ON(!intel_pstate->base.fb)) return -EINVAL; + ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); + for (level = 0; level <= max_level; level++) { struct skl_wm_level *result = &wm->wm[level]; ret = skl_compute_plane_wm(dev_priv, cstate, intel_pstate, + ddb_blocks, level, &result->plane_res_b, - &result->plane_res_l); + &result->plane_res_l, + &result->plane_en); if (ret) return ret; } @@ -4662,7 +4608,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, wm = 
&pipe_wm->planes[plane_id]; - ret = skl_compute_wm_levels(dev_priv, cstate, intel_pstate, wm); + ret = skl_compute_wm_levels(dev_priv, ddb, cstate, + intel_pstate, wm); if (ret) return ret; skl_compute_transition_wm(cstate, &wm->trans_wm); @@ -4775,45 +4722,6 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, return false; } -static int -skl_ddb_add_affected_planes(struct intel_crtc_state *cstate, - const struct skl_pipe_wm *old_pipe_wm, - const struct skl_pipe_wm *pipe_wm) -{ - struct drm_atomic_state *state = cstate->base.state; - struct drm_device *dev = state->dev; - struct drm_crtc *crtc = cstate->base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; - struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; - struct drm_plane_state *plane_state; - struct drm_plane *plane; - enum pipe pipe = intel_crtc->pipe; - - WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); - - drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { - enum plane_id plane_id = to_intel_plane(plane)->id; - const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; - const struct skl_plane_wm *old_wm = &old_pipe_wm->planes[plane_id]; - - if ((skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], - &new_ddb->plane[pipe][plane_id]) && - skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], - &new_ddb->y_plane[pipe][plane_id])) && - !memcmp(wm, old_wm, sizeof(struct skl_plane_wm))) - continue; - - plane_state = drm_atomic_get_plane_state(state, plane); - if (IS_ERR(plane_state)) - return PTR_ERR(plane_state); - } - - return 0; -} - static int skl_update_pipe_wm(struct drm_crtc_state *cstate, const struct skl_pipe_wm *old_pipe_wm, struct skl_pipe_wm *pipe_wm, /* out */ @@ -4827,17 +4735,6 @@ static int skl_update_pipe_wm(struct drm_crtc_state 
*cstate, if (ret) return ret; - ret = skl_allocate_pipe_ddb(intel_cstate, pipe_wm, ddb); - if (ret) - return ret; - /* - * TODO: Planes are included in state to arm WM registers. - * Scope to optimize further, by just rewriting plane surf register. - */ - ret = skl_ddb_add_affected_planes(intel_cstate, old_pipe_wm, pipe_wm); - if (ret) - return ret; - if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) *changed = false; else @@ -4860,7 +4757,41 @@ pipes_modified(struct drm_atomic_state *state) } static int -skl_include_affected_crtcs(struct drm_atomic_state *state) +skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) +{ + struct drm_atomic_state *state = cstate->base.state; + struct drm_device *dev = state->dev; + struct drm_crtc *crtc = cstate->base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; + struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + struct drm_plane_state *plane_state; + struct drm_plane *plane; + enum pipe pipe = intel_crtc->pipe; + + WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); + + drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { + enum plane_id plane_id = to_intel_plane(plane)->id; + + if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], + &new_ddb->plane[pipe][plane_id]) && + skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], + &new_ddb->y_plane[pipe][plane_id])) + continue; + + plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + } + + return 0; +} + +static int +skl_compute_ddb(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -4924,6 +4855,14 @@ skl_include_affected_crtcs(struct drm_atomic_state *state) cstate = 
intel_atomic_get_crtc_state(state, intel_crtc); if (IS_ERR(cstate)) return PTR_ERR(cstate); + + ret = skl_allocate_pipe_ddb(cstate, ddb); + if (ret) + return ret; + + ret = skl_ddb_add_affected_planes(cstate); + if (ret) + return ret; } return 0; @@ -5013,7 +4952,7 @@ skl_compute_wm(struct drm_atomic_state *state) /* Clear all dirty flags */ results->dirty_pipes = 0; - ret = skl_include_affected_crtcs(state); + ret = skl_compute_ddb(state); if (ret) return ret; From 7ceaa6dcd8c6f59588428cec37f3c8093dd1011f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 16 Jun 2017 11:53:19 +1000 Subject: [PATCH 203/341] KVM: PPC: Book3S HV: Save/restore host values of debug registers At present, HV KVM on POWER8 and POWER9 machines loses any instruction or data breakpoint set in the host whenever a guest is run. Instruction breakpoints are currently only used by xmon, but ptrace and the perf_event subsystem can set data breakpoints as well as xmon. To fix this, we save the host values of the debug registers (CIABR, DAWR and DAWRX) before entering the guest and restore them on exit. To provide space to save them in the stack frame, we expand the stack frame allocated by kvmppc_hv_entry() from 112 to 144 bytes. 
Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs", 2014-01-08) Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 45 ++++++++++++++++++------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4e4390564276..4888dd494604 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -44,6 +44,17 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +/* Stack frame offsets for kvmppc_hv_entry */ +#define SFS 144 +#define STACK_SLOT_TRAP (SFS-4) +#define STACK_SLOT_TID (SFS-16) +#define STACK_SLOT_PSSCR (SFS-24) +#define STACK_SLOT_PID (SFS-32) +#define STACK_SLOT_IAMR (SFS-40) +#define STACK_SLOT_CIABR (SFS-48) +#define STACK_SLOT_DAWR (SFS-56) +#define STACK_SLOT_DAWRX (SFS-64) + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. 
@@ -328,10 +339,10 @@ kvm_novcpu_exit: bl kvmhv_accumulate_time #endif 13: mr r3, r12 - stw r12, 112-4(r1) + stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, 112-4(r1) + lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -554,12 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * * *****************************************************************************/ -/* Stack frame offsets */ -#define STACK_SLOT_TID (112-16) -#define STACK_SLOT_PSSCR (112-24) -#define STACK_SLOT_PID (112-32) -#define STACK_SLOT_IAMR (112-40) - .global kvmppc_hv_entry kvmppc_hv_entry: @@ -575,7 +580,7 @@ kvmppc_hv_entry: */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -112(r1) + stdu r1, -SFS(r1) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) @@ -765,6 +770,14 @@ BEGIN_FTR_SECTION std r7, STACK_SLOT_PID(r1) std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + mfspr r5, SPRN_CIABR + mfspr r6, SPRN_DAWR + mfspr r7, SPRN_DAWRX + std r5, STACK_SLOT_CIABR(r1) + std r6, STACK_SLOT_DAWR(r1) + std r7, STACK_SLOT_DAWRX(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -1518,8 +1531,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) * set by the guest could disrupt the host. 
*/ li r0, 0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 mtspr SPRN_PSPB, r0 mtspr SPRN_WORT, r0 BEGIN_FTR_SECTION @@ -1684,6 +1695,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ptesync /* Restore host values of some registers */ +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_CIABR(r1) + ld r6, STACK_SLOT_DAWR(r1) + ld r7, STACK_SLOT_DAWRX(r1) + mtspr SPRN_CIABR, r5 + mtspr SPRN_DAWR, r6 + mtspr SPRN_DAWRX, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) @@ -1836,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - ld r0, 112+PPC_LR_STKOFF(r1) - addi r1, r1, 112 + ld r0, SFS+PPC_LR_STKOFF(r1) + addi r1, r1, SFS mtlr r0 blr From 3d3efb68c19e539f0535c93a5258c1299270215f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 6 Jun 2017 14:35:30 +1000 Subject: [PATCH 204/341] KVM: PPC: Book3S HV: Ignore timebase offset on POWER9 DD1 POWER9 DD1 has an erratum where writing to the TBU40 register, which is used to apply an offset to the timebase, can cause the timebase to lose counts. This results in the timebase on some CPUs getting out of sync with other CPUs, which then results in misbehaviour of the timekeeping code. To work around the problem, we make KVM ignore the timebase offset for all guests on POWER9 DD1 machines. This means that live migration cannot be supported on POWER9 DD1 machines. 
Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fd4d978d5257..8d1a365b8edc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); break; case KVM_REG_PPC_TB_OFFSET: + /* + * POWER9 DD1 has an erratum where writing TBU40 causes + * the timebase to lose ticks. So we don't let the + * timebase offset be changed on P9 DD1. (It is + * initialized to zero.) + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + break; /* round up to multiple of 2^24 */ vcpu->arch.vcore->tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24); From 35abcd4f9f303ac4f10f99b3f7e993e5f2e6fa37 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Fri, 16 Jun 2017 09:36:35 +0100 Subject: [PATCH 205/341] brcmfmac: fix uninitialized warning in brcmf_usb_probe_phase2() This fixes the following warning: drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c: In function 'brcmf_usb_probe_phase2': drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c:1198:2: warning: 'devinfo' may be used uninitialized in this function [-Wmaybe-uninitialized] mutex_unlock(&devinfo->dev_init_lock); Fixes: 6d0507a777fb ("brcmfmac: add parameter to pass error code in firmware callback") Cc: Stephen Rothwell Reported-by: Kalle Valo Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 9ce3b55c3ffe..0eea48e73331 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1164,14 +1164,13 
@@ static void brcmf_usb_probe_phase2(struct device *dev, int ret, void *nvram, u32 nvlen) { struct brcmf_bus *bus = dev_get_drvdata(dev); - struct brcmf_usbdev_info *devinfo; + struct brcmf_usbdev_info *devinfo = bus->bus_priv.usb->devinfo; if (ret) goto error; brcmf_dbg(USB, "Start fw downloading\n"); - devinfo = bus->bus_priv.usb->devinfo; ret = check_file(fw->data); if (ret < 0) { brcmf_err("invalid firmware\n"); From a9f8553e935f26cb5447f67e280946b0923cd2dc Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 1 Jun 2017 16:18:15 +0530 Subject: [PATCH 206/341] powerpc/kprobes: Pause function_graph tracing during jprobes handling This fixes a crash when function_graph and jprobes are used together. This is essentially commit 237d28db036e ("ftrace/jprobes/x86: Fix conflict between jprobes and function graph tracing"), but for powerpc. Jprobes breaks function_graph tracing since the jprobe hook needs to use jprobe_return(), which never returns back to the hook, but instead to the original jprobe'd function. The solution is to momentarily pause function_graph tracing before invoking the jprobe hook and re-enable it when returning back to the original jprobe'd function. Fixes: 6794c78243bf ("powerpc64: port of the function graph tracer") Cc: stable@vger.kernel.org # v2.6.30+ Signed-off-by: Naveen N. 
Rao Acked-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/kprobes.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index fc4343514bed..5075a4d6f1d7 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -617,6 +617,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); + return 1; } NOKPROBE_SYMBOL(setjmp_pre_handler); @@ -642,6 +651,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) * saved regs... */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); preempt_enable_no_resched(); return 1; } From a4979a7e71eb8da976cbe4a0a1fa50636e76b04f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 1 Jun 2017 16:18:16 +0530 Subject: [PATCH 207/341] powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS For DYNAMIC_FTRACE_WITH_REGS, we should be passing-in the original set of registers in pt_regs, to capture the state _before_ ftrace_caller. However, we are instead passing the stack pointer *after* allocating a stack frame in ftrace_caller. Fix this by saving the proper value of r1 in pt_regs. Also, use SAVE_10GPRS() to simplify the code. Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman --- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 7c933a99f5d5..fa0921410fa4 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller) stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(0,r1) - SAVE_8GPRS(8,r1) - SAVE_8GPRS(16,r1) - SAVE_8GPRS(24,r1) + SAVE_GPR(0, r1) + SAVE_10GPRS(2, r1) + SAVE_10GPRS(12, r1) + SAVE_10GPRS(22, r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE + std r8, GPR1(r1) /* Load special regs for save below */ mfmsr r8 @@ -103,10 +107,10 @@ ftrace_call: #endif /* Restore gprs */ - REST_8GPRS(0,r1) - REST_8GPRS(8,r1) - REST_8GPRS(16,r1) - REST_8GPRS(24,r1) + REST_GPR(0,r1) + REST_10GPRS(2,r1) + REST_10GPRS(12,r1) + REST_10GPRS(22,r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) From c05b8c4474c03026aaa7f8872e78369f69f1bb08 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 1 Jun 2017 16:18:17 +0530 Subject: [PATCH 208/341] powerpc/kprobes: Skip livepatch_handler() for jprobes ftrace_caller() depends on a modified regs->nip to detect if a certain function has been livepatched. However, with KPROBES_ON_FTRACE, it is possible for regs->nip to have been modified by the kprobes pre_handler (jprobes, for instance). In this case, we do not want to invoke the livepatch_handler so as not to consume the livepatch stack. To distinguish between the two (kprobes and livepatch), we check if there is an active kprobe on the current function. If there is, then we know for sure that it must have modified the NIP as we don't support livepatching a kprobe'd function. In this case, we simply skip the livepatch_handler and branch to the new NIP. 
Otherwise, the livepatch_handler is invoked. Fixes: ead514d5fb30 ("powerpc/kprobes: Add support for KPROBES_ON_FTRACE") Signed-off-by: Naveen N. Rao Reviewed-by: Masami Hiramatsu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kprobes.h | 1 + arch/powerpc/kernel/kprobes.c | 6 +++ .../powerpc/kernel/trace/ftrace_64_mprofile.S | 39 ++++++++++++++++--- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index a83821f33ea3..8814a7249ceb 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); extern int kprobe_post_handler(struct pt_regs *regs); +extern int is_current_kprobe_addr(unsigned long addr); #ifdef CONFIG_KPROBES_ON_FTRACE extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 5075a4d6f1d7..01addfb0ed0a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; +int is_current_kprobe_addr(unsigned long addr) +{ + struct kprobe *p = kprobe_running(); + return (p && (unsigned long)p->addr == addr) ? 
1 : 0; +} + bool arch_within_kprobe_blacklist(unsigned long addr) { return (addr >= (unsigned long)__kprobes_text_start && diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index fa0921410fa4..c98e90b4ea7b 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -99,13 +99,39 @@ ftrace_call: bl ftrace_stub nop - /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) - mtctr r3 + /* Load the possibly modified NIP */ + ld r15, _NIP(r1) + #ifdef CONFIG_LIVEPATCH - cmpd r14,r3 /* has NIP been altered? */ + cmpd r14, r15 /* has NIP been altered? */ #endif +#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE) + /* NIP has not been altered, skip over further checks */ + beq 1f + + /* Check if there is an active kprobe on us */ + subi r3, r14, 4 + bl is_current_kprobe_addr + nop + + /* + * If r3 == 1, then this is a kprobe/jprobe. + * else, this is livepatched function. + * + * The conditional branch for livepatch_handler below will use the + * result of this comparison. For kprobe/jprobe, we just need to branch to + * the new NIP, not call livepatch_handler. The branch below is bne, so we + * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want + * CR0[EQ] = (r3 == 1). + */ + cmpdi r3, 1 +1: +#endif + + /* Load CTR with the possibly modified NIP */ + mtctr r15 + /* Restore gprs */ REST_GPR(0,r1) REST_10GPRS(2,r1) @@ -123,7 +149,10 @@ ftrace_call: addi r1, r1, SWITCH_FRAME_SIZE #ifdef CONFIG_LIVEPATCH - /* Based on the cmpd above, if the NIP was altered handle livepatch */ + /* + * Based on the cmpd or cmpdi above, if the NIP was altered and we're + * not on a kprobe/jprobe, then handle livepatch. + */ bne- livepatch_handler #endif From d89ba5353f301971dd7d2f9fdf25c4432728f38e Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Wed, 14 Jun 2017 00:12:00 +0530 Subject: [PATCH 209/341] powerpc/64s: Handle data breakpoints in Radix mode On Power9, trying to use data breakpoints throws the splat shown below. This is because the check for a data breakpoint in DSISR is in do_hash_page(), which is not called when in Radix mode. Unable to handle kernel paging request for data at address 0xc000000000e19218 Faulting instruction address: 0xc0000000001155e8 cpu 0x0: Vector: 300 (Data Access) at [c0000000ef1e7b20] pc: c0000000001155e8: find_pid_ns+0x48/0xe0 lr: c000000000116ac4: find_task_by_vpid+0x44/0x90 sp: c0000000ef1e7da0 msr: 9000000000009033 dar: c000000000e19218 dsisr: 400000 Move the check to handle_page_fault() so as to catch data breakpoints in both Hash and Radix MMU modes. We have to change the check in do_hash_page() against 0xa410 to use 0xa450, so as to include the value of (DSISR_DABRMATCH << 16). There are two sites that call handle_page_fault() when in Radix, both already pass DSISR in r4. Fixes: caca285e5ab4 ("powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Shriya R. Kulkarni Signed-off-by: Naveen N. Rao [mpe: Fix the fall-through case on hash, we need to reload DSISR] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae418b85c17c..b886795060fd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1411,10 +1411,8 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 - andis. r0,r4,0xa410 /* weird error? */ + andis. r0,r4,0xa450 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ - andis. 
r0,r4,DSISR_DABRMATCH@h - bne- handle_dabr_fault CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ @@ -1438,11 +1436,16 @@ do_hash_page: /* Error */ blt- 13f + + /* Reload DSISR into r4 for the DABR check below */ + ld r4,_DSISR(r1) #endif /* CONFIG_PPC_STD_MMU_64 */ /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: -11: ld r4,_DAR(r1) +11: andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault From bf05fc25f268cd62f147f368fe65ad3e5b04fe9f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 15 Jun 2017 19:16:48 +0530 Subject: [PATCH 210/341] powerpc/perf: Fix oops when kthread execs user process When a kthread calls call_usermodehelper() the steps are: 1. allocate current->mm 2. load_elf_binary() 3. populate current->thread.regs While doing this, interrupts are not disabled. If there is a perf interrupt in the middle of this process (i.e. step 1 has completed but not yet reached to step 3) and if perf tries to read userspace regs, kernel oops with following log: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc0000000000da0fc ... Call Trace: perf_output_sample_regs+0x6c/0xd0 perf_output_sample+0x4e4/0x830 perf_event_output_forward+0x64/0x90 __perf_event_overflow+0x8c/0x1e0 record_and_restart+0x220/0x5c0 perf_event_interrupt+0x2d8/0x4d0 performance_monitor_exception+0x54/0x70 performance_monitor_common+0x158/0x160 --- interrupt: f01 at avtab_search_node+0x150/0x1a0 LR = avtab_search_node+0x100/0x1a0 ... load_elf_binary+0x6e8/0x15a0 search_binary_handler+0xe8/0x290 do_execveat_common.isra.14+0x5f4/0x840 call_usermodehelper_exec_async+0x170/0x210 ret_from_kernel_thread+0x5c/0x7c Fix it by setting abi to PERF_SAMPLE_REGS_ABI_NONE when userspace pt_regs are not set. 
Fixes: ed4a4ef85cf5 ("powerpc/perf: Add support for sampling interrupt register state") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Ravi Bangoria Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/perf/perf_regs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index cbd82fde5770..09ceea6175ba 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs_user_copy) { regs_user->regs = task_pt_regs(current); - regs_user->abi = perf_reg_abi(current); + regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : + PERF_SAMPLE_REGS_ABI_NONE; } From 10223df2c183df65674ec584dd95e2adced40d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 15 Jun 2017 20:23:08 +0300 Subject: [PATCH 211/341] drm/i915: Actually attach the tv_format property to the SDVO connector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attach the tv_format property to the SDVO connector instead of passing a '0' in place of the pointer to the property. This got broken when the SDVO connector properties were converted to atomic. 
We can thank sparse for catching this: drivers/gpu/drm/i915/intel_sdvo.c:2742:75: warning: Using plain integer as NULL pointer Cc: Maarten Lankhorst Cc: Daniel Vetter Fixes: 630d30a4ee27 ("drm/i915: Convert intel_sdvo connector properties to atomic.") Link: http://patchwork.freedesktop.org/patch/msgid/20170615172308.10121-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_sdvo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index f4329d20b6f6..3f8f30b412cd 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2739,7 +2739,8 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; - drm_object_attach_property(&intel_sdvo_connector->base.base.base, 0, 0); + drm_object_attach_property(&intel_sdvo_connector->base.base.base, + intel_sdvo_connector->tv_format, 0); return true; } From b8e5d2ef19c1b5f2528d09859d5cebed1f870da3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 13:35:08 +0100 Subject: [PATCH 212/341] drm/i915: Make i915_vma_destroy() static i915_vma_destroy() is now not used outside of i915_vma.c so we can remove the export and make the function static. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170616123508.12673-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/i915/i915_vma.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 6cf32da682ec..bbc8309743a0 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -576,7 +576,7 @@ err_unpin: return ret; } -void i915_vma_destroy(struct i915_vma *vma) +static void i915_vma_destroy(struct i915_vma *vma) { GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(i915_vma_is_active(vma)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4d827300d1a8..11ce83a8adf0 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -236,7 +236,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma, void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); -void i915_vma_destroy(struct i915_vma *vma); int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); From e27ab73d17ef90db3e586a02ce2f03eb660451cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Jun 2017 13:38:49 +0100 Subject: [PATCH 213/341] drm/i915: Mark CPU cache as dirty on every transition for CPU writes Currently, we only mark the CPU cache as dirty if we skip a clflush. This leads to some confusion where we have to ask if the object is in the write domain or missed a clflush. If we always mark the cache as dirty, this becomes a much simpler question to answer. The goal remains to do as few clflushes as required and to do them as late as possible, in the hope of deferring the work to a kthread and not block the caller (e.g. execbuf, flips).
v2: Always call clflush before GPU execution when the cache_dirty flag is set. This may cause some extra work on llc systems that migrate dirty buffers back and forth - but we do try to limit that by only setting cache_dirty at the end of the gpu sequence. v3: Always mark the cache as dirty upon a level change, as we need to invalidate any stale cachelines due to external writes. Reported-by: Dongwon Kim Fixes: a6a7cc4b7db6 ("drm/i915: Always flush the dirty CPU cache when pinning the scanout") Signed-off-by: Chris Wilson Cc: Dongwon Kim Cc: Matt Roper Tested-by: Dongwon Kim Link: http://patchwork.freedesktop.org/patch/msgid/20170615123850.26843-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 76 +++++++++++-------- drivers/gpu/drm/i915/i915_gem_clflush.c | 15 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 21 ++--- drivers/gpu/drm/i915/i915_gem_internal.c | 3 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 5 +- .../gpu/drm/i915/selftests/huge_gem_object.c | 3 +- 6 files changed, 67 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 31cbe78171a9..b1504a829c6a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,7 +49,7 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + if (obj->cache_dirty) return false; if (!i915_gem_object_is_coherent(obj)) @@ -233,6 +233,14 @@ err_phys: return st; } +static void __start_cpu_write(struct drm_i915_gem_object *obj) +{ + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + if (cpu_write_needs_clflush(obj)) + obj->cache_dirty = true; +} + static void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -248,8 +256,7 @@ __i915_gem_object_release_shmem(struct 
drm_i915_gem_object *obj, !i915_gem_object_is_coherent(obj)) drm_clflush_sg(pages); - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; + __start_cpu_write(obj); } static void @@ -684,6 +691,12 @@ i915_gem_dumb_create(struct drm_file *file, args->size, &args->handle); } +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + /** * Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -753,6 +766,11 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) case I915_GEM_DOMAIN_CPU: i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); break; + + case I915_GEM_DOMAIN_RENDER: + if (gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; + break; } obj->base.write_domain = 0; @@ -854,7 +872,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + if (!obj->cache_dirty && + !(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = CLFLUSH_BEFORE; out: @@ -906,14 +925,16 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * This optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) + if (!obj->cache_dirty) { *needs_clflush |= CLFLUSH_AFTER; - /* Same trick applies to invalidate partially written cachelines read - * before writing. - */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= CLFLUSH_BEFORE; + /* + * Same trick applies to invalidate partially written + * cachelines read before writing. 
+ */ + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + *needs_clflush |= CLFLUSH_BEFORE; + } out: intel_fb_obj_invalidate(obj, ORIGIN_CPU); @@ -3395,10 +3416,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) - return; - - i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); + /* + * We manually flush the CPU domain so that we can override and + * force the flush for the display, and perform it asyncrhonously. + */ + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + if (obj->cache_dirty) + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; } @@ -3657,13 +3681,10 @@ restart: } } - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - i915_gem_object_is_coherent(obj)) - obj->cache_dirty = true; - list_for_each_entry(vma, &obj->vma_list, obj_link) vma->node.color = cache_level; obj->cache_level = cache_level; + obj->cache_dirty = true; /* Always invalidate stale cachelines */ return 0; } @@ -3885,9 +3906,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return 0; - flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ @@ -3899,15 +3917,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); /* If we're writing through the CPU, then the GPU read domains will * need to be invalidated at next use. 
*/ - if (write) { - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } + if (write) + __start_cpu_write(obj); return 0; } @@ -4328,6 +4344,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) } else obj->cache_level = I915_CACHE_NONE; + obj->cache_dirty = !i915_gem_object_is_coherent(obj); + trace_i915_gem_object_create(obj); return obj; @@ -4994,10 +5012,8 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, global_link) { - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } + list_for_each_entry(obj, *p, global_link) + __start_cpu_write(obj); } mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index ffac7a1f0caf..17b207e963c2 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -71,8 +71,6 @@ static const struct dma_fence_ops i915_clflush_ops = { static void __i915_do_clflush(struct drm_i915_gem_object *obj) { drm_clflush_sg(obj->mm.pages); - obj->cache_dirty = false; - intel_fb_obj_flush(obj, ORIGIN_CPU); } @@ -81,9 +79,6 @@ static void i915_clflush_work(struct work_struct *work) struct clflush *clflush = container_of(work, typeof(*clflush), work); struct drm_i915_gem_object *obj = clflush->obj; - if (!obj->cache_dirty) - goto out; - if (i915_gem_object_pin_pages(obj)) { DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); goto out; @@ -131,10 +126,10 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, * anything not backed by physical memory we consider to be always * coherent and not need clflushing. 
*/ - if (!i915_gem_object_has_struct_page(obj)) + if (!i915_gem_object_has_struct_page(obj)) { + obj->cache_dirty = false; return; - - obj->cache_dirty = true; + } /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -153,6 +148,8 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, if (!(flags & I915_CLFLUSH_SYNC)) clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); if (clflush) { + GEM_BUG_ON(!obj->cache_dirty); + dma_fence_init(&clflush->dma, &i915_clflush_ops, &clflush_lock, @@ -180,4 +177,6 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, } else { GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); } + + obj->cache_dirty = false; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 96705171e397..2a9aed5640e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -309,7 +309,7 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) return DBG_USE_CPU_RELOC > 0; return (HAS_LLC(to_i915(obj->base.dev)) || - obj->base.write_domain == I915_GEM_DOMAIN_CPU || + obj->cache_dirty || obj->cache_level != I915_CACHE_NONE); } @@ -1110,10 +1110,8 @@ eb_move_to_gpu(struct i915_execbuffer *eb) if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) { + if (obj->cache_dirty) i915_gem_clflush_object(obj, 0); - obj->base.write_domain = 0; - } ret = i915_gem_request_await_object (eb->request, obj, obj->base.pending_write_domain); @@ -1248,12 +1246,6 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } -static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_level == I915_CACHE_NONE || - obj->cache_level == I915_CACHE_WT); -} - void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned int flags) @@ -1277,15 +1269,16 
@@ void i915_vma_move_to_active(struct i915_vma *vma, i915_gem_active_set(&vma->last_read[idx], req); list_move_tail(&vma->vm_link, &vma->vm->active_list); + obj->base.write_domain = 0; if (flags & EXEC_OBJECT_WRITE) { + obj->base.write_domain = I915_GEM_DOMAIN_RENDER; + if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) i915_gem_active_set(&obj->frontbuffer_write, req); - /* update for the implicit flush after a batch */ - obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; - if (!obj->cache_dirty && gpu_write_needs_clflush(obj)) - obj->cache_dirty = true; + obj->base.read_domains = 0; } + obj->base.read_domains |= I915_GEM_GPU_DOMAINS; if (flags & EXEC_OBJECT_NEEDS_FENCE) i915_gem_active_set(&vma->last_fence, req); diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index fc950abbe400..58e93e87d573 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -188,9 +188,10 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_object_internal_ops); - obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; + obj->cache_dirty = !i915_gem_object_is_coherent(obj); return obj; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 1a0ce1dc68f5..34461e1928bc 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -802,9 +802,10 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file drm_gem_private_object_init(dev, &obj->base, args->user_size); i915_gem_object_init(obj, &i915_gem_userptr_ops); - obj->cache_level = I915_CACHE_LLC; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_LLC; + obj->cache_dirty = !i915_gem_object_is_coherent(obj); obj->userptr.ptr = args->user_ptr; obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY); diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index 4e681fc13be4..0ca867a877b6 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -126,9 +126,10 @@ huge_gem_object(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); i915_gem_object_init(obj, &huge_ops); - obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->cache_dirty = !i915_gem_object_is_coherent(obj); obj->scratch = phys_size; return obj; From 7fc92e96c3eed6004ce8dab5e315264bff85db5a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 11:54:55 +0100 Subject: [PATCH 214/341] drm/i915: Store i915_gem_object_is_coherent() as a bit next to cache-dirty For ease of use (i.e. 
avoiding a few checks and function calls), store the object's cache coherency next to the cache is dirty bit. Specifically this patch aims to reduce the frequency of no-op calls to i915_gem_object_clflush() to counter-act the increase of such calls for GPU only objects in the previous patch. v2: Replace cache_dirty & ~cache_coherent with cache_dirty && !cache_coherent as gcc generates much better code for the latter (Tvrtko) Signed-off-by: Chris Wilson Cc: Dongwon Kim Cc: Matt Roper Tested-by: Dongwon Kim Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170616105455.16977-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 14 +++++++------- drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_internal.c | 3 ++- drivers/gpu/drm/i915/i915_gem_object.h | 1 + drivers/gpu/drm/i915/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/i915_gem_userptr.c | 3 ++- drivers/gpu/drm/i915/selftests/huge_gem_object.c | 3 ++- 8 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b1504a829c6a..4ae30f74c475 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (obj->cache_dirty) return false; - if (!i915_gem_object_is_coherent(obj)) + if (!obj->cache_coherent) return true; return obj->pin_display; @@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !i915_gem_object_is_coherent(obj)) + !obj->cache_coherent) drm_clflush_sg(pages); __start_cpu_write(obj); @@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, if (ret) return ret; - if (i915_gem_object_is_coherent(obj) || - !static_cpu_has(X86_FEATURE_CLFLUSH)) { + if 
(obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) goto err_unpin; @@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, if (ret) return ret; - if (i915_gem_object_is_coherent(obj) || - !static_cpu_has(X86_FEATURE_CLFLUSH)) { + if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, true); if (ret) goto err_unpin; @@ -3684,6 +3682,7 @@ restart: list_for_each_entry(vma, &obj->vma_list, obj_link) vma->node.color = cache_level; obj->cache_level = cache_level; + obj->cache_coherent = i915_gem_object_is_coherent(obj); obj->cache_dirty = true; /* Always invalidate stale cachelines */ return 0; @@ -4344,7 +4343,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) } else obj->cache_level = I915_CACHE_NONE; - obj->cache_dirty = !i915_gem_object_is_coherent(obj); + obj->cache_coherent = i915_gem_object_is_coherent(obj); + obj->cache_dirty = !obj->cache_coherent; trace_i915_gem_object_create(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index 17b207e963c2..152f16c11878 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -139,7 +139,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, * snooping behaviour occurs naturally as the result of our domain * tracking. 
*/ - if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj)) + if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent) return; trace_i915_gem_object_clflush(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2a9aed5640e2..d6099d084748 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1110,7 +1110,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb) if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; - if (obj->cache_dirty) + if (unlikely(obj->cache_dirty && !obj->cache_coherent)) i915_gem_clflush_object(obj, 0); ret = i915_gem_request_await_object diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 58e93e87d573..568bf83af1f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; - obj->cache_dirty = !i915_gem_object_is_coherent(obj); + obj->cache_coherent = i915_gem_object_is_coherent(obj); + obj->cache_dirty = !obj->cache_coherent; return obj; } diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 915057824284..adb482b00271 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -121,6 +121,7 @@ struct drm_i915_gem_object { unsigned long gt_ro:1; unsigned int cache_level:3; unsigned int cache_dirty:1; + unsigned int cache_coherent:1; atomic_t frontbuffer_bits; unsigned int frontbuffer_ggtt_origin; /* write once */ diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 681db6083f4d..a817b3e0b17e 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -590,6 +590,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, obj->stolen = stolen; obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->cache_coherent = true; /* assumptions! 
more like cache_oblivious */ if (i915_gem_object_pin_pages(obj)) goto cleanup; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 34461e1928bc..05c36f663550 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -805,7 +805,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_LLC; - obj->cache_dirty = !i915_gem_object_is_coherent(obj); + obj->cache_coherent = i915_gem_object_is_coherent(obj); + obj->cache_dirty = !obj->cache_coherent; obj->userptr.ptr = args->user_ptr; obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY); diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index 0ca867a877b6..caf76af36aba 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -129,7 +129,8 @@ huge_gem_object(struct drm_i915_private *i915, obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; - obj->cache_dirty = !i915_gem_object_is_coherent(obj); + obj->cache_coherent = i915_gem_object_is_coherent(obj); + obj->cache_dirty = !obj->cache_coherent; obj->scratch = phys_size; return obj; From 7c65817e6d38fad8ed4ae8632b3615980a2197b4 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Wed, 17 May 2017 08:45:29 -0600 Subject: [PATCH 215/341] drm/msm: gpu: Enable zap shader for A5XX The A5XX GPU powers on in "secure" mode. In secure mode the GPU can only render to buffers that are marked as secure and inaccessible to the kernel and user through a series of hardware protections. In practice secure mode is used to draw things like a UI on a secure video frame. 
In order to switch out of secure mode the GPU executes a special shader that clears out the GMEM and other sensitive registers and then writes a register. Because the kernel can't be trusted the shader binary is signed and verified and programmed by the secure world. To do this we need to read the MDT header and the segments from the firmware location and put them in memory and present them for approval. For targets without secure support there is an out: if the secure world doesn't support secure then there are no hardware protections and we can freely write the SECVID_TRUST register from the CPU. We don't have 100% confidence that we can query the secure capabilities at run time but we have enough calls that need to go right to give us some confidence that we're at least doing something useful. Of course if we guess wrong you trigger a permissions violation which usually ends up in a system crash but that's a problem that shows up immediately. [v2: use child device per Bjorn] [v3: use generic MDT loader per Bjorn] [v4: use managed dma functions and ifdefs for the MDT loader] [v5: Add depends for QCOM_MDT_LOADER] Signed-off-by: Jordan Crouse Acked-by: Bjorn Andersson [robclark: fix Kconfig to use select instead of depends + #if IS_ENABLED()] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/Kconfig | 1 + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 180 ++++++++++++++++++++- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 2 + drivers/gpu/drm/msm/adreno/adreno_device.c | 1 + drivers/gpu/drm/msm/adreno/adreno_gpu.h | 1 + 5 files changed, 183 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 0a31cd6d01ce..b638d192ce5e 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -5,6 +5,7 @@ config DRM_MSM depends on ARCH_QCOM || (ARM && COMPILE_TEST) depends on OF && COMMON_CLK depends on MMU + select QCOM_MDT_LOADER select REGULATOR select DRM_KMS_HELPER select DRM_PANEL diff --git
a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 31a9bceed32c..67fd6bf2a25f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -11,6 +11,12 @@ * */ +#include +#include +#include +#include +#include +#include #include "msm_gem.h" #include "msm_mmu.h" #include "a5xx_gpu.h" @@ -18,6 +24,62 @@ extern bool hang_debug; static void a5xx_dump(struct msm_gpu *gpu); +#define GPU_PAS_ID 13 + +#if IS_ENABLED(CONFIG_QCOM_MDT_LOADER) + +static int zap_shader_load_mdt(struct device *dev, const char *fwname) +{ + const struct firmware *fw; + phys_addr_t mem_phys; + ssize_t mem_size; + void *mem_region = NULL; + int ret; + + /* Request the MDT file for the firmware */ + ret = request_firmware(&fw, fwname, dev); + if (ret) { + DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname); + return ret; + } + + /* Figure out how much memory we need */ + mem_size = qcom_mdt_get_size(fw); + if (mem_size < 0) { + ret = mem_size; + goto out; + } + + /* Allocate memory for the firmware image */ + mem_region = dmam_alloc_coherent(dev, mem_size, &mem_phys, GFP_KERNEL); + if (!mem_region) { + ret = -ENOMEM; + goto out; + } + + /* Load the rest of the MDT */ + ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys, + mem_size); + if (ret) + goto out; + + /* Send the image to the secure world */ + ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID); + if (ret) + DRM_DEV_ERROR(dev, "Unable to authorize the image\n"); + +out: + release_firmware(fw); + + return ret; +} +#else +static int zap_shader_load_mdt(struct device *dev, const char *fwname) +{ + return -ENODEV; +} +#endif + static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { @@ -304,6 +366,98 @@ static int a5xx_ucode_init(struct msm_gpu *gpu) return 0; } +#define SCM_GPU_ZAP_SHADER_RESUME 0 + +static int a5xx_zap_shader_resume(struct msm_gpu *gpu) +{ + int ret; + + ret = 
qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID); + if (ret) + DRM_ERROR("%s: zap-shader resume failed: %d\n", + gpu->name, ret); + + return ret; +} + +/* Set up a child device to "own" the zap shader */ +static int a5xx_zap_shader_dev_init(struct device *parent, struct device *dev) +{ + struct device_node *node; + int ret; + + if (dev->parent) + return 0; + + /* Find the sub-node for the zap shader */ + node = of_get_child_by_name(parent->of_node, "zap-shader"); + if (!node) { + DRM_DEV_ERROR(parent, "zap-shader not found in device tree\n"); + return -ENODEV; + } + + dev->parent = parent; + dev->of_node = node; + dev_set_name(dev, "adreno_zap_shader"); + + ret = device_register(dev); + if (ret) { + DRM_DEV_ERROR(parent, "Couldn't register zap shader device\n"); + goto out; + } + + ret = of_reserved_mem_device_init(dev); + if (ret) { + DRM_DEV_ERROR(parent, "Unable to set up the reserved memory\n"); + device_unregister(dev); + } + +out: + if (ret) + dev->parent = NULL; + + return ret; +} + +static int a5xx_zap_shader_init(struct msm_gpu *gpu) +{ + static bool loaded; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct platform_device *pdev = a5xx_gpu->pdev; + int ret; + + /* + * If the zap shader is already loaded into memory we just need to kick + * the remote processor to reinitialize it + */ + if (loaded) + return a5xx_zap_shader_resume(gpu); + + /* We need SCM to be able to load the firmware */ + if (!qcom_scm_is_available()) { + DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n"); + return -EPROBE_DEFER; + } + + /* Each GPU has a target specific zap shader firmware name to use */ + if (!adreno_gpu->info->zapfw) { + DRM_DEV_ERROR(&pdev->dev, + "Zap shader firmware file not specified for this target\n"); + return -ENODEV; + } + + ret = a5xx_zap_shader_dev_init(&pdev->dev, &a5xx_gpu->zap_dev); + + if (!ret) + ret = zap_shader_load_mdt(&a5xx_gpu->zap_dev, + adreno_gpu->info->zapfw); 
+ + loaded = !ret; + + return ret; +} + #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \ A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \ A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \ @@ -488,8 +642,27 @@ static int a5xx_hw_init(struct msm_gpu *gpu) return -EINVAL; } - /* Put the GPU into unsecure mode */ - gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); + /* + * Try to load a zap shader into the secure world. If successful + * we can use the CP to switch out of secure mode. If not then we + * have no resource but to try to switch ourselves out manually. If we + * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will + * be blocked and a permissions violation will soon follow. + */ + ret = a5xx_zap_shader_init(gpu); + if (!ret) { + OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); + OUT_RING(gpu->rb, 0x00000000); + + gpu->funcs->flush(gpu); + if (!gpu->funcs->idle(gpu)) + return -EINVAL; + } else { + /* Print a warning so if we die, we know why */ + dev_warn_once(gpu->dev->dev, + "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); + gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); + } return 0; } @@ -521,6 +694,9 @@ static void a5xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); + if (a5xx_gpu->zap_dev.parent) + device_unregister(&a5xx_gpu->zap_dev); + if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->id); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index 1590f845d554..78408f56660e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -36,6 +36,8 @@ struct a5xx_gpu { uint32_t gpmu_dwords; uint32_t lm_leakage; + + struct device zap_dev; }; #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index c0fa5d1c75ff..b7bd6d393215 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ 
b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -86,6 +86,7 @@ static const struct adreno_info gpulist[] = { ADRENO_QUIRK_FAULT_DETECT_MASK, .init = a5xx_gpu_init, .gpmufw = "a530v3_gpmu.fw2", + .zapfw = "a530_zap.mdt", }, }; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index fb4831f9f80b..12b1483625f8 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -77,6 +77,7 @@ struct adreno_info { uint32_t gmem; enum adreno_quirks quirks; struct msm_gpu *(*init)(struct drm_device *dev); + const char *zapfw; }; const struct adreno_info *adreno_info(struct adreno_rev rev); From 167b606aa262270ab6aeb5700adca6b1f33da26a Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:34:59 -0600 Subject: [PATCH 216/341] drm/msm: Remove DRM_MSM_NUM_IOCTLS The ioctl array is sparsely populated but the compiler will make sure that it is sufficiently sized for all the values that we have so we can safely use ARRAY_SIZE() instead of having a constantly changing #define in the uapi header. 
Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 2 +- include/uapi/drm/msm_drm.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9d498eb81906..18d02ccb6c9b 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -842,7 +842,7 @@ static struct drm_driver msm_driver = { .debugfs_init = msm_debugfs_init, #endif .ioctls = msm_ioctls, - .num_ioctls = DRM_MSM_NUM_IOCTLS, + .num_ioctls = ARRAY_SIZE(msm_ioctls), .fops = &fops, .name = "msm", .desc = "MSM Snapdragon DRM", diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index a4a189a240d7..a9985fe6efcd 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -261,7 +261,6 @@ struct drm_msm_gem_madvise { #define DRM_MSM_GEM_SUBMIT 0x06 #define DRM_MSM_WAIT_FENCE 0x07 #define DRM_MSM_GEM_MADVISE 0x08 -#define DRM_MSM_NUM_IOCTLS 0x09 #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) From e895c7bd31f5ca7dbd315001cfbdcaee98ee16c4 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:35:00 -0600 Subject: [PATCH 217/341] drm/msm: Remove idle function hook There isn't any generic code that uses ->idle so remove it. 
Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 4 ++-- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 4 ++-- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 9 ++++----- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 1 + drivers/gpu/drm/msm/adreno/a5xx_power.c | 2 +- drivers/gpu/drm/msm/msm_gpu.h | 1 - 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 7fd77958a436..0e3828ed1e46 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -40,6 +40,7 @@ extern bool hang_debug; static void a3xx_dump(struct msm_gpu *gpu); +static bool a3xx_idle(struct msm_gpu *gpu); static bool a3xx_me_init(struct msm_gpu *gpu) { @@ -65,7 +66,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); gpu->funcs->flush(gpu); - return gpu->funcs->idle(gpu); + return a3xx_idle(gpu); } static int a3xx_hw_init(struct msm_gpu *gpu) @@ -446,7 +447,6 @@ static const struct adreno_gpu_funcs funcs = { .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, - .idle = a3xx_idle, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index dfe0eceaae3b..19abf229b08d 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -31,6 +31,7 @@ extern bool hang_debug; static void a4xx_dump(struct msm_gpu *gpu); +static bool a4xx_idle(struct msm_gpu *gpu); /* * a4xx_enable_hwcg() - Program the clock control registers @@ -137,7 +138,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); gpu->funcs->flush(gpu); - return gpu->funcs->idle(gpu); + return a4xx_idle(gpu); } static int a4xx_hw_init(struct msm_gpu *gpu) @@ -534,7 +535,6 @@ static const struct adreno_gpu_funcs funcs = { .last_fence = adreno_last_fence, .submit = adreno_submit, .flush 
= adreno_flush, - .idle = a4xx_idle, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 67fd6bf2a25f..c4b775e1f23b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -287,7 +287,7 @@ static int a5xx_me_init(struct msm_gpu *gpu) gpu->funcs->flush(gpu); - return gpu->funcs->idle(gpu) ? 0 : -EINVAL; + return a5xx_idle(gpu) ? 0 : -EINVAL; } static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, @@ -638,7 +638,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) OUT_RING(gpu->rb, 0x0F); gpu->funcs->flush(gpu); - if (!gpu->funcs->idle(gpu)) + if (!a5xx_idle(gpu)) return -EINVAL; } @@ -655,7 +655,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) OUT_RING(gpu->rb, 0x00000000); gpu->funcs->flush(gpu); - if (!gpu->funcs->idle(gpu)) + if (!a5xx_idle(gpu)) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -732,7 +732,7 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -static bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu) { /* wait for CP to drain ringbuffer: */ if (!adreno_idle(gpu)) @@ -1037,7 +1037,6 @@ static const struct adreno_gpu_funcs funcs = { .last_fence = adreno_last_fence, .submit = a5xx_submit, .flush = adreno_flush, - .idle = a5xx_idle, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index 78408f56660e..6638bc85645d 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -58,5 +58,6 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } +bool a5xx_idle(struct msm_gpu *gpu); #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 
72d52c71f769..ed0802e6ca59 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -194,7 +194,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) gpu->funcs->flush(gpu); - if (!gpu->funcs->idle(gpu)) { + if (!a5xx_idle(gpu)) { DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", gpu->name); return -EINVAL; diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index aa3241000455..636450299c33 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -50,7 +50,6 @@ struct msm_gpu_funcs { void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); void (*flush)(struct msm_gpu *gpu); - bool (*idle)(struct msm_gpu *gpu); irqreturn_t (*irq)(struct msm_gpu *irq); uint32_t (*last_fence)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); From 49fd08baa36ac10b13ea7b23fc6bbee8b4a6fcfe Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:35:01 -0600 Subject: [PATCH 218/341] drm/msm: Add hint to DRM_IOCTL_MSM_GEM_INFO to return an object IOVA Modify the 'pad' member of struct drm_msm_gem_info to 'flags'. If the user sets 'flags' to non-zero it means that they want a IOVA for the GEM object instead of a mmap() offset. Return the iova in the 'offset' member. 
Signed-off-by: Jordan Crouse [robclark: s/hint/flags in commit msg] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 23 +++++++++++++++++++++-- include/uapi/drm/msm_drm.h | 8 ++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 18d02ccb6c9b..beb4f6b3ac70 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -699,6 +699,17 @@ static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, void *data, return ret; } +static int msm_ioctl_gem_info_iova(struct drm_device *dev, + struct drm_gem_object *obj, uint64_t *iova) +{ + struct msm_drm_private *priv = dev->dev_private; + + if (!priv->gpu) + return -EINVAL; + + return msm_gem_get_iova(obj, priv->gpu->id, iova); +} + static int msm_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file) { @@ -706,14 +717,22 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_gem_object *obj; int ret = 0; - if (args->pad) + if (args->flags & ~MSM_INFO_FLAGS) return -EINVAL; obj = drm_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; - args->offset = msm_gem_mmap_offset(obj); + if (args->flags & MSM_INFO_IOVA) { + uint64_t iova; + + ret = msm_ioctl_gem_info_iova(dev, obj, &iova); + if (!ret) + args->offset = iova; + } else { + args->offset = msm_gem_mmap_offset(obj); + } drm_gem_object_unreference_unlocked(obj); diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index a9985fe6efcd..26c54f6d595d 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -104,10 +104,14 @@ struct drm_msm_gem_new { __u32 handle; /* out */ }; +#define MSM_INFO_IOVA 0x01 + +#define MSM_INFO_FLAGS (MSM_INFO_IOVA) + struct drm_msm_gem_info { __u32 handle; /* in */ - __u32 pad; - __u64 offset; /* out, offset to pass to mmap() */ + __u32 flags; /* in - combination of MSM_INFO_* flags */ + __u64 offset; /* out, mmap() offset or iova */ }; #define 
MSM_PREP_READ 0x01 From 5770fc7a562e7da37cbf981f352d774c0142d7ba Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:35:03 -0600 Subject: [PATCH 219/341] drm/msm: Add a struct to pass configuration to msm_gpu_init() The amount of information that we need to pass into msm_gpu_init() is steadily increasing, so add a new struct to stabilize the function call and make it easier to add new configuration down the line. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 12 ++++++++++-- drivers/gpu/drm/msm/msm_gpu.c | 13 ++++++------- drivers/gpu/drm/msm/msm_gpu.h | 11 ++++++++++- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 5b63fc649dcc..8f3809fc3576 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -342,6 +342,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) { struct adreno_platform_config *config = pdev->dev.platform_data; + struct msm_gpu_config adreno_gpu_config = { 0 }; struct msm_gpu *gpu = &adreno_gpu->base; int ret; @@ -360,9 +361,16 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, DBG("fast_rate=%u, slow_rate=27000000, bus_freq=%u", gpu->fast_rate, gpu->bus_freq); + adreno_gpu_config.ioname = "kgsl_3d0_reg_memory"; + adreno_gpu_config.irqname = "kgsl_3d0_irq"; + + adreno_gpu_config.va_start = SZ_16M; + adreno_gpu_config.va_end = 0xffffffff; + + adreno_gpu_config.ringsz = RB_SIZE; + ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, - adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq", - RB_SIZE); + adreno_gpu->info->name, &adreno_gpu_config); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0fdc88d79ca8..5b118e8ead18 100644 --- 
a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -562,7 +562,7 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, - const char *name, const char *ioname, const char *irqname, int ringsz) + const char *name, struct msm_gpu_config *config) { struct iommu_domain *iommu; int ret; @@ -593,14 +593,14 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, /* Map registers: */ - gpu->mmio = msm_ioremap(pdev, ioname, name); + gpu->mmio = msm_ioremap(pdev, config->ioname, name); if (IS_ERR(gpu->mmio)) { ret = PTR_ERR(gpu->mmio); goto fail; } /* Get Interrupt: */ - gpu->irq = platform_get_irq_byname(pdev, irqname); + gpu->irq = platform_get_irq_byname(pdev, config->irqname); if (gpu->irq < 0) { ret = gpu->irq; dev_err(drm->dev, "failed to get irq: %d\n", ret); @@ -640,9 +640,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, */ iommu = iommu_domain_alloc(&platform_bus_type); if (iommu) { - /* TODO 32b vs 64b address space.. 
*/ - iommu->geometry.aperture_start = SZ_16M; - iommu->geometry.aperture_end = 0xffffffff; + iommu->geometry.aperture_start = config->va_start; + iommu->geometry.aperture_end = config->va_end; dev_info(drm->dev, "%s: using IOMMU\n", name); gpu->aspace = msm_gem_address_space_create(&pdev->dev, @@ -663,7 +662,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, /* Create ringbuffer: */ mutex_lock(&drm->struct_mutex); - gpu->rb = msm_ringbuffer_new(gpu, ringsz); + gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); mutex_unlock(&drm->struct_mutex); if (IS_ERR(gpu->rb)) { ret = PTR_ERR(gpu->rb); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 636450299c33..fd8049592aae 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -28,6 +28,14 @@ struct msm_gem_submit; struct msm_gpu_perfcntr; +struct msm_gpu_config { + const char *ioname; + const char *irqname; + uint64_t va_start; + uint64_t va_end; + unsigned int ringsz; +}; + /* So far, with hardware that I've seen to date, we can have: * + zero, one, or two z180 2d cores * + a3xx or a2xx 3d core, which share a common CP (the firmware @@ -208,7 +216,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, - const char *name, const char *ioname, const char *irqname, int ringsz); + const char *name, struct msm_gpu_config *config); + void msm_gpu_cleanup(struct msm_gpu *gpu); struct msm_gpu *adreno_load_gpu(struct drm_device *dev); From 42a105e9cfaf0a0c74fdac5ba4ff17d6c0b024cd Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:35:04 -0600 Subject: [PATCH 220/341] drm/msm: Remove memptrs->wptr memptrs->wptr seems to be unused. Remove it to avoid confusing the upcoming preemption code. 
Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 --- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 8f3809fc3576..f8287fd727f1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -77,7 +77,6 @@ int adreno_hw_init(struct msm_gpu *gpu) /* reset completed fence seqno: */ adreno_gpu->memptrs->fence = gpu->fctx->completed_fence; adreno_gpu->memptrs->rptr = 0; - adreno_gpu->memptrs->wptr = 0; /* Setup REG_CP_RB_CNTL: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, @@ -258,7 +257,6 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, gpu->fctx->last_fence); seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr); seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); /* dump these out in a form that can be parsed by demsm: */ @@ -294,7 +292,6 @@ void adreno_dump_info(struct msm_gpu *gpu) printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, gpu->fctx->last_fence); printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("wptr: %d\n", adreno_gpu->memptrs->wptr); printk("rb wptr: %d\n", get_wptr(gpu->rb)); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 12b1483625f8..4d9165f29f43 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -87,7 +87,6 @@ const struct adreno_info *adreno_info(struct adreno_rev rev); struct adreno_rbmemptrs { volatile uint32_t rptr; - volatile uint32_t wptr; volatile uint32_t fence; }; From cb1e38181a0728777057fb03fc4cddb29b7fb24d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 09:15:36 -0400 Subject: [PATCH 221/341] drm/msm: fix locking inconsistency for gpu->hw_init() Most, but not 
all, paths were calling this with struct_mutex held. The fast-path in msm_gem_get_iova() (plus some sub-code-paths that only run the first time) was masking this issue. So let's just always hold struct_mutex for hw_init(). And sprinkle some WARN_ON()'s and might_lock() to avoid this sort of problem in the future. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 13 +++++-------- drivers/gpu/drm/msm/adreno/a5xx_power.c | 11 ++++------- drivers/gpu/drm/msm/adreno/adreno_device.c | 2 ++ drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 3 +++ drivers/gpu/drm/msm/msm_gpu.c | 2 ++ 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index c4b775e1f23b..8d17f525c417 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -297,31 +297,28 @@ static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, struct drm_gem_object *bo; void *ptr; - mutex_lock(&drm->struct_mutex); bo = msm_gem_new(drm, fw->size - 4, MSM_BO_UNCACHED); - mutex_unlock(&drm->struct_mutex); - if (IS_ERR(bo)) return bo; - ptr = msm_gem_get_vaddr(bo); + ptr = msm_gem_get_vaddr_locked(bo); if (!ptr) { - drm_gem_object_unreference_unlocked(bo); + drm_gem_object_unreference(bo); return ERR_PTR(-ENOMEM); } if (iova) { - int ret = msm_gem_get_iova(bo, gpu->id, iova); + int ret = msm_gem_get_iova_locked(bo, gpu->id, iova); if (ret) { - drm_gem_object_unreference_unlocked(bo); + drm_gem_object_unreference(bo); return ERR_PTR(ret); } } memcpy(ptr, &fw->data[4], fw->size - 4); - msm_gem_put_vaddr(bo); + msm_gem_put_vaddr_locked(bo); return bo; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index ed0802e6ca59..f3274b827a49 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -294,17 +294,14 @@ void a5xx_gpmu_ucode_init(struct msm_gpu
*gpu) */ bosize = (cmds_size + (cmds_size / TYPE4_MAX_PAYLOAD) + 1) << 2; - mutex_lock(&drm->struct_mutex); a5xx_gpu->gpmu_bo = msm_gem_new(drm, bosize, MSM_BO_UNCACHED); - mutex_unlock(&drm->struct_mutex); - if (IS_ERR(a5xx_gpu->gpmu_bo)) goto err; - if (msm_gem_get_iova(a5xx_gpu->gpmu_bo, gpu->id, &a5xx_gpu->gpmu_iova)) + if (msm_gem_get_iova_locked(a5xx_gpu->gpmu_bo, gpu->id, &a5xx_gpu->gpmu_iova)) goto err; - ptr = msm_gem_get_vaddr(a5xx_gpu->gpmu_bo); + ptr = msm_gem_get_vaddr_locked(a5xx_gpu->gpmu_bo); if (!ptr) goto err; @@ -323,7 +320,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) cmds_size -= _size; } - msm_gem_put_vaddr(a5xx_gpu->gpmu_bo); + msm_gem_put_vaddr_locked(a5xx_gpu->gpmu_bo); a5xx_gpu->gpmu_dwords = dwords; goto out; @@ -332,7 +329,7 @@ err: if (a5xx_gpu->gpmu_iova) msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); if (a5xx_gpu->gpmu_bo) - drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo); + drm_gem_object_unreference(a5xx_gpu->gpmu_bo); a5xx_gpu->gpmu_bo = NULL; a5xx_gpu->gpmu_iova = 0; diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index b7bd6d393215..c75c4df4bc39 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -159,7 +159,9 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) int ret; pm_runtime_get_sync(&pdev->dev); + mutex_lock(&dev->struct_mutex); ret = msm_gpu_hw_init(gpu); + mutex_unlock(&dev->struct_mutex); pm_runtime_put_sync(&pdev->dev); if (ret) { dev_err(dev->dev, "gpu hw init failed: %d\n", ret); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f8287fd727f1..30a2096ac9a2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -64,7 +64,7 @@ int adreno_hw_init(struct msm_gpu *gpu) DBG("%s", gpu->name); - ret = msm_gem_get_iova(gpu->rb->bo, gpu->id, &gpu->rb_iova); + ret = msm_gem_get_iova_locked(gpu->rb->bo, 
gpu->id, &gpu->rb_iova); if (ret) { gpu->rb_iova = 0; dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index be77a35a7a8e..38fbaadccfb7 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -314,6 +314,8 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id, struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret = 0; + WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + if (!msm_obj->domain[id].iova) { struct msm_drm_private *priv = obj->dev->dev_private; struct page **pages = get_pages(obj); @@ -345,6 +347,7 @@ int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint64_t *iova) * bo is deleted: */ if (msm_obj->domain[id].iova) { + might_lock(&obj->dev->struct_mutex); *iova = msm_obj->domain[id].iova; return 0; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 5b118e8ead18..ebbaed442e8a 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -203,6 +203,8 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) { int ret; + WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex)); + if (!gpu->needs_hw_init) return 0; From aa7cd242976af3c0a6fe4bcf5d9fbb87200cb5c4 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 13:58:23 -0400 Subject: [PATCH 222/341] drm/msm/mdp5: kill pipe_lock It serves no purpose, things should be sufficiently synchronized already by atomic framework. And it is somewhat awkward to be holding a spinlock when msm_gem_iova() is going to start needing to grab a mutex. 
Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index abaaac7fea1a..65b87a098724 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -22,8 +22,6 @@ struct mdp5_plane { struct drm_plane base; - spinlock_t pipe_lock; /* protect REG_MDP5_PIPE_* registers */ - uint32_t nformats; uint32_t formats[32]; }; @@ -881,7 +879,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_rect *src, struct drm_rect *dest) { - struct mdp5_plane *mdp5_plane = to_mdp5_plane(plane); struct drm_plane_state *pstate = plane->state; struct mdp5_hw_pipe *hwpipe = to_mdp5_plane_state(pstate)->hwpipe; struct mdp5_kms *mdp5_kms = get_kms(plane); @@ -902,7 +899,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, uint32_t src_img_w, src_img_h; uint32_t src_x_r; int crtc_x_r; - unsigned long flags; int ret; nplanes = fb->format->num_planes; @@ -981,8 +977,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, hflip = !!(rotation & DRM_MODE_REFLECT_X); vflip = !!(rotation & DRM_MODE_REFLECT_Y); - spin_lock_irqsave(&mdp5_plane->pipe_lock, flags); - mdp5_hwpipe_mode_set(mdp5_kms, hwpipe, fb, &step, &pe, config, hdecm, vdecm, hflip, vflip, crtc_x, crtc_y, crtc_w, crtc_h, @@ -995,8 +989,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, src_img_w, src_img_h, src_x_r, src_y, src_w, src_h); - spin_unlock_irqrestore(&mdp5_plane->pipe_lock, flags); - plane->fb = fb; return ret; @@ -1139,8 +1131,6 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, mdp5_plane->nformats = mdp_get_formats(mdp5_plane->formats, ARRAY_SIZE(mdp5_plane->formats), false); - spin_lock_init(&mdp5_plane->pipe_lock); - if (type == DRM_PLANE_TYPE_CURSOR) ret = drm_universal_plane_init(dev, plane, 0xff, 
&mdp5_cursor_plane_funcs, From f59f62d592a0553bbff6c5b2ba66036becb01c4a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 10:22:37 -0400 Subject: [PATCH 223/341] drm/msm/mdp4+5: move aspace/id to base class Before we can shift to passing the address-space object to _get_iova(), we need to fix a few places (dsi+fbdev) that were hard-coding the address space id. That gets somewhat easier if we just move these to the kms base class. Prep work for next patch. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 10 ++++++++-- drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 9 ++++++--- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 14 +++++++------- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h | 4 ---- drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c | 15 +++++++++------ drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 6 ++++-- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 10 +++++----- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h | 4 ---- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 16 ++++++++++------ drivers/gpu/drm/msm/msm_fbdev.c | 4 +++- drivers/gpu/drm/msm/msm_kms.h | 4 ++++ 11 files changed, 56 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index f97a7803a02d..3c752cd0cc1c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -32,6 +32,7 @@ #include "dsi.xml.h" #include "sfpb.xml.h" #include "dsi_cfg.h" +#include "msm_kms.h" static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) { @@ -975,6 +976,7 @@ static void dsi_wait4video_eng_busy(struct msm_dsi_host *msm_host) static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) { struct drm_device *dev = msm_host->dev; + struct msm_drm_private *priv = dev->dev_private; const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; int ret; uint64_t iova; @@ -991,7 +993,8 @@ static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) return ret; } - ret =
msm_gem_get_iova_locked(msm_host->tx_gem_obj, 0, &iova); + ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, + priv->kms->id, &iova); mutex_unlock(&dev->struct_mutex); if (ret) { pr_err("%s: failed to get iova, %d\n", __func__, ret); @@ -1141,12 +1144,15 @@ static int dsi_long_read_resp(u8 *buf, const struct mipi_dsi_msg *msg) static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len) { const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; + struct drm_device *dev = msm_host->dev; + struct msm_drm_private *priv = dev->dev_private; int ret; uint64_t dma_base; bool triggered; if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) { - ret = msm_gem_get_iova(msm_host->tx_gem_obj, 0, &dma_base); + ret = msm_gem_get_iova(msm_host->tx_gem_obj, + priv->kms->id, &dma_base); if (ret) { pr_err("%s: failed to get iova: %d\n", __func__, ret); return ret; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 698e514203c6..d9ee73c3672d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -126,8 +126,9 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) struct mdp4_crtc *mdp4_crtc = container_of(work, struct mdp4_crtc, unref_cursor_work); struct mdp4_kms *mdp4_kms = get_kms(&mdp4_crtc->base); + struct msm_kms *kms = &mdp4_kms->base.base; - msm_gem_put_iova(val, mdp4_kms->id); + msm_gem_put_iova(val, kms->id); drm_gem_object_unreference_unlocked(val); } @@ -360,6 +361,7 @@ static void update_cursor(struct drm_crtc *crtc) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct mdp4_kms *mdp4_kms = get_kms(crtc); + struct msm_kms *kms = &mdp4_kms->base.base; enum mdp4_dma dma = mdp4_crtc->dma; unsigned long flags; @@ -372,7 +374,7 @@ static void update_cursor(struct drm_crtc *crtc) if (next_bo) { /* take a obj ref + iova ref when we start scanning out: */ drm_gem_object_reference(next_bo); - msm_gem_get_iova_locked(next_bo, mdp4_kms->id, &iova); + 
msm_gem_get_iova_locked(next_bo, kms->id, &iova); /* enable cursor: */ mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_SIZE(dma), @@ -409,6 +411,7 @@ static int mdp4_crtc_cursor_set(struct drm_crtc *crtc, { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct mdp4_kms *mdp4_kms = get_kms(crtc); + struct msm_kms *kms = &mdp4_kms->base.base; struct drm_device *dev = crtc->dev; struct drm_gem_object *cursor_bo, *old_bo; unsigned long flags; @@ -429,7 +432,7 @@ static int mdp4_crtc_cursor_set(struct drm_crtc *crtc, } if (cursor_bo) { - ret = msm_gem_get_iova(cursor_bo, mdp4_kms->id, &iova); + ret = msm_gem_get_iova(cursor_bo, kms->id, &iova); if (ret) goto fail; } else { diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 3d26d7774c08..7cf4dd40de28 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -160,10 +160,10 @@ static void mdp4_destroy(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); struct device *dev = mdp4_kms->dev->dev; - struct msm_gem_address_space *aspace = mdp4_kms->aspace; + struct msm_gem_address_space *aspace = kms->aspace; if (mdp4_kms->blank_cursor_iova) - msm_gem_put_iova(mdp4_kms->blank_cursor_bo, mdp4_kms->id); + msm_gem_put_iova(mdp4_kms->blank_cursor_bo, kms->id); drm_gem_object_unreference_unlocked(mdp4_kms->blank_cursor_bo); if (aspace) { @@ -510,7 +510,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) goto fail; } - mdp4_kms->aspace = aspace; + kms->aspace = aspace; ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, ARRAY_SIZE(iommu_ports)); @@ -522,9 +522,9 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) aspace = NULL; } - mdp4_kms->id = msm_register_address_space(dev, aspace); - if (mdp4_kms->id < 0) { - ret = mdp4_kms->id; + kms->id = msm_register_address_space(dev, aspace); + if (kms->id < 0) { + ret = kms->id; dev_err(dev->dev, "failed to register mdp4 iommu: %d\n", ret); goto fail; } 
@@ -545,7 +545,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) goto fail; } - ret = msm_gem_get_iova(mdp4_kms->blank_cursor_bo, mdp4_kms->id, + ret = msm_gem_get_iova(mdp4_kms->blank_cursor_bo, kms->id, &mdp4_kms->blank_cursor_iova); if (ret) { dev_err(dev->dev, "could not pin blank-cursor bo: %d\n", ret); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h index c413779d488a..940de51ac5cd 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h @@ -34,9 +34,6 @@ struct mdp4_kms { int rev; - /* mapper-id used to request GEM buffer mapped for scanout: */ - int id; - void __iomem *mmio; struct regulator *vdd; @@ -45,7 +42,6 @@ struct mdp4_kms { struct clk *pclk; struct clk *lut_clk; struct clk *axi_clk; - struct msm_gem_address_space *aspace; struct mdp_irq error_handler; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index 53619d07677e..17fb1d6f2f23 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -103,13 +103,14 @@ static int mdp4_plane_prepare_fb(struct drm_plane *plane, { struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane); struct mdp4_kms *mdp4_kms = get_kms(plane); + struct msm_kms *kms = &mdp4_kms->base.base; struct drm_framebuffer *fb = new_state->fb; if (!fb) return 0; DBG("%s: prepare: FB[%u]", mdp4_plane->name, fb->base.id); - return msm_framebuffer_prepare(fb, mdp4_kms->id); + return msm_framebuffer_prepare(fb, kms->id); } static void mdp4_plane_cleanup_fb(struct drm_plane *plane, @@ -117,13 +118,14 @@ static void mdp4_plane_cleanup_fb(struct drm_plane *plane, { struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane); struct mdp4_kms *mdp4_kms = get_kms(plane); + struct msm_kms *kms = &mdp4_kms->base.base; struct drm_framebuffer *fb = old_state->fb; if (!fb) return; DBG("%s: cleanup: FB[%u]", mdp4_plane->name, fb->base.id); - msm_framebuffer_cleanup(fb, 
mdp4_kms->id); + msm_framebuffer_cleanup(fb, kms->id); } @@ -161,6 +163,7 @@ static void mdp4_plane_set_scanout(struct drm_plane *plane, { struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane); struct mdp4_kms *mdp4_kms = get_kms(plane); + struct msm_kms *kms = &mdp4_kms->base.base; enum mdp4_pipe pipe = mdp4_plane->pipe; mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRC_STRIDE_A(pipe), @@ -172,13 +175,13 @@ static void mdp4_plane_set_scanout(struct drm_plane *plane, MDP4_PIPE_SRC_STRIDE_B_P3(fb->pitches[3])); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP0_BASE(pipe), - msm_framebuffer_iova(fb, mdp4_kms->id, 0)); + msm_framebuffer_iova(fb, kms->id, 0)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP1_BASE(pipe), - msm_framebuffer_iova(fb, mdp4_kms->id, 1)); + msm_framebuffer_iova(fb, kms->id, 1)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP2_BASE(pipe), - msm_framebuffer_iova(fb, mdp4_kms->id, 2)); + msm_framebuffer_iova(fb, kms->id, 2)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP3_BASE(pipe), - msm_framebuffer_iova(fb, mdp4_kms->id, 3)); + msm_framebuffer_iova(fb, kms->id, 3)); plane->fb = fb; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 0764a6498110..d79c5faba35e 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -160,8 +160,9 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) struct mdp5_crtc *mdp5_crtc = container_of(work, struct mdp5_crtc, unref_cursor_work); struct mdp5_kms *mdp5_kms = get_kms(&mdp5_crtc->base); + struct msm_kms *kms = &mdp5_kms->base.base; - msm_gem_put_iova(val, mdp5_kms->id); + msm_gem_put_iova(val, kms->id); drm_gem_object_unreference_unlocked(val); } @@ -724,6 +725,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline; struct drm_device *dev = crtc->dev; struct mdp5_kms *mdp5_kms = get_kms(crtc); + struct msm_kms *kms = &mdp5_kms->base.base; struct drm_gem_object *cursor_bo, 
*old_bo = NULL; uint32_t blendcfg, stride; uint64_t cursor_addr; @@ -758,7 +760,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!cursor_bo) return -ENOENT; - ret = msm_gem_get_iova(cursor_bo, mdp5_kms->id, &cursor_addr); + ret = msm_gem_get_iova(cursor_bo, kms->id, &cursor_addr); if (ret) return -EINVAL; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index e2b3346ead48..71d08a805806 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -163,7 +163,7 @@ static void mdp5_set_encoder_mode(struct msm_kms *kms, static void mdp5_kms_destroy(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); - struct msm_gem_address_space *aspace = mdp5_kms->aspace; + struct msm_gem_address_space *aspace = kms->aspace; int i; for (i = 0; i < mdp5_kms->num_hwmixers; i++) @@ -663,7 +663,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) goto fail; } - mdp5_kms->aspace = aspace; + kms->aspace = aspace; ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports, ARRAY_SIZE(iommu_ports)); @@ -678,9 +678,9 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) aspace = NULL;; } - mdp5_kms->id = msm_register_address_space(dev, aspace); - if (mdp5_kms->id < 0) { - ret = mdp5_kms->id; + kms->id = msm_register_address_space(dev, aspace); + if (kms->id < 0) { + ret = kms->id; dev_err(&pdev->dev, "failed to register mdp5 iommu: %d\n", ret); goto fail; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h index 8bdb7ee4983b..17caa0e8c8ae 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h @@ -55,10 +55,6 @@ struct mdp5_kms { struct mdp5_state *state; struct drm_modeset_lock state_lock; - /* mapper-id used to request GEM buffer mapped for scanout: */ - int id; - struct msm_gem_address_space *aspace; - struct mdp5_smp *smp; struct mdp5_ctl_manager *ctlm; diff --git 
a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 65b87a098724..b6a66befd1b7 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -272,26 +272,28 @@ static int mdp5_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *new_state) { struct mdp5_kms *mdp5_kms = get_kms(plane); + struct msm_kms *kms = &mdp5_kms->base.base; struct drm_framebuffer *fb = new_state->fb; if (!new_state->fb) return 0; DBG("%s: prepare: FB[%u]", plane->name, fb->base.id); - return msm_framebuffer_prepare(fb, mdp5_kms->id); + return msm_framebuffer_prepare(fb, kms->id); } static void mdp5_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { struct mdp5_kms *mdp5_kms = get_kms(plane); + struct msm_kms *kms = &mdp5_kms->base.base; struct drm_framebuffer *fb = old_state->fb; if (!fb) return; DBG("%s: cleanup: FB[%u]", plane->name, fb->base.id); - msm_framebuffer_cleanup(fb, mdp5_kms->id); + msm_framebuffer_cleanup(fb, kms->id); } #define FRAC_16_16(mult, div) (((mult) << 16) / (div)) @@ -498,6 +500,8 @@ static void set_scanout_locked(struct mdp5_kms *mdp5_kms, enum mdp5_pipe pipe, struct drm_framebuffer *fb) { + struct msm_kms *kms = &mdp5_kms->base.base; + mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_STRIDE_A(pipe), MDP5_PIPE_SRC_STRIDE_A_P0(fb->pitches[0]) | MDP5_PIPE_SRC_STRIDE_A_P1(fb->pitches[1])); @@ -507,13 +511,13 @@ static void set_scanout_locked(struct mdp5_kms *mdp5_kms, MDP5_PIPE_SRC_STRIDE_B_P3(fb->pitches[3])); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC0_ADDR(pipe), - msm_framebuffer_iova(fb, mdp5_kms->id, 0)); + msm_framebuffer_iova(fb, kms->id, 0)); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC1_ADDR(pipe), - msm_framebuffer_iova(fb, mdp5_kms->id, 1)); + msm_framebuffer_iova(fb, kms->id, 1)); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC2_ADDR(pipe), - msm_framebuffer_iova(fb, mdp5_kms->id, 2)); + msm_framebuffer_iova(fb, kms->id, 2)); mdp5_write(mdp5_kms, 
REG_MDP5_PIPE_SRC3_ADDR(pipe), - msm_framebuffer_iova(fb, mdp5_kms->id, 3)); + msm_framebuffer_iova(fb, kms->id, 3)); } /* Note: mdp5_plane->pipe_lock must be locked */ diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index feea8ba4e05b..3c08d6d35944 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -20,6 +20,7 @@ #include "msm_drv.h" #include "msm_gem.h" +#include "msm_kms.h" extern int msm_gem_mmap_obj(struct drm_gem_object *obj, struct vm_area_struct *vma); @@ -73,6 +74,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, { struct msm_fbdev *fbdev = to_msm_fbdev(helper); struct drm_device *dev = helper->dev; + struct msm_drm_private *priv = dev->dev_private; struct drm_framebuffer *fb = NULL; struct fb_info *fbi = NULL; struct drm_mode_fb_cmd2 mode_cmd = {0}; @@ -124,7 +126,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, * in panic (ie. lock-safe, etc) we could avoid pinning the * buffer now: */ - ret = msm_gem_get_iova_locked(fbdev->bo, 0, &paddr); + ret = msm_gem_get_iova_locked(fbdev->bo, priv->kms->id, &paddr); if (ret) { dev_err(dev->dev, "failed to get buffer obj iova: %d\n", ret); goto fail_unlock; diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index faa22c7c5423..0b98171415fc 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -72,6 +72,10 @@ struct msm_kms { /* irq number to be passed on to drm_irq_install */ int irq; + + /* mapper-id used to request GEM buffer mapped for scanout: */ + int id; + struct msm_gem_address_space *aspace; }; /** From 8bdcd949bbe7e7f9e60a3564baa600884f8f4ba7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 11:07:08 -0400 Subject: [PATCH 224/341] drm/msm: pass address-space to _get_iova() and friends No functional change, that will come later. But this will make it easier to deal with dynamically created address spaces (ie. per- process pagetables for gpu). 
Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 8 ++++---- drivers/gpu/drm/msm/adreno/a5xx_power.c | 5 +++-- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 6 +++--- drivers/gpu/drm/msm/dsi/dsi_host.c | 4 ++-- drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 6 +++--- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 4 ++-- drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c | 12 ++++++------ drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 4 ++-- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 12 ++++++------ drivers/gpu/drm/msm/msm_drv.c | 7 +++++-- drivers/gpu/drm/msm/msm_drv.h | 22 ++++++++++++++-------- drivers/gpu/drm/msm/msm_fb.c | 15 +++++++++------ drivers/gpu/drm/msm/msm_fbdev.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 18 ++++++++++++------ drivers/gpu/drm/msm/msm_gem.h | 1 + drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++-- drivers/gpu/drm/msm/msm_gpu.c | 6 +++--- 17 files changed, 78 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 8d17f525c417..f6a9eec71fec 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -308,7 +308,7 @@ static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, } if (iova) { - int ret = msm_gem_get_iova_locked(bo, gpu->id, iova); + int ret = msm_gem_get_iova_locked(bo, gpu->aspace, iova); if (ret) { drm_gem_object_unreference(bo); @@ -696,19 +696,19 @@ static void a5xx_destroy(struct msm_gpu *gpu) if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) - msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->id); + msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace); drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo); } if (a5xx_gpu->pfp_bo) { if (a5xx_gpu->pfp_iova) - msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->id); + msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace); drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo); } if (a5xx_gpu->gpmu_bo) { if (a5xx_gpu->gpmu_iova) - msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); + 
msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace); drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo); } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index f3274b827a49..feb7f4fd42fb 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -298,7 +298,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) if (IS_ERR(a5xx_gpu->gpmu_bo)) goto err; - if (msm_gem_get_iova_locked(a5xx_gpu->gpmu_bo, gpu->id, &a5xx_gpu->gpmu_iova)) + if (msm_gem_get_iova_locked(a5xx_gpu->gpmu_bo, gpu->aspace, + &a5xx_gpu->gpmu_iova)) goto err; ptr = msm_gem_get_vaddr_locked(a5xx_gpu->gpmu_bo); @@ -327,7 +328,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) err: if (a5xx_gpu->gpmu_iova) - msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); + msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace); if (a5xx_gpu->gpmu_bo) drm_gem_object_unreference(a5xx_gpu->gpmu_bo); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 30a2096ac9a2..6fa694e6ae8c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -64,7 +64,7 @@ int adreno_hw_init(struct msm_gpu *gpu) DBG("%s", gpu->name); - ret = msm_gem_get_iova_locked(gpu->rb->bo, gpu->id, &gpu->rb_iova); + ret = msm_gem_get_iova_locked(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); if (ret) { gpu->rb_iova = 0; dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); @@ -414,7 +414,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, return -ENOMEM; } - ret = msm_gem_get_iova(adreno_gpu->memptrs_bo, gpu->id, + ret = msm_gem_get_iova(adreno_gpu->memptrs_bo, gpu->aspace, &adreno_gpu->memptrs_iova); if (ret) { dev_err(drm->dev, "could not map memptrs: %d\n", ret); @@ -433,7 +433,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) msm_gem_put_vaddr(adreno_gpu->memptrs_bo); if (adreno_gpu->memptrs_iova) - msm_gem_put_iova(adreno_gpu->memptrs_bo, 
gpu->id); + msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->aspace); drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo); } diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 3c752cd0cc1c..2e7077194b21 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -994,7 +994,7 @@ static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) } ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, - priv->kms->id, &iova); + priv->kms->aspace, &iova); mutex_unlock(&dev->struct_mutex); if (ret) { pr_err("%s: failed to get iova, %d\n", __func__, ret); @@ -1152,7 +1152,7 @@ static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len) if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) { ret = msm_gem_get_iova(msm_host->tx_gem_obj, - priv->kms->id, &dma_base); + priv->kms->aspace, &dma_base); if (ret) { pr_err("%s: failed to get iova: %d\n", __func__, ret); return ret; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index d9ee73c3672d..59153a4ebd18 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -128,7 +128,7 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) struct mdp4_kms *mdp4_kms = get_kms(&mdp4_crtc->base); struct msm_kms *kms = &mdp4_kms->base.base; - msm_gem_put_iova(val, kms->id); + msm_gem_put_iova(val, kms->aspace); drm_gem_object_unreference_unlocked(val); } @@ -374,7 +374,7 @@ static void update_cursor(struct drm_crtc *crtc) if (next_bo) { /* take a obj ref + iova ref when we start scanning out: */ drm_gem_object_reference(next_bo); - msm_gem_get_iova_locked(next_bo, kms->id, &iova); + msm_gem_get_iova_locked(next_bo, kms->aspace, &iova); /* enable cursor: */ mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_SIZE(dma), @@ -432,7 +432,7 @@ static int mdp4_crtc_cursor_set(struct drm_crtc *crtc, } if (cursor_bo) { - ret = msm_gem_get_iova(cursor_bo, kms->id, &iova); + 
ret = msm_gem_get_iova(cursor_bo, kms->aspace, &iova); if (ret) goto fail; } else { diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 7cf4dd40de28..0c01f9fe0ef0 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -163,7 +163,7 @@ static void mdp4_destroy(struct msm_kms *kms) struct msm_gem_address_space *aspace = kms->aspace; if (mdp4_kms->blank_cursor_iova) - msm_gem_put_iova(mdp4_kms->blank_cursor_bo, kms->id); + msm_gem_put_iova(mdp4_kms->blank_cursor_bo, kms->aspace); drm_gem_object_unreference_unlocked(mdp4_kms->blank_cursor_bo); if (aspace) { @@ -545,7 +545,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) goto fail; } - ret = msm_gem_get_iova(mdp4_kms->blank_cursor_bo, kms->id, + ret = msm_gem_get_iova(mdp4_kms->blank_cursor_bo, kms->aspace, &mdp4_kms->blank_cursor_iova); if (ret) { dev_err(dev->dev, "could not pin blank-cursor bo: %d\n", ret); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index 17fb1d6f2f23..a20e3d644523 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -110,7 +110,7 @@ static int mdp4_plane_prepare_fb(struct drm_plane *plane, return 0; DBG("%s: prepare: FB[%u]", mdp4_plane->name, fb->base.id); - return msm_framebuffer_prepare(fb, kms->id); + return msm_framebuffer_prepare(fb, kms->aspace); } static void mdp4_plane_cleanup_fb(struct drm_plane *plane, @@ -125,7 +125,7 @@ static void mdp4_plane_cleanup_fb(struct drm_plane *plane, return; DBG("%s: cleanup: FB[%u]", mdp4_plane->name, fb->base.id); - msm_framebuffer_cleanup(fb, kms->id); + msm_framebuffer_cleanup(fb, kms->aspace); } @@ -175,13 +175,13 @@ static void mdp4_plane_set_scanout(struct drm_plane *plane, MDP4_PIPE_SRC_STRIDE_B_P3(fb->pitches[3])); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP0_BASE(pipe), - msm_framebuffer_iova(fb, kms->id, 0)); + msm_framebuffer_iova(fb, 
kms->aspace, 0)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP1_BASE(pipe), - msm_framebuffer_iova(fb, kms->id, 1)); + msm_framebuffer_iova(fb, kms->aspace, 1)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP2_BASE(pipe), - msm_framebuffer_iova(fb, kms->id, 2)); + msm_framebuffer_iova(fb, kms->aspace, 2)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP3_BASE(pipe), - msm_framebuffer_iova(fb, kms->id, 3)); + msm_framebuffer_iova(fb, kms->aspace, 3)); plane->fb = fb; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index d79c5faba35e..cb5415d6c04b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -162,7 +162,7 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) struct mdp5_kms *mdp5_kms = get_kms(&mdp5_crtc->base); struct msm_kms *kms = &mdp5_kms->base.base; - msm_gem_put_iova(val, kms->id); + msm_gem_put_iova(val, kms->aspace); drm_gem_object_unreference_unlocked(val); } @@ -760,7 +760,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!cursor_bo) return -ENOENT; - ret = msm_gem_get_iova(cursor_bo, kms->id, &cursor_addr); + ret = msm_gem_get_iova(cursor_bo, kms->aspace, &cursor_addr); if (ret) return -EINVAL; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index b6a66befd1b7..fe3a4de1a433 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -279,7 +279,7 @@ static int mdp5_plane_prepare_fb(struct drm_plane *plane, return 0; DBG("%s: prepare: FB[%u]", plane->name, fb->base.id); - return msm_framebuffer_prepare(fb, kms->id); + return msm_framebuffer_prepare(fb, kms->aspace); } static void mdp5_plane_cleanup_fb(struct drm_plane *plane, @@ -293,7 +293,7 @@ static void mdp5_plane_cleanup_fb(struct drm_plane *plane, return; DBG("%s: cleanup: FB[%u]", plane->name, fb->base.id); - msm_framebuffer_cleanup(fb, kms->id); + msm_framebuffer_cleanup(fb, 
kms->aspace); } #define FRAC_16_16(mult, div) (((mult) << 16) / (div)) @@ -511,13 +511,13 @@ static void set_scanout_locked(struct mdp5_kms *mdp5_kms, MDP5_PIPE_SRC_STRIDE_B_P3(fb->pitches[3])); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC0_ADDR(pipe), - msm_framebuffer_iova(fb, kms->id, 0)); + msm_framebuffer_iova(fb, kms->aspace, 0)); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC1_ADDR(pipe), - msm_framebuffer_iova(fb, kms->id, 1)); + msm_framebuffer_iova(fb, kms->aspace, 1)); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC2_ADDR(pipe), - msm_framebuffer_iova(fb, kms->id, 2)); + msm_framebuffer_iova(fb, kms->aspace, 2)); mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC3_ADDR(pipe), - msm_framebuffer_iova(fb, kms->id, 3)); + msm_framebuffer_iova(fb, kms->aspace, 3)); } /* Note: mdp5_plane->pipe_lock must be locked */ diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index beb4f6b3ac70..a9c3c6b813d3 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -51,6 +51,7 @@ static const struct drm_mode_config_funcs mode_config_funcs = { .atomic_state_free = msm_atomic_state_free, }; +#include "msm_gem.h" /* temporary */ int msm_register_address_space(struct drm_device *dev, struct msm_gem_address_space *aspace) { @@ -61,7 +62,9 @@ int msm_register_address_space(struct drm_device *dev, priv->aspace[priv->num_aspaces] = aspace; - return priv->num_aspaces++; + aspace->id = priv->num_aspaces++; + + return aspace->id; } #ifdef CONFIG_DRM_MSM_REGISTER_LOGGING @@ -707,7 +710,7 @@ static int msm_ioctl_gem_info_iova(struct drm_device *dev, if (!priv->gpu) return -EINVAL; - return msm_gem_get_iova(obj, priv->gpu->id, iova); + return msm_gem_get_iova(obj, priv->gpu->aspace, iova); } static int msm_ioctl_gem_info(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 1b26ca626528..5570c5c91340 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -209,13 +209,16 @@ int 
msm_gem_mmap_obj(struct drm_gem_object *obj, int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); int msm_gem_fault(struct vm_fault *vmf); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); -int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id, - uint64_t *iova); -int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint64_t *iova); -uint64_t msm_gem_iova(struct drm_gem_object *obj, int id); +int msm_gem_get_iova_locked(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova); +int msm_gem_get_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova); +uint64_t msm_gem_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace); struct page **msm_gem_get_pages(struct drm_gem_object *obj); void msm_gem_put_pages(struct drm_gem_object *obj); -void msm_gem_put_iova(struct drm_gem_object *obj, int id); +void msm_gem_put_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace); int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args); int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, @@ -251,9 +254,12 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); -int msm_framebuffer_prepare(struct drm_framebuffer *fb, int id); -void msm_framebuffer_cleanup(struct drm_framebuffer *fb, int id); -uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, int id, int plane); +int msm_framebuffer_prepare(struct drm_framebuffer *fb, + struct msm_gem_address_space *aspace); +void msm_framebuffer_cleanup(struct drm_framebuffer *fb, + struct msm_gem_address_space *aspace); +uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, + struct msm_gem_address_space *aspace, int plane); struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane); const struct 
msm_format *msm_framebuffer_format(struct drm_framebuffer *fb); struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index ba2733a95a4f..6ecb7b170316 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -84,14 +84,15 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m) * should be fine, since only the scanout (mdpN) side of things needs * this, the gpu doesn't care about fb's. */ -int msm_framebuffer_prepare(struct drm_framebuffer *fb, int id) +int msm_framebuffer_prepare(struct drm_framebuffer *fb, + struct msm_gem_address_space *aspace) { struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb); int ret, i, n = fb->format->num_planes; uint64_t iova; for (i = 0; i < n; i++) { - ret = msm_gem_get_iova(msm_fb->planes[i], id, &iova); + ret = msm_gem_get_iova(msm_fb->planes[i], aspace, &iova); DBG("FB[%u]: iova[%d]: %08llx (%d)", fb->base.id, i, iova, ret); if (ret) return ret; @@ -100,21 +101,23 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb, int id) return 0; } -void msm_framebuffer_cleanup(struct drm_framebuffer *fb, int id) +void msm_framebuffer_cleanup(struct drm_framebuffer *fb, + struct msm_gem_address_space *aspace) { struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb); int i, n = fb->format->num_planes; for (i = 0; i < n; i++) - msm_gem_put_iova(msm_fb->planes[i], id); + msm_gem_put_iova(msm_fb->planes[i], aspace); } -uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, int id, int plane) +uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, + struct msm_gem_address_space *aspace, int plane) { struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb); if (!msm_fb->planes[plane]) return 0; - return msm_gem_iova(msm_fb->planes[plane], id) + fb->offsets[plane]; + return msm_gem_iova(msm_fb->planes[plane], aspace) + fb->offsets[plane]; } struct drm_gem_object *msm_framebuffer_bo(struct 
drm_framebuffer *fb, int plane) diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 3c08d6d35944..803ed272dc6d 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -126,7 +126,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, * in panic (ie. lock-safe, etc) we could avoid pinning the * buffer now: */ - ret = msm_gem_get_iova_locked(fbdev->bo, priv->kms->id, &paddr); + ret = msm_gem_get_iova_locked(fbdev->bo, priv->kms->aspace, &paddr); if (ret) { dev_err(dev->dev, "failed to get buffer obj iova: %d\n", ret); goto fail_unlock; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 38fbaadccfb7..0a38c5b1a799 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -308,10 +308,11 @@ put_iova(struct drm_gem_object *obj) * That means when I do eventually need to add support for unpinning * the refcnt counter needs to be atomic_t. */ -int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id, - uint64_t *iova) +int msm_gem_get_iova_locked(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + int id = aspace ? aspace->id : 0; int ret = 0; WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); @@ -338,9 +339,11 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id, } /* get iova, taking a reference. Should have a matching put */ -int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint64_t *iova) +int msm_gem_get_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + int id = aspace ? 
aspace->id : 0; int ret; /* this is safe right now because we don't unmap until the @@ -353,7 +356,7 @@ int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint64_t *iova) } mutex_lock(&obj->dev->struct_mutex); - ret = msm_gem_get_iova_locked(obj, id, iova); + ret = msm_gem_get_iova_locked(obj, aspace, iova); mutex_unlock(&obj->dev->struct_mutex); return ret; } @@ -361,14 +364,17 @@ int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint64_t *iova) /* get iova without taking a reference, used in places where you have * already done a 'msm_gem_get_iova()'. */ -uint64_t msm_gem_iova(struct drm_gem_object *obj, int id) +uint64_t msm_gem_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + int id = aspace ? aspace->id : 0; WARN_ON(!msm_obj->domain[id].iova); return msm_obj->domain[id].iova; } -void msm_gem_put_iova(struct drm_gem_object *obj, int id) +void msm_gem_put_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace) { // XXX TODO .. // NOTE: probably don't need a _locked() version.. 
we wouldn't diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 1b4cf20043ea..4b4b352b5718 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -33,6 +33,7 @@ struct msm_gem_address_space { struct drm_mm mm; struct msm_mmu *mmu; struct kref kref; + int id; /* temporary */ }; struct msm_gem_vma { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 7832e6421d25..c8d01df993da 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -158,7 +158,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) struct msm_gem_object *msm_obj = submit->bos[i].obj; if (submit->bos[i].flags & BO_PINNED) - msm_gem_put_iova(&msm_obj->base, submit->gpu->id); + msm_gem_put_iova(&msm_obj->base, submit->gpu->aspace); if (submit->bos[i].flags & BO_LOCKED) ww_mutex_unlock(&msm_obj->resv->lock); @@ -246,7 +246,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) /* if locking succeeded, pin bo: */ ret = msm_gem_get_iova_locked(&msm_obj->base, - submit->gpu->id, &iova); + submit->gpu->aspace, &iova); if (ret) break; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ebbaed442e8a..36f0f1e5fc81 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -416,7 +416,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_gem_object *msm_obj = submit->bos[i].obj; /* move to inactive: */ msm_gem_move_to_inactive(&msm_obj->base); - msm_gem_put_iova(&msm_obj->base, gpu->id); + msm_gem_put_iova(&msm_obj->base, gpu->aspace); drm_gem_object_unreference(&msm_obj->base); } @@ -498,7 +498,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, /* submit takes a reference to the bo and iova until retired: */ drm_gem_object_reference(&msm_obj->base); msm_gem_get_iova_locked(&msm_obj->base, - submit->gpu->id, &iova); + 
submit->gpu->aspace, &iova); if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); @@ -694,7 +694,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) if (gpu->rb) { if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->id); + msm_gem_put_iova(gpu->rb->bo, gpu->aspace); msm_ringbuffer_destroy(gpu->rb); } From f4839bd5126310635314610a85468e87b40ce4c8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 11:50:05 -0400 Subject: [PATCH 225/341] drm/msm: refactor how we handle vram carveout buffers Pull some of the logic out into msm_gem_new() (since we don't need to care about the imported-bo case), and don't defer allocating pages. The latter is generally a good idea, since if we are using VRAM carveout to allocate contiguous buffers (ie. no IOMMU), the allocation is more likely to fail. So failing at allocation time is a more sane option. Plus this simplifies things in the next patch. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 48 ++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 0a38c5b1a799..2e5c987f7f2c 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -324,12 +324,8 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, if (IS_ERR(pages)) return PTR_ERR(pages); - if (iommu_present(&platform_bus_type)) { - ret = msm_gem_map_vma(priv->aspace[id], &msm_obj->domain[id], - msm_obj->sgt, obj->size >> PAGE_SHIFT); - } else { - msm_obj->domain[id].iova = physaddr(obj); - } + ret = msm_gem_map_vma(priv->aspace[id], &msm_obj->domain[id], + msm_obj->sgt, obj->size >> PAGE_SHIFT); } if (!ret) @@ -765,7 +761,6 @@ static int msm_gem_new_impl(struct drm_device *dev, { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; - bool use_vram = false; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -780,21 +775,10 @@ static 
int msm_gem_new_impl(struct drm_device *dev, return -EINVAL; } - if (!iommu_present(&platform_bus_type)) - use_vram = true; - else if ((flags & MSM_BO_STOLEN) && priv->vram.size) - use_vram = true; - - if (WARN_ON(use_vram && !priv->vram.size)) - return -EINVAL; - msm_obj = kzalloc(sizeof(*msm_obj), GFP_KERNEL); if (!msm_obj) return -ENOMEM; - if (use_vram) - msm_obj->vram_node = &msm_obj->domain[0].node; - msm_obj->flags = flags; msm_obj->madv = MSM_MADV_WILLNEED; @@ -816,13 +800,23 @@ static int msm_gem_new_impl(struct drm_device *dev, struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags) { + struct msm_drm_private *priv = dev->dev_private; struct drm_gem_object *obj = NULL; + bool use_vram = false; int ret; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); size = PAGE_ALIGN(size); + if (!iommu_present(&platform_bus_type)) + use_vram = true; + else if ((flags & MSM_BO_STOLEN) && priv->vram.size) + use_vram = true; + + if (WARN_ON(use_vram && !priv->vram.size)) + return ERR_PTR(-EINVAL); + /* Disallow zero sized objects as they make the underlying * infrastructure grumpy */ @@ -833,12 +827,24 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, if (ret) goto fail; - if (use_pages(obj)) { + if (use_vram) { + struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct page **pages; + + msm_obj->vram_node = &msm_obj->domain[0].node; + drm_gem_private_object_init(dev, obj, size); + + msm_obj->pages = get_pages(obj); + pages = get_pages(obj); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto fail; + } + msm_obj->domain[0].iova = physaddr(obj); + } else { ret = drm_gem_object_init(dev, obj, size); if (ret) goto fail; - } else { - drm_gem_private_object_init(dev, obj, size); } return obj; From 4b85f7f5cf776b0fcd4a2e38cb9c69849aae0fc5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 13:54:13 -0400 Subject: [PATCH 226/341] drm/msm: support for an arbitrary number of address spaces It means we have to do a list 
traversal where we once had an index into a table. But the list will normally have one or two entries. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 138 +++++++++++++++++++++++----------- drivers/gpu/drm/msm/msm_gem.h | 4 +- 2 files changed, 99 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 2e5c987f7f2c..9951c78ee215 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -283,21 +283,59 @@ uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj) return offset; } +static struct msm_gem_vma *add_vma(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct msm_gem_vma *vma; + + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (!vma) + return ERR_PTR(-ENOMEM); + + vma->aspace = aspace; + + list_add_tail(&vma->list, &msm_obj->vmas); + + return vma; +} + +static struct msm_gem_vma *lookup_vma(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct msm_gem_vma *vma; + + WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + + list_for_each_entry(vma, &msm_obj->vmas, list) { + if (vma->aspace == aspace) + return vma; + } + + return NULL; +} + +static void del_vma(struct msm_gem_vma *vma) +{ + if (!vma) + return; + + list_del(&vma->list); + kfree(vma); +} + static void put_iova(struct drm_gem_object *obj) { - struct drm_device *dev = obj->dev; - struct msm_drm_private *priv = obj->dev->dev_private; struct msm_gem_object *msm_obj = to_msm_bo(obj); - int id; + struct msm_gem_vma *vma, *tmp; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); - for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) { - if (!priv->aspace[id]) - continue; - msm_gem_unmap_vma(priv->aspace[id], - &msm_obj->domain[id], msm_obj->sgt); + list_for_each_entry_safe(vma, tmp, &msm_obj->vmas, list) { + 
msm_gem_unmap_vma(vma->aspace, vma, msm_obj->sgt); + del_vma(vma); } } @@ -312,24 +350,37 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - int id = aspace ? aspace->id : 0; + struct msm_gem_vma *vma; int ret = 0; WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); - if (!msm_obj->domain[id].iova) { - struct msm_drm_private *priv = obj->dev->dev_private; - struct page **pages = get_pages(obj); + vma = lookup_vma(obj, aspace); - if (IS_ERR(pages)) - return PTR_ERR(pages); + if (!vma) { + struct page **pages; - ret = msm_gem_map_vma(priv->aspace[id], &msm_obj->domain[id], - msm_obj->sgt, obj->size >> PAGE_SHIFT); + vma = add_vma(obj, aspace); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + pages = get_pages(obj); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto fail; + } + + ret = msm_gem_map_vma(aspace, vma, msm_obj->sgt, + obj->size >> PAGE_SHIFT); + if (ret) + goto fail; } - if (!ret) - *iova = msm_obj->domain[id].iova; + *iova = vma->iova; + return 0; + +fail: + del_vma(vma); return ret; } @@ -338,22 +389,12 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); - int id = aspace ? aspace->id : 0; int ret; - /* this is safe right now because we don't unmap until the - * bo is deleted: - */ - if (msm_obj->domain[id].iova) { - might_lock(&obj->dev->struct_mutex); - *iova = msm_obj->domain[id].iova; - return 0; - } - mutex_lock(&obj->dev->struct_mutex); ret = msm_gem_get_iova_locked(obj, aspace, iova); mutex_unlock(&obj->dev->struct_mutex); + return ret; } @@ -363,10 +404,14 @@ int msm_gem_get_iova(struct drm_gem_object *obj, uint64_t msm_gem_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); - int id = aspace ? 
aspace->id : 0; - WARN_ON(!msm_obj->domain[id].iova); - return msm_obj->domain[id].iova; + struct msm_gem_vma *vma; + + mutex_lock(&obj->dev->struct_mutex); + vma = lookup_vma(obj, aspace); + mutex_unlock(&obj->dev->struct_mutex); + WARN_ON(!vma); + + return vma ? vma->iova : 0; } void msm_gem_put_iova(struct drm_gem_object *obj, @@ -624,11 +669,10 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) struct msm_gem_object *msm_obj = to_msm_bo(obj); struct reservation_object *robj = msm_obj->resv; struct reservation_object_list *fobj; - struct msm_drm_private *priv = obj->dev->dev_private; struct dma_fence *fence; + struct msm_gem_vma *vma; uint64_t off = drm_vma_node_start(&obj->vma_node); const char *madv; - unsigned id; WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); @@ -650,8 +694,9 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) obj->name, kref_read(&obj->refcount), off, msm_obj->vaddr); - for (id = 0; id < priv->num_aspaces; id++) - seq_printf(m, " %08llx", msm_obj->domain[id].iova); + /* FIXME: we need to print the address space here too */ + list_for_each_entry(vma, &msm_obj->vmas, list) + seq_printf(m, " %08llx", vma->iova); seq_printf(m, " %zu%s\n", obj->size, madv); @@ -790,6 +835,8 @@ static int msm_gem_new_impl(struct drm_device *dev, } INIT_LIST_HEAD(&msm_obj->submit_entry); + INIT_LIST_HEAD(&msm_obj->vmas); + list_add_tail(&msm_obj->mm_list, &priv->inactive_list); *obj = &msm_obj->base; @@ -828,19 +875,26 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, goto fail; if (use_vram) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct msm_gem_vma *vma; struct page **pages; - msm_obj->vram_node = &msm_obj->domain[0].node; + vma = add_vma(obj, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto fail; + } + + to_msm_bo(obj)->vram_node = &vma->node; + drm_gem_private_object_init(dev, obj, size); - msm_obj->pages = get_pages(obj); pages = get_pages(obj); if (IS_ERR(pages)) { ret = 
PTR_ERR(pages); goto fail; } - msm_obj->domain[0].iova = physaddr(obj); + + vma->iova = physaddr(obj); } else { ret = drm_gem_object_init(dev, obj, size); if (ret) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 4b4b352b5718..ff468da70fb6 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -39,6 +39,8 @@ struct msm_gem_address_space { struct msm_gem_vma { struct drm_mm_node node; uint64_t iova; + struct msm_gem_address_space *aspace; + struct list_head list; /* node in msm_gem_object::vmas */ }; struct msm_gem_object { @@ -78,7 +80,7 @@ struct msm_gem_object { struct sg_table *sgt; void *vaddr; - struct msm_gem_vma domain[NUM_DOMAINS]; + struct list_head vmas; /* list of msm_gem_vma */ /* normally (resv == &_resv) except for imported bo's */ struct reservation_object *resv; From 8432a903fb97ac3640dbf9281e7c15a673b45747 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 14:27:45 -0400 Subject: [PATCH 227/341] drm/msm: remove address-space id Now that the msm_gem supports an arbitrary number of vma's, we no longer need to assign an id (index) to each address space. So rip out the associated code. 
Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 7 ------- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 7 ------- drivers/gpu/drm/msm/msm_drv.c | 16 ---------------- drivers/gpu/drm/msm/msm_drv.h | 11 ----------- drivers/gpu/drm/msm/msm_gem.h | 1 - drivers/gpu/drm/msm/msm_gpu.c | 2 -- drivers/gpu/drm/msm/msm_gpu.h | 1 - drivers/gpu/drm/msm/msm_kms.h | 1 - 8 files changed, 46 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 0c01f9fe0ef0..3d96687a1b39 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -522,13 +522,6 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) aspace = NULL; } - kms->id = msm_register_address_space(dev, aspace); - if (kms->id < 0) { - ret = kms->id; - dev_err(dev->dev, "failed to register mdp4 iommu: %d\n", ret); - goto fail; - } - ret = modeset_init(mdp4_kms); if (ret) { dev_err(dev->dev, "modeset_init failed: %d\n", ret); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index 71d08a805806..5d13fa5381ee 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -678,13 +678,6 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) aspace = NULL;; } - kms->id = msm_register_address_space(dev, aspace); - if (kms->id < 0) { - ret = kms->id; - dev_err(&pdev->dev, "failed to register mdp5 iommu: %d\n", ret); - goto fail; - } - ret = modeset_init(mdp5_kms); if (ret) { dev_err(&pdev->dev, "modeset_init failed: %d\n", ret); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index a9c3c6b813d3..506de3862c18 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -51,22 +51,6 @@ static const struct drm_mode_config_funcs mode_config_funcs = { .atomic_state_free = msm_atomic_state_free, }; -#include "msm_gem.h" /* temporary */ -int msm_register_address_space(struct drm_device 
*dev, - struct msm_gem_address_space *aspace) -{ - struct msm_drm_private *priv = dev->dev_private; - - if (WARN_ON(priv->num_aspaces >= ARRAY_SIZE(priv->aspace))) - return -EINVAL; - - priv->aspace[priv->num_aspaces] = aspace; - - aspace->id = priv->num_aspaces++; - - return aspace->id; -} - #ifdef CONFIG_DRM_MSM_REGISTER_LOGGING static bool reglog = false; MODULE_PARM_DESC(reglog, "Enable register read/write logging"); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 5570c5c91340..1d47ec467ded 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -123,14 +123,6 @@ struct msm_drm_private { uint32_t pending_crtcs; wait_queue_head_t pending_crtcs_event; - /* Registered address spaces.. currently this is fixed per # of - * iommu's. Ie. one for display block and one for gpu block. - * Eventually, to do per-process gpu pagetables, we'll want one - * of these per-process. - */ - unsigned int num_aspaces; - struct msm_gem_address_space *aspace[NUM_DOMAINS]; - unsigned int num_planes; struct drm_plane *planes[16]; @@ -183,9 +175,6 @@ struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev); void msm_atomic_state_clear(struct drm_atomic_state *state); void msm_atomic_state_free(struct drm_atomic_state *state); -int msm_register_address_space(struct drm_device *dev, - struct msm_gem_address_space *aspace); - void msm_gem_unmap_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma, struct sg_table *sgt); int msm_gem_map_vma(struct msm_gem_address_space *aspace, diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index ff468da70fb6..112eb63b5908 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -33,7 +33,6 @@ struct msm_gem_address_space { struct drm_mm mm; struct msm_mmu *mmu; struct kref kref; - int id; /* temporary */ }; struct msm_gem_vma { diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 
36f0f1e5fc81..2d5c9afbcdbe 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -659,8 +659,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, } else { dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name); } - gpu->id = msm_register_address_space(drm, gpu->aspace); - /* Create ringbuffer: */ mutex_lock(&drm->struct_mutex); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index fd8049592aae..df4e2771fb85 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -106,7 +106,6 @@ struct msm_gpu { int irq; struct msm_gem_address_space *aspace; - int id; /* Power Control: */ struct regulator *gpu_reg, *gpu_cx; diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index 0b98171415fc..a8f2ba5e5f07 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -74,7 +74,6 @@ struct msm_kms { int irq; /* mapper-id used to request GEM buffer mapped for scanout: */ - int id; struct msm_gem_address_space *aspace; }; From 52260ae4c461538a4f104d463a55dfc13c3d8543 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 16 Jun 2017 08:57:18 -0400 Subject: [PATCH 228/341] drm/msm: update generated headers Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a2xx.xml.h | 265 +++- drivers/gpu/drm/msm/adreno/a3xx.xml.h | 20 +- drivers/gpu/drm/msm/adreno/a4xx.xml.h | 64 +- drivers/gpu/drm/msm/adreno/a5xx.xml.h | 1369 +++++++++++++++-- .../gpu/drm/msm/adreno/adreno_common.xml.h | 49 +- drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h | 366 ++++- drivers/gpu/drm/msm/dsi/dsi.xml.h | 13 +- drivers/gpu/drm/msm/dsi/mmss_cc.xml.h | 24 +- drivers/gpu/drm/msm/dsi/sfpb.xml.h | 24 +- drivers/gpu/drm/msm/edp/edp.xml.h | 24 +- drivers/gpu/drm/msm/hdmi/hdmi.xml.h | 66 +- drivers/gpu/drm/msm/hdmi/qfprom.xml.h | 24 +- drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h | 24 +- drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h | 14 +- 
drivers/gpu/drm/msm/mdp/mdp_common.xml.h | 24 +- 15 files changed, 2046 insertions(+), 324 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a2xx.xml.h b/drivers/gpu/drm/msm/adreno/a2xx.xml.h index 4be092f911f9..644374c7b3e0 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a2xx.xml.h @@ -8,17 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2016-04-26 17:56:44) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32907 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 12025 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 22544 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110765 bytes, from 2016-11-26 23:01:48) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 90321 bytes, from 2016-11-28 16:50:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) 
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2016 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) @@ -352,6 +352,38 @@ static inline uint32_t A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_cln #define REG_A2XX_RBBM_DEBUG 0x0000039b #define REG_A2XX_RBBM_PM_OVERRIDE1 0x0000039c +#define A2XX_RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE 0x00000001 +#define A2XX_RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE 0x00000002 +#define A2XX_RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE 0x00000004 +#define A2XX_RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE 0x00000008 +#define A2XX_RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE 0x00000010 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE 0x00000020 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE 0x00000040 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE 0x00000080 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE 0x00000100 +#define A2XX_RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE 0x00000200 +#define A2XX_RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE 0x00000400 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE 0x00000800 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE 0x00001000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE 0x00002000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE 0x00004000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE 0x00008000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE 0x00010000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE 0x00020000 +#define 
A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE 0x00040000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE 0x00080000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE 0x00100000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE 0x00200000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE 0x00400000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE 0x00800000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE 0x01000000 +#define A2XX_RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE 0x02000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE 0x04000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE 0x08000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE 0x10000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE 0x20000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE 0x40000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE 0x80000000 #define REG_A2XX_RBBM_PM_OVERRIDE2 0x0000039d @@ -477,12 +509,43 @@ static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x000 #define REG_A2XX_PA_SU_DEBUG_DATA 0x00000c81 #define REG_A2XX_PA_SU_FACE_DATA 0x00000c86 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK 0xffffffe0 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT 5 +static inline uint32_t A2XX_PA_SU_FACE_DATA_BASE_ADDR(uint32_t val) +{ + return ((val) << A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT) & A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK; +} #define REG_A2XX_SQ_GPR_MANAGEMENT 0x00000d00 +#define A2XX_SQ_GPR_MANAGEMENT_REG_DYNAMIC 0x00000001 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK 0x00000ff0 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT 4 +static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK; +} +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK 0x000ff000 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT 12 +static inline uint32_t 
A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK; +} #define REG_A2XX_SQ_FLOW_CONTROL 0x00000d01 #define REG_A2XX_SQ_INST_STORE_MANAGMENT 0x00000d02 +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK 0x00000fff +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT 0 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK; +} +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK 0x0fff0000 +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT 16 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK; +} #define REG_A2XX_SQ_DEBUG_MISC 0x00000d05 @@ -742,6 +805,24 @@ static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) #define REG_A2XX_RB_BLEND_ALPHA 0x00002108 #define REG_A2XX_RB_FOG_COLOR 0x00002109 +#define A2XX_RB_FOG_COLOR_FOG_RED__MASK 0x000000ff +#define A2XX_RB_FOG_COLOR_FOG_RED__SHIFT 0 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_RED(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_RED__SHIFT) & A2XX_RB_FOG_COLOR_FOG_RED__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_GREEN__MASK 0x0000ff00 +#define A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT 8 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_GREEN(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT) & A2XX_RB_FOG_COLOR_FOG_GREEN__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_BLUE__MASK 0x00ff0000 +#define A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT 16 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_BLUE(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT) & A2XX_RB_FOG_COLOR_FOG_BLUE__MASK; +} #define REG_A2XX_RB_STENCILREFMASK_BF 0x0000210c #define 
A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff @@ -890,14 +971,146 @@ static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val) #define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000 #define REG_A2XX_SQ_INTERPOLATOR_CNTL 0x00002182 +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK 0x0000ffff +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT 0 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK; +} +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK 0xffff0000 +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT 16 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK; +} #define REG_A2XX_SQ_WRAPPING_0 0x00002183 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_0(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_1(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT 8 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_2(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_3(uint32_t val) +{ + return ((val) << 
A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_4(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_5(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_6(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_7(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK; +} #define REG_A2XX_SQ_WRAPPING_1 0x00002184 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_8(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_9(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT 8 +static inline uint32_t 
A2XX_SQ_WRAPPING_1_PARAM_WRAP_10(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_11(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_12(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_13(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_14(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_15(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK; +} #define REG_A2XX_SQ_PS_PROGRAM 0x000021f6 +#define A2XX_SQ_PS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_PS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_PS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_BASE__SHIFT) & A2XX_SQ_PS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_PS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_PS_PROGRAM_SIZE__SHIFT 12 +static inline 
uint32_t A2XX_SQ_PS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_PS_PROGRAM_SIZE__MASK; +} #define REG_A2XX_SQ_VS_PROGRAM 0x000021f7 +#define A2XX_SQ_VS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_VS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_VS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_BASE__SHIFT) & A2XX_SQ_VS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_VS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_VS_PROGRAM_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_VS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_VS_PROGRAM_SIZE__MASK; +} #define REG_A2XX_VGT_EVENT_INITIATOR 0x000021f9 @@ -1304,6 +1517,14 @@ static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_a } #define REG_A2XX_PA_SC_VIZ_QUERY 0x00002293 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ENA 0x00000001 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK 0x0000007e +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT 1 +static inline uint32_t A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(uint32_t val) +{ + return ((val) << A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT) & A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK; +} +#define A2XX_PA_SC_VIZ_QUERY_KILL_PIX_POST_EARLY_Z 0x00000100 #define REG_A2XX_VGT_ENHANCE 0x00002294 @@ -1319,6 +1540,18 @@ static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val) #define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL 0x00000400 #define REG_A2XX_PA_SC_AA_CONFIG 0x00002301 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK 0x00000007 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT 0 +static inline uint32_t A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT) & A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK; +} +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK 0x0001e000 +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT 13 +static inline uint32_t 
A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT) & A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK; +} #define REG_A2XX_PA_SU_VTX_CNTL 0x00002302 #define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK 0x00000001 @@ -1407,8 +1640,20 @@ static inline uint32_t A2XX_SQ_PS_CONST_SIZE(uint32_t val) #define REG_A2XX_PA_SC_AA_MASK 0x00002312 #define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL 0x00002316 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK 0x00000007 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT 0 +static inline uint32_t A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH(uint32_t val) +{ + return ((val) << A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT) & A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK; +} #define REG_A2XX_VGT_OUT_DEALLOC_CNTL 0x00002317 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK 0x00000003 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT 0 +static inline uint32_t A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST(uint32_t val) +{ + return ((val) << A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT) & A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK; +} #define REG_A2XX_RB_COPY_CONTROL 0x00002318 #define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK 0x00000007 diff --git a/drivers/gpu/drm/msm/adreno/a3xx.xml.h b/drivers/gpu/drm/msm/adreno/a3xx.xml.h index a066c8b9eccd..663a73216926 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a3xx.xml.h @@ -8,17 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2016-04-26 17:56:44) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32907 bytes, from 2016-11-26 
23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 12025 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 22544 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110765 bytes, from 2016-11-26 23:01:48) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 90321 bytes, from 2016-11-28 16:50:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2016 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) diff --git a/drivers/gpu/drm/msm/adreno/a4xx.xml.h b/drivers/gpu/drm/msm/adreno/a4xx.xml.h index 4ce21b902779..1a14f4a40b9c 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a4xx.xml.h @@ -8,17 
+8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2016-04-26 17:56:44) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32907 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 12025 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 22544 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110765 bytes, from 2016-11-26 23:01:48) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 90321 bytes, from 2016-11-28 16:50:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) +- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2016 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) @@ -3010,11 +3010,11 @@ static inline uint32_t A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x0000220b + 0x4*i0; } static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_2(uint32_t i0) { return 0x0000220c + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xfffffff0 -#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 4 +#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xffffffff +#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 0 static inline uint32_t A4XX_VFD_FETCH_INSTR_2_SIZE(uint32_t val) { - return ((val >> 4) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; + return ((val) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; } static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_3(uint32_t i0) { return 0x0000220d + 0x4*i0; } @@ -3829,6 +3829,44 @@ static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) #define REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 +#define REG_A4XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A4XX_VBIF_PERF_CNT_EN1 0x000030c1 + +#define REG_A4XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A4XX_VBIF_PERF_CNT_EN3 0x000030c3 + +#define REG_A4XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A4XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A4XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A4XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A4XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A4XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A4XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A4XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A4XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define 
REG_A4XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + #define REG_A4XX_UNKNOWN_0CC5 0x00000cc5 #define REG_A4XX_UNKNOWN_0CC6 0x00000cc6 diff --git a/drivers/gpu/drm/msm/adreno/a5xx.xml.h b/drivers/gpu/drm/msm/adreno/a5xx.xml.h index b6fe763ddf34..e0e6711f4f78 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a5xx.xml.h @@ -8,17 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2016-04-26 17:56:44) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32907 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 12025 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 22544 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110765 bytes, from 2016-11-26 23:01:48) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 90321 bytes, from 2016-11-28 16:50:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2016 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) @@ -45,20 +45,50 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. enum a5xx_color_fmt { + RB5_A8_UNORM = 2, RB5_R8_UNORM = 3, + RB5_R8_SNORM = 4, + RB5_R8_UINT = 5, + RB5_R8_SINT = 6, RB5_R4G4B4A4_UNORM = 8, RB5_R5G5B5A1_UNORM = 10, RB5_R5G6B5_UNORM = 14, + RB5_R8G8_UNORM = 15, + RB5_R8G8_SNORM = 16, + RB5_R8G8_UINT = 17, + RB5_R8G8_SINT = 18, + RB5_R16_UNORM = 21, + RB5_R16_SNORM = 22, RB5_R16_FLOAT = 23, + RB5_R16_UINT = 24, + RB5_R16_SINT = 25, RB5_R8G8B8A8_UNORM = 48, RB5_R8G8B8_UNORM = 49, + RB5_R8G8B8A8_SNORM = 50, RB5_R8G8B8A8_UINT = 51, + RB5_R8G8B8A8_SINT = 52, + RB5_R10G10B10A2_UNORM = 55, RB5_R10G10B10A2_UINT = 58, + RB5_R11G11B10_FLOAT = 66, + RB5_R16G16_UNORM = 67, + RB5_R16G16_SNORM = 68, RB5_R16G16_FLOAT = 69, + RB5_R16G16_UINT = 70, + RB5_R16G16_SINT = 71, RB5_R32_FLOAT = 74, + RB5_R32_UINT = 75, + RB5_R32_SINT = 76, + RB5_R16G16B16A16_UNORM = 96, + RB5_R16G16B16A16_SNORM = 97, RB5_R16G16B16A16_FLOAT = 98, + RB5_R16G16B16A16_UINT = 99, + RB5_R16G16B16A16_SINT = 100, RB5_R32G32_FLOAT = 103, + RB5_R32G32_UINT = 104, + RB5_R32G32_SINT = 105, RB5_R32G32B32A32_FLOAT = 130, + RB5_R32G32B32A32_UINT = 131, + RB5_R32G32B32A32_SINT = 132, }; enum a5xx_tile_mode { 
@@ -133,25 +163,55 @@ enum a5xx_vtx_fmt { enum a5xx_tex_fmt { TFMT5_A8_UNORM = 2, TFMT5_8_UNORM = 3, + TFMT5_8_SNORM = 4, + TFMT5_8_UINT = 5, + TFMT5_8_SINT = 6, TFMT5_4_4_4_4_UNORM = 8, TFMT5_5_5_5_1_UNORM = 10, TFMT5_5_6_5_UNORM = 14, TFMT5_8_8_UNORM = 15, TFMT5_8_8_SNORM = 16, + TFMT5_8_8_UINT = 17, + TFMT5_8_8_SINT = 18, TFMT5_L8_A8_UNORM = 19, + TFMT5_16_UNORM = 21, + TFMT5_16_SNORM = 22, TFMT5_16_FLOAT = 23, + TFMT5_16_UINT = 24, + TFMT5_16_SINT = 25, TFMT5_8_8_8_8_UNORM = 48, TFMT5_8_8_8_UNORM = 49, - TFMT5_8_8_8_SNORM = 50, + TFMT5_8_8_8_8_SNORM = 50, + TFMT5_8_8_8_8_UINT = 51, + TFMT5_8_8_8_8_SINT = 52, TFMT5_9_9_9_E5_FLOAT = 53, TFMT5_10_10_10_2_UNORM = 54, + TFMT5_10_10_10_2_UINT = 58, TFMT5_11_11_10_FLOAT = 66, + TFMT5_16_16_UNORM = 67, + TFMT5_16_16_SNORM = 68, TFMT5_16_16_FLOAT = 69, + TFMT5_16_16_UINT = 70, + TFMT5_16_16_SINT = 71, TFMT5_32_FLOAT = 74, + TFMT5_32_UINT = 75, + TFMT5_32_SINT = 76, + TFMT5_16_16_16_16_UNORM = 96, + TFMT5_16_16_16_16_SNORM = 97, TFMT5_16_16_16_16_FLOAT = 98, + TFMT5_16_16_16_16_UINT = 99, + TFMT5_16_16_16_16_SINT = 100, TFMT5_32_32_FLOAT = 103, + TFMT5_32_32_UINT = 104, + TFMT5_32_32_SINT = 105, TFMT5_32_32_32_32_FLOAT = 130, + TFMT5_32_32_32_32_UINT = 131, + TFMT5_32_32_32_32_SINT = 132, TFMT5_X8Z24_UNORM = 160, + TFMT5_RGTC1_UNORM = 183, + TFMT5_RGTC1_SNORM = 184, + TFMT5_RGTC2_UNORM = 187, + TFMT5_RGTC2_SNORM = 188, }; enum a5xx_tex_fetchsize { @@ -182,6 +242,565 @@ enum a5xx_blit_buf { BLIT_Z32 = 9, }; +enum a5xx_cp_perfcounter_select { + PERF_CP_ALWAYS_COUNT = 0, + PERF_CP_BUSY_GFX_CORE_IDLE = 1, + PERF_CP_BUSY_CYCLES = 2, + PERF_CP_PFP_IDLE = 3, + PERF_CP_PFP_BUSY_WORKING = 4, + PERF_CP_PFP_STALL_CYCLES_ANY = 5, + PERF_CP_PFP_STARVE_CYCLES_ANY = 6, + PERF_CP_PFP_ICACHE_MISS = 7, + PERF_CP_PFP_ICACHE_HIT = 8, + PERF_CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + PERF_CP_ME_BUSY_WORKING = 10, + PERF_CP_ME_IDLE = 11, + PERF_CP_ME_STARVE_CYCLES_ANY = 12, + PERF_CP_ME_FIFO_EMPTY_PFP_IDLE = 13, + PERF_CP_ME_FIFO_EMPTY_PFP_BUSY = 
14, + PERF_CP_ME_FIFO_FULL_ME_BUSY = 15, + PERF_CP_ME_FIFO_FULL_ME_NON_WORKING = 16, + PERF_CP_ME_STALL_CYCLES_ANY = 17, + PERF_CP_ME_ICACHE_MISS = 18, + PERF_CP_ME_ICACHE_HIT = 19, + PERF_CP_NUM_PREEMPTIONS = 20, + PERF_CP_PREEMPTION_REACTION_DELAY = 21, + PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 22, + PERF_CP_PREEMPTION_SWITCH_IN_TIME = 23, + PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 24, + PERF_CP_PREDICATED_DRAWS_KILLED = 25, + PERF_CP_MODE_SWITCH = 26, + PERF_CP_ZPASS_DONE = 27, + PERF_CP_CONTEXT_DONE = 28, + PERF_CP_CACHE_FLUSH = 29, + PERF_CP_LONG_PREEMPTIONS = 30, +}; + +enum a5xx_rbbm_perfcounter_select { + PERF_RBBM_ALWAYS_COUNT = 0, + PERF_RBBM_ALWAYS_ON = 1, + PERF_RBBM_TSE_BUSY = 2, + PERF_RBBM_RAS_BUSY = 3, + PERF_RBBM_PC_DCALL_BUSY = 4, + PERF_RBBM_PC_VSD_BUSY = 5, + PERF_RBBM_STATUS_MASKED = 6, + PERF_RBBM_COM_BUSY = 7, + PERF_RBBM_DCOM_BUSY = 8, + PERF_RBBM_VBIF_BUSY = 9, + PERF_RBBM_VSC_BUSY = 10, + PERF_RBBM_TESS_BUSY = 11, + PERF_RBBM_UCHE_BUSY = 12, + PERF_RBBM_HLSQ_BUSY = 13, +}; + +enum a5xx_pc_perfcounter_select { + PERF_PC_BUSY_CYCLES = 0, + PERF_PC_WORKING_CYCLES = 1, + PERF_PC_STALL_CYCLES_VFD = 2, + PERF_PC_STALL_CYCLES_TSE = 3, + PERF_PC_STALL_CYCLES_VPC = 4, + PERF_PC_STALL_CYCLES_UCHE = 5, + PERF_PC_STALL_CYCLES_TESS = 6, + PERF_PC_STALL_CYCLES_TSE_ONLY = 7, + PERF_PC_STALL_CYCLES_VPC_ONLY = 8, + PERF_PC_PASS1_TF_STALL_CYCLES = 9, + PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, + PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, + PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, + PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, + PERF_PC_STARVE_CYCLES_DI = 14, + PERF_PC_VIS_STREAMS_LOADED = 15, + PERF_PC_INSTANCES = 16, + PERF_PC_VPC_PRIMITIVES = 17, + PERF_PC_DEAD_PRIM = 18, + PERF_PC_LIVE_PRIM = 19, + PERF_PC_VERTEX_HITS = 20, + PERF_PC_IA_VERTICES = 21, + PERF_PC_IA_PRIMITIVES = 22, + PERF_PC_GS_PRIMITIVES = 23, + PERF_PC_HS_INVOCATIONS = 24, + PERF_PC_DS_INVOCATIONS = 25, + PERF_PC_VS_INVOCATIONS = 26, + PERF_PC_GS_INVOCATIONS = 27, + PERF_PC_DS_PRIMITIVES = 28, + 
PERF_PC_VPC_POS_DATA_TRANSACTION = 29, + PERF_PC_3D_DRAWCALLS = 30, + PERF_PC_2D_DRAWCALLS = 31, + PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, + PERF_TESS_BUSY_CYCLES = 33, + PERF_TESS_WORKING_CYCLES = 34, + PERF_TESS_STALL_CYCLES_PC = 35, + PERF_TESS_STARVE_CYCLES_PC = 36, +}; + +enum a5xx_vfd_perfcounter_select { + PERF_VFD_BUSY_CYCLES = 0, + PERF_VFD_STALL_CYCLES_UCHE = 1, + PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, + PERF_VFD_STALL_CYCLES_MISS_VB = 3, + PERF_VFD_STALL_CYCLES_MISS_Q = 4, + PERF_VFD_STALL_CYCLES_SP_INFO = 5, + PERF_VFD_STALL_CYCLES_SP_ATTR = 6, + PERF_VFD_STALL_CYCLES_VFDP_VB = 7, + PERF_VFD_STALL_CYCLES_VFDP_Q = 8, + PERF_VFD_DECODER_PACKER_STALL = 9, + PERF_VFD_STARVE_CYCLES_UCHE = 10, + PERF_VFD_RBUFFER_FULL = 11, + PERF_VFD_ATTR_INFO_FIFO_FULL = 12, + PERF_VFD_DECODED_ATTRIBUTE_BYTES = 13, + PERF_VFD_NUM_ATTRIBUTES = 14, + PERF_VFD_INSTRUCTIONS = 15, + PERF_VFD_UPPER_SHADER_FIBERS = 16, + PERF_VFD_LOWER_SHADER_FIBERS = 17, + PERF_VFD_MODE_0_FIBERS = 18, + PERF_VFD_MODE_1_FIBERS = 19, + PERF_VFD_MODE_2_FIBERS = 20, + PERF_VFD_MODE_3_FIBERS = 21, + PERF_VFD_MODE_4_FIBERS = 22, + PERF_VFD_TOTAL_VERTICES = 23, + PERF_VFD_NUM_ATTR_MISS = 24, + PERF_VFD_1_BURST_REQ = 25, + PERF_VFDP_STALL_CYCLES_VFD = 26, + PERF_VFDP_STALL_CYCLES_VFD_INDEX = 27, + PERF_VFDP_STALL_CYCLES_VFD_PROG = 28, + PERF_VFDP_STARVE_CYCLES_PC = 29, + PERF_VFDP_VS_STAGE_32_WAVES = 30, +}; + +enum a5xx_hlsq_perfcounter_select { + PERF_HLSQ_BUSY_CYCLES = 0, + PERF_HLSQ_STALL_CYCLES_UCHE = 1, + PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, + PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, + PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, + PERF_HLSQ_UCHE_LATENCY_COUNT = 5, + PERF_HLSQ_FS_STAGE_32_WAVES = 6, + PERF_HLSQ_FS_STAGE_64_WAVES = 7, + PERF_HLSQ_QUADS = 8, + PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE = 9, + PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE = 10, + PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE = 11, + PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE = 12, + PERF_HLSQ_CS_INVOCATIONS = 13, + PERF_HLSQ_COMPUTE_DRAWCALLS = 14, +}; 
+ +enum a5xx_vpc_perfcounter_select { + PERF_VPC_BUSY_CYCLES = 0, + PERF_VPC_WORKING_CYCLES = 1, + PERF_VPC_STALL_CYCLES_UCHE = 2, + PERF_VPC_STALL_CYCLES_VFD_WACK = 3, + PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, + PERF_VPC_STALL_CYCLES_PC = 5, + PERF_VPC_STALL_CYCLES_SP_LM = 6, + PERF_VPC_POS_EXPORT_STALL_CYCLES = 7, + PERF_VPC_STARVE_CYCLES_SP = 8, + PERF_VPC_STARVE_CYCLES_LRZ = 9, + PERF_VPC_PC_PRIMITIVES = 10, + PERF_VPC_SP_COMPONENTS = 11, + PERF_VPC_SP_LM_PRIMITIVES = 12, + PERF_VPC_SP_LM_COMPONENTS = 13, + PERF_VPC_SP_LM_DWORDS = 14, + PERF_VPC_STREAMOUT_COMPONENTS = 15, + PERF_VPC_GRANT_PHASES = 16, +}; + +enum a5xx_tse_perfcounter_select { + PERF_TSE_BUSY_CYCLES = 0, + PERF_TSE_CLIPPING_CYCLES = 1, + PERF_TSE_STALL_CYCLES_RAS = 2, + PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, + PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, + PERF_TSE_STARVE_CYCLES_PC = 5, + PERF_TSE_INPUT_PRIM = 6, + PERF_TSE_INPUT_NULL_PRIM = 7, + PERF_TSE_TRIVAL_REJ_PRIM = 8, + PERF_TSE_CLIPPED_PRIM = 9, + PERF_TSE_ZERO_AREA_PRIM = 10, + PERF_TSE_FACENESS_CULLED_PRIM = 11, + PERF_TSE_ZERO_PIXEL_PRIM = 12, + PERF_TSE_OUTPUT_NULL_PRIM = 13, + PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, + PERF_TSE_CINVOCATION = 15, + PERF_TSE_CPRIMITIVES = 16, + PERF_TSE_2D_INPUT_PRIM = 17, + PERF_TSE_2D_ALIVE_CLCLES = 18, +}; + +enum a5xx_ras_perfcounter_select { + PERF_RAS_BUSY_CYCLES = 0, + PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, + PERF_RAS_STALL_CYCLES_LRZ = 2, + PERF_RAS_STARVE_CYCLES_TSE = 3, + PERF_RAS_SUPER_TILES = 4, + PERF_RAS_8X4_TILES = 5, + PERF_RAS_MASKGEN_ACTIVE = 6, + PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, + PERF_RAS_FULLY_COVERED_8X4_TILES = 8, + PERF_RAS_PRIM_KILLED_INVISILBE = 9, +}; + +enum a5xx_lrz_perfcounter_select { + PERF_LRZ_BUSY_CYCLES = 0, + PERF_LRZ_STARVE_CYCLES_RAS = 1, + PERF_LRZ_STALL_CYCLES_RB = 2, + PERF_LRZ_STALL_CYCLES_VSC = 3, + PERF_LRZ_STALL_CYCLES_VPC = 4, + PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, + PERF_LRZ_STALL_CYCLES_UCHE = 6, + PERF_LRZ_LRZ_READ = 7, + PERF_LRZ_LRZ_WRITE = 8, + 
PERF_LRZ_READ_LATENCY = 9, + PERF_LRZ_MERGE_CACHE_UPDATING = 10, + PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, + PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, + PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, + PERF_LRZ_FULL_8X8_TILES = 14, + PERF_LRZ_PARTIAL_8X8_TILES = 15, + PERF_LRZ_TILE_KILLED = 16, + PERF_LRZ_TOTAL_PIXEL = 17, + PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, +}; + +enum a5xx_uche_perfcounter_select { + PERF_UCHE_BUSY_CYCLES = 0, + PERF_UCHE_STALL_CYCLES_VBIF = 1, + PERF_UCHE_VBIF_LATENCY_CYCLES = 2, + PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, + PERF_UCHE_VBIF_READ_BEATS_TP = 4, + PERF_UCHE_VBIF_READ_BEATS_VFD = 5, + PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, + PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, + PERF_UCHE_VBIF_READ_BEATS_SP = 8, + PERF_UCHE_READ_REQUESTS_TP = 9, + PERF_UCHE_READ_REQUESTS_VFD = 10, + PERF_UCHE_READ_REQUESTS_HLSQ = 11, + PERF_UCHE_READ_REQUESTS_LRZ = 12, + PERF_UCHE_READ_REQUESTS_SP = 13, + PERF_UCHE_WRITE_REQUESTS_LRZ = 14, + PERF_UCHE_WRITE_REQUESTS_SP = 15, + PERF_UCHE_WRITE_REQUESTS_VPC = 16, + PERF_UCHE_WRITE_REQUESTS_VSC = 17, + PERF_UCHE_EVICTS = 18, + PERF_UCHE_BANK_REQ0 = 19, + PERF_UCHE_BANK_REQ1 = 20, + PERF_UCHE_BANK_REQ2 = 21, + PERF_UCHE_BANK_REQ3 = 22, + PERF_UCHE_BANK_REQ4 = 23, + PERF_UCHE_BANK_REQ5 = 24, + PERF_UCHE_BANK_REQ6 = 25, + PERF_UCHE_BANK_REQ7 = 26, + PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, + PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, + PERF_UCHE_GMEM_READ_BEATS = 29, + PERF_UCHE_FLAG_COUNT = 30, +}; + +enum a5xx_tp_perfcounter_select { + PERF_TP_BUSY_CYCLES = 0, + PERF_TP_STALL_CYCLES_UCHE = 1, + PERF_TP_LATENCY_CYCLES = 2, + PERF_TP_LATENCY_TRANS = 3, + PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, + PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, + PERF_TP_L1_CACHELINE_REQUESTS = 6, + PERF_TP_L1_CACHELINE_MISSES = 7, + PERF_TP_SP_TP_TRANS = 8, + PERF_TP_TP_SP_TRANS = 9, + PERF_TP_OUTPUT_PIXELS = 10, + PERF_TP_FILTER_WORKLOAD_16BIT = 11, + PERF_TP_FILTER_WORKLOAD_32BIT = 12, + PERF_TP_QUADS_RECEIVED = 13, + PERF_TP_QUADS_OFFSET = 14, + PERF_TP_QUADS_SHADOW = 15, + 
PERF_TP_QUADS_ARRAY = 16, + PERF_TP_QUADS_GRADIENT = 17, + PERF_TP_QUADS_1D = 18, + PERF_TP_QUADS_2D = 19, + PERF_TP_QUADS_BUFFER = 20, + PERF_TP_QUADS_3D = 21, + PERF_TP_QUADS_CUBE = 22, + PERF_TP_STATE_CACHE_REQUESTS = 23, + PERF_TP_STATE_CACHE_MISSES = 24, + PERF_TP_DIVERGENT_QUADS_RECEIVED = 25, + PERF_TP_BINDLESS_STATE_CACHE_REQUESTS = 26, + PERF_TP_BINDLESS_STATE_CACHE_MISSES = 27, + PERF_TP_PRT_NON_RESIDENT_EVENTS = 28, + PERF_TP_OUTPUT_PIXELS_POINT = 29, + PERF_TP_OUTPUT_PIXELS_BILINEAR = 30, + PERF_TP_OUTPUT_PIXELS_MIP = 31, + PERF_TP_OUTPUT_PIXELS_ANISO = 32, + PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 33, + PERF_TP_FLAG_CACHE_REQUESTS = 34, + PERF_TP_FLAG_CACHE_MISSES = 35, + PERF_TP_L1_5_L2_REQUESTS = 36, + PERF_TP_2D_OUTPUT_PIXELS = 37, + PERF_TP_2D_OUTPUT_PIXELS_POINT = 38, + PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 39, + PERF_TP_2D_FILTER_WORKLOAD_16BIT = 40, + PERF_TP_2D_FILTER_WORKLOAD_32BIT = 41, +}; + +enum a5xx_sp_perfcounter_select { + PERF_SP_BUSY_CYCLES = 0, + PERF_SP_ALU_WORKING_CYCLES = 1, + PERF_SP_EFU_WORKING_CYCLES = 2, + PERF_SP_STALL_CYCLES_VPC = 3, + PERF_SP_STALL_CYCLES_TP = 4, + PERF_SP_STALL_CYCLES_UCHE = 5, + PERF_SP_STALL_CYCLES_RB = 6, + PERF_SP_SCHEDULER_NON_WORKING = 7, + PERF_SP_WAVE_CONTEXTS = 8, + PERF_SP_WAVE_CONTEXT_CYCLES = 9, + PERF_SP_FS_STAGE_WAVE_CYCLES = 10, + PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, + PERF_SP_VS_STAGE_WAVE_CYCLES = 12, + PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, + PERF_SP_FS_STAGE_DURATION_CYCLES = 14, + PERF_SP_VS_STAGE_DURATION_CYCLES = 15, + PERF_SP_WAVE_CTRL_CYCLES = 16, + PERF_SP_WAVE_LOAD_CYCLES = 17, + PERF_SP_WAVE_EMIT_CYCLES = 18, + PERF_SP_WAVE_NOP_CYCLES = 19, + PERF_SP_WAVE_WAIT_CYCLES = 20, + PERF_SP_WAVE_FETCH_CYCLES = 21, + PERF_SP_WAVE_IDLE_CYCLES = 22, + PERF_SP_WAVE_END_CYCLES = 23, + PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, + PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, + PERF_SP_WAVE_JOIN_CYCLES = 26, + PERF_SP_LM_LOAD_INSTRUCTIONS = 27, + PERF_SP_LM_STORE_INSTRUCTIONS = 28, + PERF_SP_LM_ATOMICS = 29, + 
PERF_SP_GM_LOAD_INSTRUCTIONS = 30, + PERF_SP_GM_STORE_INSTRUCTIONS = 31, + PERF_SP_GM_ATOMICS = 32, + PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, + PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS = 34, + PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 35, + PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 36, + PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 37, + PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 38, + PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 39, + PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 40, + PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 41, + PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 42, + PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 43, + PERF_SP_VS_INSTRUCTIONS = 44, + PERF_SP_FS_INSTRUCTIONS = 45, + PERF_SP_ADDR_LOCK_COUNT = 46, + PERF_SP_UCHE_READ_TRANS = 47, + PERF_SP_UCHE_WRITE_TRANS = 48, + PERF_SP_EXPORT_VPC_TRANS = 49, + PERF_SP_EXPORT_RB_TRANS = 50, + PERF_SP_PIXELS_KILLED = 51, + PERF_SP_ICL1_REQUESTS = 52, + PERF_SP_ICL1_MISSES = 53, + PERF_SP_ICL0_REQUESTS = 54, + PERF_SP_ICL0_MISSES = 55, + PERF_SP_HS_INSTRUCTIONS = 56, + PERF_SP_DS_INSTRUCTIONS = 57, + PERF_SP_GS_INSTRUCTIONS = 58, + PERF_SP_CS_INSTRUCTIONS = 59, + PERF_SP_GPR_READ = 60, + PERF_SP_GPR_WRITE = 61, + PERF_SP_LM_CH0_REQUESTS = 62, + PERF_SP_LM_CH1_REQUESTS = 63, + PERF_SP_LM_BANK_CONFLICTS = 64, +}; + +enum a5xx_rb_perfcounter_select { + PERF_RB_BUSY_CYCLES = 0, + PERF_RB_STALL_CYCLES_CCU = 1, + PERF_RB_STALL_CYCLES_HLSQ = 2, + PERF_RB_STALL_CYCLES_FIFO0_FULL = 3, + PERF_RB_STALL_CYCLES_FIFO1_FULL = 4, + PERF_RB_STALL_CYCLES_FIFO2_FULL = 5, + PERF_RB_STARVE_CYCLES_SP = 6, + PERF_RB_STARVE_CYCLES_LRZ_TILE = 7, + PERF_RB_STARVE_CYCLES_CCU = 8, + PERF_RB_STARVE_CYCLES_Z_PLANE = 9, + PERF_RB_STARVE_CYCLES_BARY_PLANE = 10, + PERF_RB_Z_WORKLOAD = 11, + PERF_RB_HLSQ_ACTIVE = 12, + PERF_RB_Z_READ = 13, + PERF_RB_Z_WRITE = 14, + PERF_RB_C_READ = 15, + PERF_RB_C_WRITE = 16, + PERF_RB_TOTAL_PASS = 17, + PERF_RB_Z_PASS = 18, + PERF_RB_Z_FAIL = 19, + PERF_RB_S_FAIL = 20, + PERF_RB_BLENDED_FXP_COMPONENTS = 21, + PERF_RB_BLENDED_FP16_COMPONENTS = 22, + 
RB_RESERVED = 23, + PERF_RB_2D_ALIVE_CYCLES = 24, + PERF_RB_2D_STALL_CYCLES_A2D = 25, + PERF_RB_2D_STARVE_CYCLES_SRC = 26, + PERF_RB_2D_STARVE_CYCLES_SP = 27, + PERF_RB_2D_STARVE_CYCLES_DST = 28, + PERF_RB_2D_VALID_PIXELS = 29, +}; + +enum a5xx_rb_samples_perfcounter_select { + TOTAL_SAMPLES = 0, + ZPASS_SAMPLES = 1, + ZFAIL_SAMPLES = 2, + SFAIL_SAMPLES = 3, +}; + +enum a5xx_vsc_perfcounter_select { + PERF_VSC_BUSY_CYCLES = 0, + PERF_VSC_WORKING_CYCLES = 1, + PERF_VSC_STALL_CYCLES_UCHE = 2, + PERF_VSC_EOT_NUM = 3, +}; + +enum a5xx_ccu_perfcounter_select { + PERF_CCU_BUSY_CYCLES = 0, + PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, + PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, + PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, + PERF_CCU_DEPTH_BLOCKS = 4, + PERF_CCU_COLOR_BLOCKS = 5, + PERF_CCU_DEPTH_BLOCK_HIT = 6, + PERF_CCU_COLOR_BLOCK_HIT = 7, + PERF_CCU_PARTIAL_BLOCK_READ = 8, + PERF_CCU_GMEM_READ = 9, + PERF_CCU_GMEM_WRITE = 10, + PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, + PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, + PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, + PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, + PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, + PERF_CCU_COLOR_READ_FLAG0_COUNT = 16, + PERF_CCU_COLOR_READ_FLAG1_COUNT = 17, + PERF_CCU_COLOR_READ_FLAG2_COUNT = 18, + PERF_CCU_COLOR_READ_FLAG3_COUNT = 19, + PERF_CCU_COLOR_READ_FLAG4_COUNT = 20, + PERF_CCU_2D_BUSY_CYCLES = 21, + PERF_CCU_2D_RD_REQ = 22, + PERF_CCU_2D_WR_REQ = 23, + PERF_CCU_2D_REORDER_STARVE_CYCLES = 24, + PERF_CCU_2D_PIXELS = 25, +}; + +enum a5xx_cmp_perfcounter_select { + PERF_CMPDECMP_STALL_CYCLES_VBIF = 0, + PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, + PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, + PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, + PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, + PERF_CMPDECMP_VBIF_READ_REQUEST = 5, + PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, + PERF_CMPDECMP_VBIF_READ_DATA = 7, + PERF_CMPDECMP_VBIF_WRITE_DATA = 8, + PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, + PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, + 
PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, + PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, + PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, + PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, + PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 15, + PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 16, + PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 17, + PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 18, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 19, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 20, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 21, + PERF_CMPDECMP_2D_RD_DATA = 22, + PERF_CMPDECMP_2D_WR_DATA = 23, +}; + +enum a5xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + 
AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, + AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, +}; + enum a5xx_tex_filter { A5XX_TEX_NEAREST = 0, A5XX_TEX_LINEAR = 1, @@ -1289,25 +1908,83 @@ static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) #define REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 -#define REG_A5XX_VSC_PIPE_DATA_LENGTH_0 0x00000c00 +#define REG_A5XX_VSC_BIN_SIZE 0x00000bc2 +#define A5XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A5XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A5XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + return ((val >> 5) << A5XX_VSC_BIN_SIZE_WIDTH__SHIFT) & 
A5XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A5XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001fe00 +#define A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT 9 +static inline uint32_t A5XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + return ((val >> 5) << A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A5XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A5XX_VSC_SIZE_ADDRESS_LO 0x00000bc3 + +#define REG_A5XX_VSC_SIZE_ADDRESS_HI 0x00000bc4 + +#define REG_A5XX_UNKNOWN_0BC5 0x00000bc5 + +#define REG_A5XX_UNKNOWN_0BC6 0x00000bc6 + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } +#define A5XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 +#define A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 +#define A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_HI(uint32_t i0) { 
return 0x00000be1 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c00 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c00 + 0x1*i0; } #define REG_A5XX_VSC_PERFCTR_VSC_SEL_0 0x00000c60 #define REG_A5XX_VSC_PERFCTR_VSC_SEL_1 0x00000c61 -#define REG_A5XX_VSC_BIN_SIZE 0x00000cdd -#define A5XX_VSC_BIN_SIZE_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_VSC_BIN_SIZE_X__MASK 0x00007fff -#define A5XX_VSC_BIN_SIZE_X__SHIFT 0 -static inline uint32_t A5XX_VSC_BIN_SIZE_X(uint32_t val) +#define REG_A5XX_VSC_RESOLVE_CNTL 0x00000cdd +#define A5XX_VSC_RESOLVE_CNTL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_VSC_RESOLVE_CNTL_X__MASK 0x00007fff +#define A5XX_VSC_RESOLVE_CNTL_X__SHIFT 0 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_X(uint32_t val) { - return ((val) << A5XX_VSC_BIN_SIZE_X__SHIFT) & A5XX_VSC_BIN_SIZE_X__MASK; + return ((val) << A5XX_VSC_RESOLVE_CNTL_X__SHIFT) & A5XX_VSC_RESOLVE_CNTL_X__MASK; } -#define A5XX_VSC_BIN_SIZE_Y__MASK 0x7fff0000 -#define A5XX_VSC_BIN_SIZE_Y__SHIFT 16 -static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val) +#define A5XX_VSC_RESOLVE_CNTL_Y__MASK 0x7fff0000 +#define A5XX_VSC_RESOLVE_CNTL_Y__SHIFT 16 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) { - return ((val) << A5XX_VSC_BIN_SIZE_Y__SHIFT) & A5XX_VSC_BIN_SIZE_Y__MASK; + return ((val) << A5XX_VSC_RESOLVE_CNTL_Y__SHIFT) & A5XX_VSC_RESOLVE_CNTL_Y__MASK; } #define REG_A5XX_GRAS_ADDR_MODE_CNTL 0x00000c81 @@ -1470,6 +2147,7 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val) #define REG_A5XX_VPC_ADDR_MODE_CNTL 0x00000e61 #define REG_A5XX_VPC_MODE_CNTL 0x00000e62 +#define A5XX_VPC_MODE_CNTL_BINNING_PASS 0x00000001 #define REG_A5XX_VPC_PERFCTR_VPC_SEL_0 0x00000e64 @@ -1641,6 +2319,14 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val) #define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c +#define REG_A5XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A5XX_VBIF_PERF_CNT_EN1 
0x000030c1 + +#define REG_A5XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A5XX_VBIF_PERF_CNT_EN3 0x000030c3 + #define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 #define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 @@ -1911,6 +2597,11 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val) #define REG_A5XX_GRAS_CNTL 0x0000e005 #define A5XX_GRAS_CNTL_VARYING 0x00000001 +#define A5XX_GRAS_CNTL_UNK3 0x00000008 +#define A5XX_GRAS_CNTL_XCOORD 0x00000040 +#define A5XX_GRAS_CNTL_YCOORD 0x00000080 +#define A5XX_GRAS_CNTL_ZCOORD 0x00000100 +#define A5XX_GRAS_CNTL_WCOORD 0x00000200 #define REG_A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x0000e006 #define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff @@ -1975,6 +2666,8 @@ static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val) } #define REG_A5XX_GRAS_SU_CNTL 0x0000e090 +#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 +#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 #define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 #define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 #define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 @@ -2010,7 +2703,8 @@ static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) #define REG_A5XX_UNKNOWN_E093 0x0000e093 #define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 -#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_ALPHA_TEST_ENABLE 0x00000001 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1 0x00000002 #define REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000e095 #define A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff @@ -2047,6 +2741,7 @@ static inline uint32_t A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_dep #define REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL 0x0000e099 #define REG_A5XX_GRAS_SC_CNTL 0x0000e0a0 +#define A5XX_GRAS_SC_CNTL_BINNING_PASS 0x00000001 #define A5XX_GRAS_SC_CNTL_SAMPLES_PASSED 0x00008000 #define REG_A5XX_GRAS_SC_BIN_CNTL 0x0000e0a1 @@ -2161,12 +2856,21 @@ static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) } #define 
REG_A5XX_GRAS_LRZ_CNTL 0x0000e100 +#define A5XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 +#define A5XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 +#define A5XX_GRAS_LRZ_CNTL_GREATER 0x00000004 #define REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO 0x0000e101 #define REG_A5XX_GRAS_LRZ_BUFFER_BASE_HI 0x0000e102 #define REG_A5XX_GRAS_LRZ_BUFFER_PITCH 0x0000e103 +#define A5XX_GRAS_LRZ_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_GRAS_LRZ_BUFFER_PITCH(uint32_t val) +{ + return ((val >> 5) << A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT) & A5XX_GRAS_LRZ_BUFFER_PITCH__MASK; +} #define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x0000e104 @@ -2188,7 +2892,9 @@ static inline uint32_t A5XX_RB_CNTL_HEIGHT(uint32_t val) #define A5XX_RB_CNTL_BYPASS 0x00020000 #define REG_A5XX_RB_RENDER_CNTL 0x0000e141 +#define A5XX_RB_RENDER_CNTL_BINNING_PASS 0x00000001 #define A5XX_RB_RENDER_CNTL_SAMPLES_PASSED 0x00000040 +#define A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE 0x00000080 #define A5XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 #define A5XX_RB_RENDER_CNTL_FLAG_DEPTH2 0x00008000 #define A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 @@ -2223,6 +2929,7 @@ static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val #define REG_A5XX_RB_RENDER_CONTROL0 0x0000e144 #define A5XX_RB_RENDER_CONTROL0_VARYING 0x00000001 +#define A5XX_RB_RENDER_CONTROL0_UNK3 0x00000008 #define A5XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 #define A5XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 #define A5XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 @@ -2525,6 +3232,7 @@ static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) #define REG_A5XX_RB_DEPTH_PLANE_CNTL 0x0000e1b0 #define A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_RB_DEPTH_PLANE_CNTL_UNK1 0x00000002 #define REG_A5XX_RB_DEPTH_CNTL 0x0000e1b1 #define A5XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 @@ -2554,7 +3262,7 @@ static inline uint32_t A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_fo #define 
A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) { - return ((val >> 5) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; } #define REG_A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x0000e1b6 @@ -2562,7 +3270,7 @@ static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) #define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 static inline uint32_t A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) { - return ((val >> 5) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; } #define REG_A5XX_RB_STENCIL_CONTROL 0x0000e1c0 @@ -2678,8 +3386,11 @@ static inline uint32_t A5XX_RB_WINDOW_OFFSET_Y(uint32_t val) return ((val) << A5XX_RB_WINDOW_OFFSET_Y__SHIFT) & A5XX_RB_WINDOW_OFFSET_Y__MASK; } +#define REG_A5XX_RB_SAMPLE_COUNT_CONTROL 0x0000e1d1 +#define A5XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + #define REG_A5XX_RB_BLIT_CNTL 0x0000e210 -#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000003f +#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000000f #define A5XX_RB_BLIT_CNTL_BUF__SHIFT 0 static inline uint32_t A5XX_RB_BLIT_CNTL_BUF(enum a5xx_blit_buf val) { @@ -2803,6 +3514,10 @@ static inline uint32_t A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH(uint32_t val) return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK; } +#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO 0x0000e267 + +#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_HI 0x0000e268 + #define REG_A5XX_VPC_CNTL_0 0x0000e280 #define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK 0x0000007f #define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT 0 @@ -2839,32 +3554,71 @@ static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) { return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A5XX_VPC_PACK_NUMNONPOSVAR__MASK; } +#define 
A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00 +#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8 +static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val) +{ + return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK; +} #define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0 -#define REG_A5XX_UNKNOWN_E2A1 0x0000e2a1 +#define REG_A5XX_VPC_SO_BUF_CNTL 0x0000e2a1 +#define A5XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 +#define A5XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 +#define A5XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 +#define A5XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 +#define A5XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 #define REG_A5XX_VPC_SO_OVERRIDE 0x0000e2a2 +#define A5XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 -#define REG_A5XX_VPC_SO_BUFFER_BASE_LO_0 0x0000e2a7 +#define REG_A5XX_VPC_SO_CNTL 0x0000e2a3 +#define A5XX_VPC_SO_CNTL_ENABLE 0x00010000 -#define REG_A5XX_VPC_SO_BUFFER_BASE_HI_0 0x0000e2a8 +#define REG_A5XX_VPC_SO_PROG 0x0000e2a4 +#define A5XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 +#define A5XX_VPC_SO_PROG_A_BUF__SHIFT 0 +static inline uint32_t A5XX_VPC_SO_PROG_A_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_A_BUF__SHIFT) & A5XX_VPC_SO_PROG_A_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc +#define A5XX_VPC_SO_PROG_A_OFF__SHIFT 2 +static inline uint32_t A5XX_VPC_SO_PROG_A_OFF(uint32_t val) +{ + return ((val >> 2) << A5XX_VPC_SO_PROG_A_OFF__SHIFT) & A5XX_VPC_SO_PROG_A_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_A_EN 0x00000800 +#define A5XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 +#define A5XX_VPC_SO_PROG_B_BUF__SHIFT 12 +static inline uint32_t A5XX_VPC_SO_PROG_B_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_B_BUF__SHIFT) & A5XX_VPC_SO_PROG_B_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 +#define A5XX_VPC_SO_PROG_B_OFF__SHIFT 14 +static inline uint32_t A5XX_VPC_SO_PROG_B_OFF(uint32_t val) +{ + return ((val >> 2) << A5XX_VPC_SO_PROG_B_OFF__SHIFT) & A5XX_VPC_SO_PROG_B_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_B_EN 0x00800000 -#define 
REG_A5XX_VPC_SO_BUFFER_SIZE_0 0x0000e2a9 +static inline uint32_t REG_A5XX_VPC_SO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } -#define REG_A5XX_UNKNOWN_E2AB 0x0000e2ab +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } -#define REG_A5XX_VPC_SO_FLUSH_BASE_LO_0 0x0000e2ac +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000e2a8 + 0x7*i0; } -#define REG_A5XX_VPC_SO_FLUSH_BASE_HI_0 0x0000e2ad +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000e2a9 + 0x7*i0; } -#define REG_A5XX_UNKNOWN_E2AE 0x0000e2ae +static inline uint32_t REG_A5XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000e2aa + 0x7*i0; } -#define REG_A5XX_UNKNOWN_E2B2 0x0000e2b2 +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000e2ab + 0x7*i0; } -#define REG_A5XX_UNKNOWN_E2B9 0x0000e2b9 +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000e2ac + 0x7*i0; } -#define REG_A5XX_UNKNOWN_E2C0 0x0000e2c0 +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x0000e2ad + 0x7*i0; } #define REG_A5XX_PC_PRIMITIVE_CNTL 0x0000e384 #define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000007f @@ -2873,6 +3627,7 @@ static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) { return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; } +#define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 #define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 #define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 @@ -2900,18 +3655,18 @@ static inline uint32_t A5XX_VFD_CONTROL_0_VTXCNT(uint32_t val) } #define REG_A5XX_VFD_CONTROL_1 0x0000e401 +#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff +#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; +} 
#define A5XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 #define A5XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) { return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; } -#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 -#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; -} #define REG_A5XX_VFD_CONTROL_2 0x0000e402 @@ -2944,18 +3699,15 @@ static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) { return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; } +#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 #define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x3ff00000 #define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) { return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; } -#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0xc0000000 -#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 30 -static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK; -} +#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 +#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; } @@ -2979,88 +3731,107 @@ static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) #define REG_A5XX_SP_SP_CNTL 0x0000e580 -#define REG_A5XX_SP_VS_CONTROL_REG 0x0000e584 -#define A5XX_SP_VS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define 
REG_A5XX_SP_VS_CONFIG 0x0000e584 +#define A5XX_SP_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_SP_FS_CONTROL_REG 0x0000e585 -#define A5XX_SP_FS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_SP_FS_CONFIG 0x0000e585 +#define A5XX_SP_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define 
A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_SP_HS_CONTROL_REG 0x0000e586 -#define A5XX_SP_HS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_SP_HS_CONFIG 0x0000e586 +#define A5XX_SP_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_SP_DS_CONTROL_REG 0x0000e587 
-#define A5XX_SP_DS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_SP_DS_CONFIG 0x0000e587 +#define A5XX_SP_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_SP_GS_CONTROL_REG 0x0000e588 -#define A5XX_SP_GS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_SP_GS_CONFIG 0x0000e588 +#define A5XX_SP_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << 
A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK; } #define REG_A5XX_SP_CS_CONFIG 0x0000e589 +#define A5XX_SP_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK; +} #define REG_A5XX_SP_VS_CONFIG_MAX_CONST 0x0000e58a #define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b #define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} #define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 
0x000003f0 #define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) @@ -3075,13 +3846,19 @@ static inline uint32_t A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) } #define A5XX_SP_VS_CTRL_REG0_VARYING 0x00010000 #define A5XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; +} #define REG_A5XX_SP_PRIMITIVE_CNTL 0x0000e592 -#define A5XX_SP_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000001f -#define A5XX_SP_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 +static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) { - return ((val >> 2) << A5XX_SP_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; + return ((val) << A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; } static inline uint32_t REG_A5XX_SP_VS_OUT(uint32_t i0) { return 0x0000e593 + 0x1*i0; } @@ -3147,6 +3924,12 @@ static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) #define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad #define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} #define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 #define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 static inline uint32_t 
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) @@ -3161,6 +3944,12 @@ static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) } #define A5XX_SP_FS_CTRL_REG0_VARYING 0x00010000 #define A5XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; +} #define REG_A5XX_UNKNOWN_E5C2 0x0000e5c2 @@ -3169,6 +3958,8 @@ static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) #define REG_A5XX_SP_FS_OBJ_START_HI 0x0000e5c4 #define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 +#define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 +#define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 #define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca #define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f @@ -3210,15 +4001,66 @@ static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) { return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; } +#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 #define REG_A5XX_UNKNOWN_E5DB 0x0000e5db -#define REG_A5XX_SP_CS_CNTL_0 0x0000e5f0 +#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 + +#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 + +#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 + +#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) 
<< A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; +} #define REG_A5XX_UNKNOWN_E600 0x0000e600 +#define REG_A5XX_UNKNOWN_E602 0x0000e602 + +#define REG_A5XX_SP_HS_OBJ_START_LO 0x0000e603 + +#define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 + +#define REG_A5XX_UNKNOWN_E62B 0x0000e62b + +#define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c + +#define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d + #define REG_A5XX_UNKNOWN_E640 0x0000e640 +#define REG_A5XX_UNKNOWN_E65B 0x0000e65b + +#define REG_A5XX_SP_GS_OBJ_START_LO 0x0000e65c + +#define REG_A5XX_SP_GS_OBJ_START_HI 0x0000e65d + #define REG_A5XX_TPL1_TP_RAS_MSAA_CNTL 0x0000e704 #define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 #define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 @@ -3236,29 +4078,85 @@ static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_sample } #define A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 +#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000e706 + +#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000e707 + #define REG_A5XX_TPL1_VS_TEX_COUNT 0x0000e700 +#define REG_A5XX_TPL1_HS_TEX_COUNT 0x0000e701 + +#define REG_A5XX_TPL1_DS_TEX_COUNT 0x0000e702 + +#define REG_A5XX_TPL1_GS_TEX_COUNT 0x0000e703 + #define 
REG_A5XX_TPL1_VS_TEX_SAMP_LO 0x0000e722 #define REG_A5XX_TPL1_VS_TEX_SAMP_HI 0x0000e723 +#define REG_A5XX_TPL1_HS_TEX_SAMP_LO 0x0000e724 + +#define REG_A5XX_TPL1_HS_TEX_SAMP_HI 0x0000e725 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_LO 0x0000e726 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_HI 0x0000e727 + +#define REG_A5XX_TPL1_GS_TEX_SAMP_LO 0x0000e728 + +#define REG_A5XX_TPL1_GS_TEX_SAMP_HI 0x0000e729 + #define REG_A5XX_TPL1_VS_TEX_CONST_LO 0x0000e72a #define REG_A5XX_TPL1_VS_TEX_CONST_HI 0x0000e72b +#define REG_A5XX_TPL1_HS_TEX_CONST_LO 0x0000e72c + +#define REG_A5XX_TPL1_HS_TEX_CONST_HI 0x0000e72d + +#define REG_A5XX_TPL1_DS_TEX_CONST_LO 0x0000e72e + +#define REG_A5XX_TPL1_DS_TEX_CONST_HI 0x0000e72f + +#define REG_A5XX_TPL1_GS_TEX_CONST_LO 0x0000e730 + +#define REG_A5XX_TPL1_GS_TEX_CONST_HI 0x0000e731 + #define REG_A5XX_TPL1_FS_TEX_COUNT 0x0000e750 +#define REG_A5XX_TPL1_CS_TEX_COUNT 0x0000e751 + #define REG_A5XX_TPL1_FS_TEX_SAMP_LO 0x0000e75a #define REG_A5XX_TPL1_FS_TEX_SAMP_HI 0x0000e75b +#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c + +#define REG_A5XX_TPL1_CS_TEX_SAMP_HI 0x0000e75d + #define REG_A5XX_TPL1_FS_TEX_CONST_LO 0x0000e75e #define REG_A5XX_TPL1_FS_TEX_CONST_HI 0x0000e75f +#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 + +#define REG_A5XX_TPL1_CS_TEX_CONST_HI 0x0000e761 + #define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764 #define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK 0x00000004 +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT) & 
A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK; +} #define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785 #define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f @@ -3300,84 +4198,98 @@ static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) #define REG_A5XX_HLSQ_UPDATE_CNTL 0x0000e78a -#define REG_A5XX_HLSQ_VS_CONTROL_REG 0x0000e78b -#define A5XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_HLSQ_VS_CONFIG 0x0000e78b +#define A5XX_HLSQ_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_HLSQ_FS_CONTROL_REG 0x0000e78c -#define A5XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 
1 -static inline uint32_t A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_HLSQ_FS_CONFIG 0x0000e78c +#define A5XX_HLSQ_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_HLSQ_HS_CONTROL_REG 0x0000e78d -#define A5XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_HLSQ_HS_CONFIG 0x0000e78d +#define A5XX_HLSQ_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << 
A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_HLSQ_DS_CONTROL_REG 0x0000e78e -#define A5XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_HLSQ_DS_CONFIG 0x0000e78e +#define A5XX_HLSQ_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << 
A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK; } -#define REG_A5XX_HLSQ_GS_CONTROL_REG 0x0000e78f -#define A5XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00000001 -#define A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +#define REG_A5XX_HLSQ_GS_CONFIG 0x0000e78f +#define A5XX_HLSQ_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; + return ((val) << A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK; } -#define A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) { - return ((val) << A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK; + return ((val) << A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK; } #define REG_A5XX_HLSQ_CS_CONFIG 0x0000e790 +#define A5XX_HLSQ_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << 
A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK; +} #define REG_A5XX_HLSQ_VS_CNTL 0x0000e791 +#define A5XX_HLSQ_VS_CNTL_SSBO_ENABLE 0x00000001 #define A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK 0xfffffffe #define A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT 1 static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) @@ -3386,6 +4298,7 @@ static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) } #define REG_A5XX_HLSQ_FS_CNTL 0x0000e792 +#define A5XX_HLSQ_FS_CNTL_SSBO_ENABLE 0x00000001 #define A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK 0xfffffffe #define A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT 1 static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) @@ -3394,6 +4307,7 @@ static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) } #define REG_A5XX_HLSQ_HS_CNTL 0x0000e793 +#define A5XX_HLSQ_HS_CNTL_SSBO_ENABLE 0x00000001 #define A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK 0xfffffffe #define A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT 1 static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) @@ -3402,6 +4316,7 @@ static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) } #define REG_A5XX_HLSQ_DS_CNTL 0x0000e794 +#define A5XX_HLSQ_DS_CNTL_SSBO_ENABLE 0x00000001 #define A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK 0xfffffffe #define A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT 1 static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) @@ -3410,6 +4325,7 @@ static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) } #define REG_A5XX_HLSQ_GS_CNTL 0x0000e795 +#define A5XX_HLSQ_GS_CNTL_SSBO_ENABLE 0x00000001 #define A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK 0xfffffffe #define A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT 1 static inline uint32_t 
A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) @@ -3418,6 +4334,7 @@ static inline uint32_t A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) } #define REG_A5XX_HLSQ_CS_CNTL 0x0000e796 +#define A5XX_HLSQ_CS_CNTL_SSBO_ENABLE 0x00000001 #define A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK 0xfffffffe #define A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT 1 static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) @@ -3432,20 +4349,86 @@ static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) #define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000e7bb #define REG_A5XX_HLSQ_CS_NDRANGE_0 0x0000e7b0 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; +} #define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 +#define A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_SIZE_X(uint32_t val) +{ + return ((val) << 
A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__MASK; +} #define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 #define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 +#define A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__MASK; +} #define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 #define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 +#define A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__MASK; +} #define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 #define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 +#define A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK0__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 +#define A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK1__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; +} #define 
REG_A5XX_HLSQ_CS_CNTL_1 0x0000e7b8 @@ -3457,16 +4440,12 @@ static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) #define REG_A5XX_UNKNOWN_E7C5 0x0000e7c5 -#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca - -#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 - -#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 - #define REG_A5XX_HLSQ_HS_CONSTLEN 0x0000e7c8 #define REG_A5XX_HLSQ_HS_INSTRLEN 0x0000e7c9 +#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca + #define REG_A5XX_HLSQ_DS_CONSTLEN 0x0000e7cd #define REG_A5XX_HLSQ_DS_INSTRLEN 0x0000e7ce @@ -3479,13 +4458,23 @@ static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) #define REG_A5XX_UNKNOWN_E7D4 0x0000e7d4 +#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 + +#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 + #define REG_A5XX_UNKNOWN_E7D9 0x0000e7d9 -#define REG_A5XX_HLSQ_CONTEXT_SWITCH_CS_SW_3 0x0000e7dc +#define REG_A5XX_HLSQ_CS_CONSTLEN 0x0000e7dc -#define REG_A5XX_HLSQ_CONTEXT_SWITCH_CS_SW_4 0x0000e7dd +#define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd -#define REG_A5XX_RB_2D_DST_FILL 0x00002101 +#define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW2 0x00002103 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW3 0x00002104 #define REG_A5XX_RB_2D_SRC_INFO 0x00002107 #define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff @@ -3505,6 +4494,20 @@ static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) #define REG_A5XX_RB_2D_SRC_HI 0x00002109 +#define REG_A5XX_RB_2D_SRC_SIZE 0x0000210a +#define A5XX_RB_2D_SRC_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_PITCH(uint32_t val) +{ + return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 6) << 
A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK; +} + #define REG_A5XX_RB_2D_DST_INFO 0x00002110 #define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff #define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 @@ -3519,14 +4522,28 @@ static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; } -#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 - -#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 - #define REG_A5XX_RB_2D_DST_LO 0x00002111 #define REG_A5XX_RB_2D_DST_HI 0x00002112 +#define REG_A5XX_RB_2D_DST_SIZE 0x00002113 +#define A5XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_2D_DST_SIZE_PITCH(uint32_t val) +{ + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 + +#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 + #define REG_A5XX_RB_2D_DST_FLAGS_LO 0x00002143 #define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 @@ -3559,6 +4576,12 @@ static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; } +#define REG_A5XX_UNKNOWN_2100 0x00002100 + +#define REG_A5XX_UNKNOWN_2180 0x00002180 + +#define REG_A5XX_UNKNOWN_2184 0x00002184 + #define REG_A5XX_TEX_SAMP_0 0x00000000 #define A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 #define A5XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 @@ -3628,6 +4651,12 @@ static inline uint32_t A5XX_TEX_SAMP_1_MIN_LOD(float val) } #define 
REG_A5XX_TEX_SAMP_2 0x00000002 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 +static inline uint32_t A5XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) +{ + return ((val) << A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; +} #define REG_A5XX_TEX_SAMP_3 0x00000003 @@ -3663,6 +4692,12 @@ static inline uint32_t A5XX_TEX_CONST_0_SWIZ_W(enum a5xx_tex_swiz val) { return ((val) << A5XX_TEX_CONST_0_SWIZ_W__SHIFT) & A5XX_TEX_CONST_0_SWIZ_W__MASK; } +#define A5XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A5XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; +} #define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 #define A5XX_TEX_CONST_0_FMT__SHIFT 22 static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) diff --git a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h index 4a33ba6f1244..b634cf71352b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h +++ b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h @@ -8,17 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2016-04-26 17:56:44) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32907 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 12025 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 22544 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 
23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110765 bytes, from 2016-11-26 23:01:48) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 90321 bytes, from 2016-11-28 16:50:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2016 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) @@ -421,6 +421,35 @@ static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) #define REG_AXXX_CP_IB2_BUFSZ 0x0000045b #define REG_AXXX_CP_STAT 0x0000047f +#define AXXX_CP_STAT_CP_BUSY 0x80000000 +#define AXXX_CP_STAT_VS_EVENT_FIFO_BUSY 0x40000000 +#define AXXX_CP_STAT_PS_EVENT_FIFO_BUSY 0x20000000 +#define AXXX_CP_STAT_CF_EVENT_FIFO_BUSY 0x10000000 +#define AXXX_CP_STAT_RB_EVENT_FIFO_BUSY 0x08000000 +#define AXXX_CP_STAT_ME_BUSY 0x04000000 +#define AXXX_CP_STAT_MIU_WR_C_BUSY 0x02000000 +#define AXXX_CP_STAT_CP_3D_BUSY 0x00800000 +#define 
AXXX_CP_STAT_CP_NRT_BUSY 0x00400000 +#define AXXX_CP_STAT_RBIU_SCRATCH_BUSY 0x00200000 +#define AXXX_CP_STAT_RCIU_ME_BUSY 0x00100000 +#define AXXX_CP_STAT_RCIU_PFP_BUSY 0x00080000 +#define AXXX_CP_STAT_MEQ_RING_BUSY 0x00040000 +#define AXXX_CP_STAT_PFP_BUSY 0x00020000 +#define AXXX_CP_STAT_ST_QUEUE_BUSY 0x00010000 +#define AXXX_CP_STAT_INDIRECT2_QUEUE_BUSY 0x00002000 +#define AXXX_CP_STAT_INDIRECTS_QUEUE_BUSY 0x00001000 +#define AXXX_CP_STAT_RING_QUEUE_BUSY 0x00000800 +#define AXXX_CP_STAT_CSF_BUSY 0x00000400 +#define AXXX_CP_STAT_CSF_ST_BUSY 0x00000200 +#define AXXX_CP_STAT_EVENT_BUSY 0x00000100 +#define AXXX_CP_STAT_CSF_INDIRECT2_BUSY 0x00000080 +#define AXXX_CP_STAT_CSF_INDIRECTS_BUSY 0x00000040 +#define AXXX_CP_STAT_CSF_RING_BUSY 0x00000020 +#define AXXX_CP_STAT_RCIU_BUSY 0x00000010 +#define AXXX_CP_STAT_RBIU_BUSY 0x00000008 +#define AXXX_CP_STAT_MIU_RD_RETURN_BUSY 0x00000004 +#define AXXX_CP_STAT_MIU_RD_REQ_BUSY 0x00000002 +#define AXXX_CP_STAT_MIU_WR_BUSY 0x00000001 #define REG_AXXX_CP_SCRATCH_REG0 0x00000578 diff --git a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h index 6a2930e75503..fb605a3534cf 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h +++ b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h @@ -8,17 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2016-04-26 17:56:44) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32907 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 12025 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 22544 bytes, from 2016-11-26 23:01:08) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110765 bytes, from 2016-11-26 23:01:48) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 90321 bytes, from 2016-11-28 16:50:05) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2016 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) @@ -67,10 +67,18 @@ enum vgt_event_type { PERFCOUNTER_STOP = 24, VS_FETCH_DONE = 27, FACENESS_FLUSH = 28, + FLUSH_SO_0 = 17, + FLUSH_SO_1 = 18, + FLUSH_SO_2 = 19, + FLUSH_SO_3 = 20, + UNK_19 = 25, UNK_1C = 28, UNK_1D = 29, BLIT = 30, - UNK_26 = 38, + UNK_25 = 37, + LRZ_FLUSH = 38, + UNK_2C = 44, + UNK_2D = 45, }; enum pc_di_primtype { @@ -134,11 +142,13 @@ enum adreno_pm4_type3_packets { CP_WAIT_IB_PFD_COMPLETE = 93, CP_REG_RMW = 33, CP_SET_BIN_DATA = 47, + 
CP_SET_BIN_DATA5 = 47, CP_REG_TO_MEM = 62, CP_MEM_WRITE = 61, CP_MEM_WRITE_CNTR = 79, CP_COND_EXEC = 68, CP_COND_WRITE = 69, + CP_COND_WRITE5 = 69, CP_EVENT_WRITE = 70, CP_EVENT_WRITE_SHD = 88, CP_EVENT_WRITE_CFL = 89, @@ -165,6 +175,7 @@ enum adreno_pm4_type3_packets { CP_SET_PROTECTED_MODE = 95, CP_BOOTSTRAP_UCODE = 111, CP_LOAD_STATE = 48, + CP_LOAD_STATE4 = 48, CP_COND_INDIRECT_BUFFER_PFE = 58, CP_COND_INDIRECT_BUFFER_PFD = 50, CP_INDIRECT_BUFFER_PFE = 63, @@ -204,6 +215,7 @@ enum adreno_pm4_type3_packets { CP_COMPUTE_CHECKPOINT = 110, CP_MEM_TO_MEM = 115, CP_BLIT = 44, + CP_UNK_39 = 57, IN_IB_PREFETCH_END = 23, IN_SUBBLK_PREFETCH = 31, IN_INSTR_PREFETCH = 32, @@ -239,21 +251,61 @@ enum adreno_state_src { SS_INDIRECT_STM = 6, }; +enum a4xx_state_block { + SB4_VS_TEX = 0, + SB4_HS_TEX = 1, + SB4_DS_TEX = 2, + SB4_GS_TEX = 3, + SB4_FS_TEX = 4, + SB4_CS_TEX = 5, + SB4_VS_SHADER = 8, + SB4_HS_SHADER = 9, + SB4_DS_SHADER = 10, + SB4_GS_SHADER = 11, + SB4_FS_SHADER = 12, + SB4_CS_SHADER = 13, + SB4_SSBO = 14, + SB4_CS_SSBO = 15, +}; + +enum a4xx_state_type { + ST4_SHADER = 0, + ST4_CONSTANTS = 1, +}; + +enum a4xx_state_src { + SS4_DIRECT = 0, + SS4_INDIRECT = 2, +}; + enum a4xx_index_size { INDEX4_SIZE_8_BIT = 0, INDEX4_SIZE_16_BIT = 1, INDEX4_SIZE_32_BIT = 2, }; +enum cp_cond_function { + WRITE_ALWAYS = 0, + WRITE_LT = 1, + WRITE_LE = 2, + WRITE_EQ = 3, + WRITE_NE = 4, + WRITE_GE = 5, + WRITE_GT = 6, +}; + enum render_mode_cmd { BYPASS = 1, + BINNING = 2, GMEM = 3, BLIT2D = 5, + BLIT2DSCALE = 7, }; enum cp_blit_cmd { BLIT_OP_FILL = 0, - BLIT_OP_BLIT = 1, + BLIT_OP_COPY = 1, + BLIT_OP_SCALE = 3, }; #define REG_CP_LOAD_STATE_0 0x00000000 @@ -296,12 +348,52 @@ static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK; } -#define REG_CP_LOAD_STATE_2 0x00000002 -#define CP_LOAD_STATE_2_EXT_SRC_ADDR_HI__MASK 0xffffffff -#define CP_LOAD_STATE_2_EXT_SRC_ADDR_HI__SHIFT 0 
-static inline uint32_t CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(uint32_t val) +#define REG_CP_LOAD_STATE4_0 0x00000000 +#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x0000ffff +#define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) { - return ((val) << CP_LOAD_STATE_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE_2_EXT_SRC_ADDR_HI__MASK; + return ((val) << CP_LOAD_STATE4_0_DST_OFF__SHIFT) & CP_LOAD_STATE4_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE4_0_STATE_SRC__MASK 0x00030000 +#define CP_LOAD_STATE4_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE4_0_STATE_SRC(enum a4xx_state_src val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_SRC__SHIFT) & CP_LOAD_STATE4_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE4_0_STATE_BLOCK__MASK 0x003c0000 +#define CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT 18 +static inline uint32_t CP_LOAD_STATE4_0_STATE_BLOCK(enum a4xx_state_block val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE4_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE4_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE4_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE4_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE4_0_NUM_UNIT__MASK; +} + +#define REG_CP_LOAD_STATE4_1 0x00000001 +#define CP_LOAD_STATE4_1_STATE_TYPE__MASK 0x00000003 +#define CP_LOAD_STATE4_1_STATE_TYPE__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_1_STATE_TYPE(enum a4xx_state_type val) +{ + return ((val) << CP_LOAD_STATE4_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE4_1_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE4_1_EXT_SRC_ADDR(uint32_t val) +{ + return ((val >> 2) << CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE4_2 0x00000002 +#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK 0xffffffff +#define 
CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; } #define REG_CP_DRAW_INDX_0 0x00000000 @@ -570,6 +662,52 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; } +#define REG_CP_SET_BIN_DATA5_0 0x00000000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__MASK 0x003f0000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT 16 +static inline uint32_t CP_SET_BIN_DATA5_0_VSC_SIZE(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT) & CP_SET_BIN_DATA5_0_VSC_SIZE__MASK; +} +#define CP_SET_BIN_DATA5_0_VSC_N__MASK 0x07c00000 +#define CP_SET_BIN_DATA5_0_VSC_N__SHIFT 22 +static inline uint32_t CP_SET_BIN_DATA5_0_VSC_N(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_N__SHIFT) & CP_SET_BIN_DATA5_0_VSC_N__MASK; +} + +#define REG_CP_SET_BIN_DATA5_1 0x00000001 +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT) & CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_2 0x00000002 +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT) & CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK; +} + +#define REG_CP_SET_BIN_DATA5_3 0x00000003 +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO(uint32_t val) +{ + return ((val) << 
CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_4 0x00000004 +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; +} + #define REG_CP_REG_TO_MEM_0 0x00000000 #define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff #define CP_REG_TO_MEM_0_REG__SHIFT 0 @@ -594,6 +732,128 @@ static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; } +#define REG_CP_MEM_TO_MEM_0 0x00000000 +#define CP_MEM_TO_MEM_0_NEG_A 0x00000001 +#define CP_MEM_TO_MEM_0_NEG_B 0x00000002 +#define CP_MEM_TO_MEM_0_NEG_C 0x00000004 +#define CP_MEM_TO_MEM_0_DOUBLE 0x20000000 + +#define REG_CP_COND_WRITE_0 0x00000000 +#define CP_COND_WRITE_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE_0_FUNCTION__SHIFT) & CP_COND_WRITE_0_FUNCTION__MASK; +} +#define CP_COND_WRITE_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE_1 0x00000001 +#define CP_COND_WRITE_1_POLL_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_1_POLL_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_1_POLL_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_1_POLL_ADDR__SHIFT) & CP_COND_WRITE_1_POLL_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_2 0x00000002 +#define CP_COND_WRITE_2_REF__MASK 0xffffffff +#define CP_COND_WRITE_2_REF__SHIFT 0 +static inline uint32_t CP_COND_WRITE_2_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE_2_REF__SHIFT) & CP_COND_WRITE_2_REF__MASK; +} + +#define REG_CP_COND_WRITE_3 0x00000003 +#define CP_COND_WRITE_3_MASK__MASK 
0xffffffff +#define CP_COND_WRITE_3_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE_3_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE_3_MASK__SHIFT) & CP_COND_WRITE_3_MASK__MASK; +} + +#define REG_CP_COND_WRITE_4 0x00000004 +#define CP_COND_WRITE_4_WRITE_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_4_WRITE_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_4_WRITE_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_4_WRITE_ADDR__SHIFT) & CP_COND_WRITE_4_WRITE_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_5 0x00000005 +#define CP_COND_WRITE_5_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE_5_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE_5_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE_5_WRITE_DATA__SHIFT) & CP_COND_WRITE_5_WRITE_DATA__MASK; +} + +#define REG_CP_COND_WRITE5_0 0x00000000 +#define CP_COND_WRITE5_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE5_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE5_0_FUNCTION__SHIFT) & CP_COND_WRITE5_0_FUNCTION__MASK; +} +#define CP_COND_WRITE5_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE5_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE5_1 0x00000001 +#define CP_COND_WRITE5_1_POLL_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_1_POLL_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT) & CP_COND_WRITE5_1_POLL_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_2 0x00000002 +#define CP_COND_WRITE5_2_POLL_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_2_POLL_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT) & CP_COND_WRITE5_2_POLL_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_3 0x00000003 +#define CP_COND_WRITE5_3_REF__MASK 0xffffffff +#define CP_COND_WRITE5_3_REF__SHIFT 0 +static inline uint32_t 
CP_COND_WRITE5_3_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_3_REF__SHIFT) & CP_COND_WRITE5_3_REF__MASK; +} + +#define REG_CP_COND_WRITE5_4 0x00000004 +#define CP_COND_WRITE5_4_MASK__MASK 0xffffffff +#define CP_COND_WRITE5_4_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_4_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_4_MASK__SHIFT) & CP_COND_WRITE5_4_MASK__MASK; +} + +#define REG_CP_COND_WRITE5_5 0x00000005 +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_5_WRITE_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT) & CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_6 0x00000006 +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_6_WRITE_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT) & CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_7 0x00000007 +#define CP_COND_WRITE5_7_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE5_7_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_7_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_7_WRITE_DATA__SHIFT) & CP_COND_WRITE5_7_WRITE_DATA__MASK; +} + #define REG_CP_DISPATCH_COMPUTE_0 0x00000000 #define REG_CP_DISPATCH_COMPUTE_1 0x00000001 @@ -645,6 +905,7 @@ static inline uint32_t CP_SET_RENDER_MODE_2_ADDR_0_HI(uint32_t val) } #define REG_CP_SET_RENDER_MODE_3 0x00000003 +#define CP_SET_RENDER_MODE_3_VSC_ENABLE 0x00000008 #define CP_SET_RENDER_MODE_3_GMEM_ENABLE 0x00000010 #define REG_CP_SET_RENDER_MODE_4 0x00000004 @@ -673,6 +934,50 @@ static inline uint32_t CP_SET_RENDER_MODE_7_ADDR_1_HI(uint32_t val) return ((val) << CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT) & CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK; } +#define REG_CP_COMPUTE_CHECKPOINT_0 0x00000000 +#define 
CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_1 0x00000001 +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 + +#define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 + +#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 +#define CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_6 0x00000006 +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; +} + #define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 #define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 @@ -698,6 +1003,7 @@ static inline uint32_t CP_EVENT_WRITE_0_EVENT(enum vgt_event_type val) { return ((val) << 
CP_EVENT_WRITE_0_EVENT__SHIFT) & CP_EVENT_WRITE_0_EVENT__MASK; } +#define CP_EVENT_WRITE_0_TIMESTAMP 0x40000000 #define REG_CP_EVENT_WRITE_1 0x00000001 #define CP_EVENT_WRITE_1_ADDR_0_LO__MASK 0xffffffff @@ -781,5 +1087,31 @@ static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val) return ((val) << CP_BLIT_4_DST_Y2__SHIFT) & CP_BLIT_4_DST_Y2__MASK; } +#define REG_CP_EXEC_CS_0 0x00000000 + +#define REG_CP_EXEC_CS_1 0x00000001 +#define CP_EXEC_CS_1_NGROUPS_X__MASK 0xffffffff +#define CP_EXEC_CS_1_NGROUPS_X__SHIFT 0 +static inline uint32_t CP_EXEC_CS_1_NGROUPS_X(uint32_t val) +{ + return ((val) << CP_EXEC_CS_1_NGROUPS_X__SHIFT) & CP_EXEC_CS_1_NGROUPS_X__MASK; +} + +#define REG_CP_EXEC_CS_2 0x00000002 +#define CP_EXEC_CS_2_NGROUPS_Y__MASK 0xffffffff +#define CP_EXEC_CS_2_NGROUPS_Y__SHIFT 0 +static inline uint32_t CP_EXEC_CS_2_NGROUPS_Y(uint32_t val) +{ + return ((val) << CP_EXEC_CS_2_NGROUPS_Y__SHIFT) & CP_EXEC_CS_2_NGROUPS_Y__MASK; +} + +#define REG_CP_EXEC_CS_3 0x00000003 +#define CP_EXEC_CS_3_NGROUPS_Z__MASK 0xffffffff +#define CP_EXEC_CS_3_NGROUPS_Z__SHIFT 0 +static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val) +{ + return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK; +} + #endif /* ADRENO_PM4_XML */ diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h index b3d70ea42891..479086ccf180 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.xml.h +++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h @@ -8,8 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /local/mnt/workspace/source_trees/envytools/rnndb/../rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-01-11 05:19:19) -- /local/mnt/workspace/source_trees/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-05-09 06:32:54) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- 
/home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) diff --git a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h index 8b9f3ebaeba7..57cf7fa7f1c4 100644 --- a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h +++ b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2015-09-18 12:07:28) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 36965 bytes, from 
2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 27887 bytes, from 2015-10-22 16:34:52) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2015-10-22 16:35:02) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2015-05-20 20:03:07) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41472 bytes, from 2016-01-22 18:18:18) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2015-05-20 20:03:14) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) diff --git a/drivers/gpu/drm/msm/dsi/sfpb.xml.h b/drivers/gpu/drm/msm/dsi/sfpb.xml.h 
index 3fcbb30dc241..9d4d1feaefd7 100644 --- a/drivers/gpu/drm/msm/dsi/sfpb.xml.h +++ b/drivers/gpu/drm/msm/dsi/sfpb.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2015-09-18 12:07:28) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 36965 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 27887 bytes, from 2015-10-22 16:34:52) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2015-10-22 16:35:02) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2015-05-20 20:03:07) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41472 bytes, from 2016-01-22 18:18:18) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2015-05-20 20:03:14) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- 
/home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) diff --git a/drivers/gpu/drm/msm/edp/edp.xml.h b/drivers/gpu/drm/msm/edp/edp.xml.h index d7bf3232dc88..f150d4a47707 100644 --- a/drivers/gpu/drm/msm/edp/edp.xml.h +++ b/drivers/gpu/drm/msm/edp/edp.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2015-09-18 12:07:28) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 36965 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 27887 bytes, from 2015-10-22 16:34:52) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2015-10-22 16:35:02) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2015-05-20 20:03:14) -- 
/home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2015-05-20 20:03:07) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41472 bytes, from 2016-01-22 18:18:18) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2015-05-20 20:03:14) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h index 0a97ff75ed6f..ecebf8b623ab 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- 
/home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2015-09-18 12:07:28) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 36965 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 27887 bytes, from 2015-10-22 16:34:52) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2015-10-22 16:35:02) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2015-05-20 20:03:07) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41472 bytes, from 2016-01-22 18:18:18) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2015-05-20 20:03:14) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- 
/home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2016 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) @@ -111,6 +111,32 @@ static inline uint32_t HDMI_ACR_PKT_CTRL_N_MULTIPLIER(uint32_t val) #define HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SOURCE 0x00000040 #define HDMI_INFOFRAME_CTRL0_AUDIO_INFO_UPDATE 0x00000080 +#define REG_HDMI_INFOFRAME_CTRL1 0x00000030 +#define HDMI_INFOFRAME_CTRL1_AVI_INFO_LINE__MASK 0x0000003f +#define HDMI_INFOFRAME_CTRL1_AVI_INFO_LINE__SHIFT 0 +static inline uint32_t HDMI_INFOFRAME_CTRL1_AVI_INFO_LINE(uint32_t val) +{ + return ((val) << HDMI_INFOFRAME_CTRL1_AVI_INFO_LINE__SHIFT) & HDMI_INFOFRAME_CTRL1_AVI_INFO_LINE__MASK; +} +#define HDMI_INFOFRAME_CTRL1_AUDIO_INFO_LINE__MASK 0x00003f00 +#define HDMI_INFOFRAME_CTRL1_AUDIO_INFO_LINE__SHIFT 8 +static inline uint32_t HDMI_INFOFRAME_CTRL1_AUDIO_INFO_LINE(uint32_t val) +{ + return ((val) << HDMI_INFOFRAME_CTRL1_AUDIO_INFO_LINE__SHIFT) & HDMI_INFOFRAME_CTRL1_AUDIO_INFO_LINE__MASK; +} +#define HDMI_INFOFRAME_CTRL1_MPEG_INFO_LINE__MASK 0x003f0000 +#define HDMI_INFOFRAME_CTRL1_MPEG_INFO_LINE__SHIFT 16 +static inline uint32_t HDMI_INFOFRAME_CTRL1_MPEG_INFO_LINE(uint32_t val) +{ + return ((val) << HDMI_INFOFRAME_CTRL1_MPEG_INFO_LINE__SHIFT) & HDMI_INFOFRAME_CTRL1_MPEG_INFO_LINE__MASK; +} +#define HDMI_INFOFRAME_CTRL1_VENSPEC_INFO_LINE__MASK 0x3f000000 +#define HDMI_INFOFRAME_CTRL1_VENSPEC_INFO_LINE__SHIFT 24 +static inline uint32_t HDMI_INFOFRAME_CTRL1_VENSPEC_INFO_LINE(uint32_t val) +{ + return ((val) << HDMI_INFOFRAME_CTRL1_VENSPEC_INFO_LINE__SHIFT) & HDMI_INFOFRAME_CTRL1_VENSPEC_INFO_LINE__MASK; +} + #define REG_HDMI_GEN_PKT_CTRL 0x00000034 #define 
HDMI_GEN_PKT_CTRL_GENERIC0_SEND 0x00000001 #define HDMI_GEN_PKT_CTRL_GENERIC0_CONT 0x00000002 @@ -463,7 +489,7 @@ static inline uint32_t HDMI_DDC_REF_REFTIMER(uint32_t val) #define REG_HDMI_CEC_RD_FILTER 0x000002b0 #define REG_HDMI_ACTIVE_HSYNC 0x000002b4 -#define HDMI_ACTIVE_HSYNC_START__MASK 0x00000fff +#define HDMI_ACTIVE_HSYNC_START__MASK 0x00001fff #define HDMI_ACTIVE_HSYNC_START__SHIFT 0 static inline uint32_t HDMI_ACTIVE_HSYNC_START(uint32_t val) { @@ -477,13 +503,13 @@ static inline uint32_t HDMI_ACTIVE_HSYNC_END(uint32_t val) } #define REG_HDMI_ACTIVE_VSYNC 0x000002b8 -#define HDMI_ACTIVE_VSYNC_START__MASK 0x00000fff +#define HDMI_ACTIVE_VSYNC_START__MASK 0x00001fff #define HDMI_ACTIVE_VSYNC_START__SHIFT 0 static inline uint32_t HDMI_ACTIVE_VSYNC_START(uint32_t val) { return ((val) << HDMI_ACTIVE_VSYNC_START__SHIFT) & HDMI_ACTIVE_VSYNC_START__MASK; } -#define HDMI_ACTIVE_VSYNC_END__MASK 0x0fff0000 +#define HDMI_ACTIVE_VSYNC_END__MASK 0x1fff0000 #define HDMI_ACTIVE_VSYNC_END__SHIFT 16 static inline uint32_t HDMI_ACTIVE_VSYNC_END(uint32_t val) { @@ -491,13 +517,13 @@ static inline uint32_t HDMI_ACTIVE_VSYNC_END(uint32_t val) } #define REG_HDMI_VSYNC_ACTIVE_F2 0x000002bc -#define HDMI_VSYNC_ACTIVE_F2_START__MASK 0x00000fff +#define HDMI_VSYNC_ACTIVE_F2_START__MASK 0x00001fff #define HDMI_VSYNC_ACTIVE_F2_START__SHIFT 0 static inline uint32_t HDMI_VSYNC_ACTIVE_F2_START(uint32_t val) { return ((val) << HDMI_VSYNC_ACTIVE_F2_START__SHIFT) & HDMI_VSYNC_ACTIVE_F2_START__MASK; } -#define HDMI_VSYNC_ACTIVE_F2_END__MASK 0x0fff0000 +#define HDMI_VSYNC_ACTIVE_F2_END__MASK 0x1fff0000 #define HDMI_VSYNC_ACTIVE_F2_END__SHIFT 16 static inline uint32_t HDMI_VSYNC_ACTIVE_F2_END(uint32_t val) { @@ -505,13 +531,13 @@ static inline uint32_t HDMI_VSYNC_ACTIVE_F2_END(uint32_t val) } #define REG_HDMI_TOTAL 0x000002c0 -#define HDMI_TOTAL_H_TOTAL__MASK 0x00000fff +#define HDMI_TOTAL_H_TOTAL__MASK 0x00001fff #define HDMI_TOTAL_H_TOTAL__SHIFT 0 static inline uint32_t 
HDMI_TOTAL_H_TOTAL(uint32_t val) { return ((val) << HDMI_TOTAL_H_TOTAL__SHIFT) & HDMI_TOTAL_H_TOTAL__MASK; } -#define HDMI_TOTAL_V_TOTAL__MASK 0x0fff0000 +#define HDMI_TOTAL_V_TOTAL__MASK 0x1fff0000 #define HDMI_TOTAL_V_TOTAL__SHIFT 16 static inline uint32_t HDMI_TOTAL_V_TOTAL(uint32_t val) { @@ -519,7 +545,7 @@ static inline uint32_t HDMI_TOTAL_V_TOTAL(uint32_t val) } #define REG_HDMI_VSYNC_TOTAL_F2 0x000002c4 -#define HDMI_VSYNC_TOTAL_F2_V_TOTAL__MASK 0x00000fff +#define HDMI_VSYNC_TOTAL_F2_V_TOTAL__MASK 0x00001fff #define HDMI_VSYNC_TOTAL_F2_V_TOTAL__SHIFT 0 static inline uint32_t HDMI_VSYNC_TOTAL_F2_V_TOTAL(uint32_t val) { diff --git a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h index 1b996ede7a65..da646deedf4b 100644 --- a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h +++ b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2015-09-18 12:07:28) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 36965 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 27887 bytes, from 2015-10-22 16:34:52) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2015-10-22 16:35:02) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2015-05-20 20:03:07) -- 
/home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41472 bytes, from 2016-01-22 18:18:18) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2015-05-20 20:03:14) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h index 88037889589b..576cea30d391 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2015-05-20 20:03:14) -- 
/home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2015-09-18 12:07:28) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 36965 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 27887 bytes, from 2015-10-22 16:34:52) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2015-10-22 16:35:02) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2015-05-20 20:03:07) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41472 bytes, from 2016-01-22 18:18:18) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2015-05-20 20:03:14) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- 
/home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h index e6dfc518d4db..d9c10e02ee41 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h @@ -8,9 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /local/mnt/workspace/source_trees/envytools/rnndb/../rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-01-11 05:19:19) -- /local/mnt/workspace/source_trees/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-05-09 06:32:54) -- /local/mnt/workspace/source_trees/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2016-01-07 08:45:55) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, 
from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) diff --git a/drivers/gpu/drm/msm/mdp/mdp_common.xml.h b/drivers/gpu/drm/msm/mdp/mdp_common.xml.h index 8994c365e218..1494c407be44 100644 --- a/drivers/gpu/drm/msm/mdp/mdp_common.xml.h +++ b/drivers/gpu/drm/msm/mdp/mdp_common.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2015-09-18 12:07:28) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 36965 bytes, from 2016-11-26 23:01:08) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 27887 bytes, from 2015-10-22 16:34:52) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2015-10-22 16:35:02) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2015-05-20 20:03:07) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41472 bytes, from 2016-01-22 18:18:18) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2015-05-20 20:03:14) +- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) +- 
/home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) +- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) +- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2017 by the following authors: - Rob Clark (robclark) - Ilia Mirkin (imirkin) From ffe8f53f9cc73fb25c8f78d4aed7ddf285503a60 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 15 Jun 2017 15:13:46 +0100 Subject: [PATCH 229/341] drm/msm/hdmi: Use bitwise operators when building register values Commit c0c0d9eeeb8d ("drm/msm: hdmi audio support") uses logical OR operators to build up a value to be written in the REG_HDMI_AUDIO_INFO0 and REG_HDMI_AUDIO_INFO1 registers when it should have used bitwise operators. 
Signed-off-by: Liviu Dudau Fixes: c0c0d9eeeb8d ("drm/msm: hdmi audio support") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/hdmi/hdmi_audio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c index 8177e8511afd..9c34b91ae329 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c @@ -175,10 +175,10 @@ int msm_hdmi_audio_update(struct hdmi *hdmi) /* configure infoframe: */ hdmi_audio_infoframe_pack(info, buf, sizeof(buf)); hdmi_write(hdmi, REG_HDMI_AUDIO_INFO0, - (buf[3] << 0) || (buf[4] << 8) || - (buf[5] << 16) || (buf[6] << 24)); + (buf[3] << 0) | (buf[4] << 8) | + (buf[5] << 16) | (buf[6] << 24)); hdmi_write(hdmi, REG_HDMI_AUDIO_INFO1, - (buf[7] << 0) || (buf[8] << 8)); + (buf[7] << 0) | (buf[8] << 8)); hdmi_write(hdmi, REG_HDMI_GC, 0); From b474cbbb2bfd8d38f8bc36165567fe153d89fc9a Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 16 Jun 2017 10:39:34 +0530 Subject: [PATCH 230/341] drm/msm/hdmi: 8996 PLL: Populate unprepare Without doing anything in unprepare, the HDMI driver isn't able to switch modes successfully. Calling set_rate with a new rate results in an un-locked PLL. If we reset the PLL in unprepare, the PLL is able to lock with the new rate. 
Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index 143eab46ba68..1fb7645cc721 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -670,6 +670,11 @@ static unsigned long hdmi_8996_pll_recalc_rate(struct clk_hw *hw, static void hdmi_8996_pll_unprepare(struct clk_hw *hw) { + struct hdmi_pll_8996 *pll = hw_clk_to_pll(hw); + struct hdmi_phy *phy = pll_get_phy(pll); + + hdmi_phy_write(phy, REG_HDMI_8996_PHY_CFG, 0x6); + usleep_range(100, 150); } static int hdmi_8996_pll_is_enabled(struct clk_hw *hw) From 816fa34c051492c7f115ad2fd91c9e723d7fc298 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 16 Jun 2017 10:39:36 +0530 Subject: [PATCH 231/341] drm/msm/hdmi: Fix HDMI pink strip issue seen on 8x96 A 2 pixel wide pink strip was observed on the left end of some HDMI monitors configured in a HDMI mode. It turned out that we were missing out on configuring AVI infoframes, and unlike APQ8064, the 8x96 HDMI H/W seems to be sensitive to that. Add configuration of AVI infoframes. While at it, make sure that hdmi_audio_update is only called when we've detected that the monitor supports HDMI. 
Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/hdmi/hdmi_bridge.c | 70 ++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c index 4e6d1bf27474..ae40e7179d4f 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c @@ -86,6 +86,65 @@ static void power_off(struct drm_bridge *bridge) } } +#define AVI_IFRAME_LINE_NUMBER 1 + +static void msm_hdmi_config_avi_infoframe(struct hdmi *hdmi) +{ + struct drm_crtc *crtc = hdmi->encoder->crtc; + const struct drm_display_mode *mode = &crtc->state->adjusted_mode; + union hdmi_infoframe frame; + u8 buffer[HDMI_INFOFRAME_SIZE(AVI)]; + u32 val; + int len; + + drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, mode); + + len = hdmi_infoframe_pack(&frame, buffer, sizeof(buffer)); + if (len < 0) { + dev_err(&hdmi->pdev->dev, + "failed to configure avi infoframe\n"); + return; + } + + /* + * the AVI_INFOx registers don't map exactly to how the AVI infoframes + * are packed according to the spec. 
The checksum from the header is + * written to the LSB byte of AVI_INFO0 and the version is written to + * the third byte from the LSB of AVI_INFO3 + */ + hdmi_write(hdmi, REG_HDMI_AVI_INFO(0), + buffer[3] | + buffer[4] << 8 | + buffer[5] << 16 | + buffer[6] << 24); + + hdmi_write(hdmi, REG_HDMI_AVI_INFO(1), + buffer[7] | + buffer[8] << 8 | + buffer[9] << 16 | + buffer[10] << 24); + + hdmi_write(hdmi, REG_HDMI_AVI_INFO(2), + buffer[11] | + buffer[12] << 8 | + buffer[13] << 16 | + buffer[14] << 24); + + hdmi_write(hdmi, REG_HDMI_AVI_INFO(3), + buffer[15] | + buffer[16] << 8 | + buffer[1] << 24); + + hdmi_write(hdmi, REG_HDMI_INFOFRAME_CTRL0, + HDMI_INFOFRAME_CTRL0_AVI_SEND | + HDMI_INFOFRAME_CTRL0_AVI_CONT); + + val = hdmi_read(hdmi, REG_HDMI_INFOFRAME_CTRL1); + val &= ~HDMI_INFOFRAME_CTRL1_AVI_INFO_LINE__MASK; + val |= HDMI_INFOFRAME_CTRL1_AVI_INFO_LINE(AVI_IFRAME_LINE_NUMBER); + hdmi_write(hdmi, REG_HDMI_INFOFRAME_CTRL1, val); +} + static void msm_hdmi_bridge_pre_enable(struct drm_bridge *bridge) { struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge); @@ -98,7 +157,10 @@ static void msm_hdmi_bridge_pre_enable(struct drm_bridge *bridge) msm_hdmi_phy_resource_enable(phy); msm_hdmi_power_on(bridge); hdmi->power_on = true; - msm_hdmi_audio_update(hdmi); + if (hdmi->hdmi_mode) { + msm_hdmi_config_avi_infoframe(hdmi); + msm_hdmi_audio_update(hdmi); + } } msm_hdmi_phy_powerup(phy, hdmi->pixclock); @@ -134,7 +196,8 @@ static void msm_hdmi_bridge_post_disable(struct drm_bridge *bridge) if (hdmi->power_on) { power_off(bridge); hdmi->power_on = false; - msm_hdmi_audio_update(hdmi); + if (hdmi->hdmi_mode) + msm_hdmi_audio_update(hdmi); msm_hdmi_phy_resource_disable(phy); } } @@ -196,7 +259,8 @@ static void msm_hdmi_bridge_mode_set(struct drm_bridge *bridge, DBG("frame_ctrl=%08x", frame_ctrl); hdmi_write(hdmi, REG_HDMI_FRAME_CTRL, frame_ctrl); - msm_hdmi_audio_update(hdmi); + if (hdmi->hdmi_mode) + msm_hdmi_audio_update(hdmi); } static const struct drm_bridge_funcs 
msm_hdmi_bridge_funcs = { From 4c9c0d09741deab0aac76b83961cfe95b24f3e6f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 14:28:49 +0100 Subject: [PATCH 232/341] drm/i915: Fix retrieval of hangcheck stats The default context is always supported (as it contains the global hangcheck stats) and the contexts for hangcheck are not limited to any ring. This was dropped in 2013 because it was supposed to have been included with Ben's full-ppgtt patch set. It never landed and the bug remains. References: https://bugs.freedesktop.org/show_bug.cgi?id=65845 Link: http://patchwork.freedesktop.org/patch/msgid/1372175222-27622-1-git-send-email-mika.kuoppala@intel.com Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170616132849.29597-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c5d1666d7071..81c73dee095f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1034,9 +1034,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, if (args->flags || args->pad) return -EINVAL; - if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN)) - return -EPERM; - ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; From 4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:16 +0100 Subject: [PATCH 233/341] drm/i915: Store a direct lookup from object handle to vma The advent of full-ppgtt led to an extra indirection between the object and its binding. That extra indirection has a noticeable impact on how fast we can convert from the user handles to our internal vma for execbuffer. In order to bypass the extra indirection, we use a resizable hashtable to jump from the object to the per-ctx vma.
rhashtable was considered but we don't need the online resizing feature and the extra complexity proved to undermine its usefulness. Instead, we simply reallocate the hashtable on demand in a background task and serialize it before iterating. In non-full-ppgtt modes, multiple files and multiple contexts can share the same vma. This leads to having multiple possible handle->vma links, so we only use the first to establish the fast path. The majority of buffers are not shared and so we should still be able to realise speedups with multiple clients. v2: Prettier names, more magic. v3: Many style tweaks, most notably hiding the misuse of execobj[].rsvd2 Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_debugfs.c | 6 + drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 5 +- drivers/gpu/drm/i915/i915_gem_context.c | 82 ++++- drivers/gpu/drm/i915/i915_gem_context.h | 26 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 316 +++++++++++------- drivers/gpu/drm/i915/i915_gem_object.h | 4 +- drivers/gpu/drm/i915/i915_utils.h | 5 + drivers/gpu/drm/i915/i915_vma.c | 20 ++ drivers/gpu/drm/i915/i915_vma.h | 8 +- drivers/gpu/drm/i915/selftests/mock_context.c | 12 +- 11 files changed, 348 insertions(+), 138 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a6ba2100bb88..4577b0af6886 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1998,6 +1998,12 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); } + seq_printf(m, + "\tvma hashtable size=%u (actual %lu), count=%u\n", + ctx->vma_lut.ht_size, + BIT(ctx->vma_lut.ht_bits), + ctx->vma_lut.ht_count); + seq_putc(m, '\n'); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a1b2672cfe56..af2a54672396 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -37,7 +37,7 @@ #include
#include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4ae30f74c475..fcdc452f28bb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3261,6 +3261,10 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) if (vma->vm->file == fpriv) i915_vma_close(vma); + vma = obj->vma_hashed; + if (vma && vma->ctx->file_priv == fpriv) + i915_vma_unlink_ctx(vma); + if (i915_gem_object_is_active(obj) && !i915_gem_object_has_active_reference(obj)) { i915_gem_object_set_active_reference(obj); @@ -4254,7 +4258,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_link); INIT_LIST_HEAD(&obj->userfault_link); - INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 81c73dee095f..23f74014e158 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -85,6 +85,7 @@ * */ +#include #include #include #include "i915_drv.h" @@ -92,6 +93,70 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 +/* Initial size (as log2) to preallocate the handle->object hashtable */ +#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */ + +static void resize_vma_ht(struct work_struct *work) +{ + struct i915_gem_context_vma_lut *lut = + container_of(work, typeof(*lut), resize); + unsigned int bits, new_bits, size, i; + struct hlist_head *new_ht; + + GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)); + + bits = 1 + ilog2(4*lut->ht_count/3 + 1); + new_bits = min_t(unsigned int, + max(bits, VMA_HT_BITS), + sizeof(unsigned int) * BITS_PER_BYTE - 1); + if (new_bits == lut->ht_bits) + goto out; + + new_ht = kzalloc(sizeof(*new_ht)<ht_bits); + for (i = 0; i < size; i++) { + struct i915_vma *vma; + struct 
hlist_node *tmp; + + hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node) + hlist_add_head(&vma->ctx_node, + &new_ht[hash_32(vma->ctx_handle, + new_bits)]); + } + kvfree(lut->ht); + lut->ht = new_ht; + lut->ht_bits = new_bits; +out: + smp_store_release(&lut->ht_size, BIT(bits)); + GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS); +} + +static void vma_lut_free(struct i915_gem_context *ctx) +{ + struct i915_gem_context_vma_lut *lut = &ctx->vma_lut; + unsigned int i, size; + + if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) + cancel_work_sync(&lut->resize); + + size = BIT(lut->ht_bits); + for (i = 0; i < size; i++) { + struct i915_vma *vma; + + hlist_for_each_entry(vma, &lut->ht[i], ctx_node) { + vma->obj->vma_hashed = NULL; + vma->ctx = NULL; + } + } + kvfree(lut->ht); +} + void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -101,6 +166,7 @@ void i915_gem_context_free(struct kref *ctx_ref) trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + vma_lut_free(ctx); i915_ppgtt_put(ctx->ppgtt); for (i = 0; i < I915_NUM_ENGINES; i++) { @@ -118,6 +184,7 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx->name); put_pid(ctx->pid); + list_del(&ctx->link); ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id); @@ -201,13 +268,24 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->i915 = dev_priv; ctx->priority = I915_PRIORITY_NORMAL; + ctx->vma_lut.ht_bits = VMA_HT_BITS; + ctx->vma_lut.ht_size = BIT(VMA_HT_BITS); + BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS); + ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size, + sizeof(*ctx->vma_lut.ht), + GFP_KERNEL); + if (!ctx->vma_lut.ht) + goto err_out; + + INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht); + /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; if (file_priv) { ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL); if 
(ret < 0) - goto err_out; + goto err_lut; } ctx->user_handle = ret; @@ -248,6 +326,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, err_pid: put_pid(ctx->pid); idr_remove(&file_priv->context_idr, ctx->user_handle); +err_lut: + kvfree(ctx->vma_lut.ht); err_out: context_close(ctx); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 4af2ab94558b..82c99ba92ad3 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -143,6 +143,32 @@ struct i915_gem_context { /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; + struct i915_gem_context_vma_lut { + /** ht_size: last request size to allocate the hashtable for. */ + unsigned int ht_size; +#define I915_CTX_RESIZE_IN_PROGRESS BIT(0) + /** ht_bits: real log2(size) of hashtable. */ + unsigned int ht_bits; + /** ht_count: current number of entries inside the hashtable */ + unsigned int ht_count; + + /** ht: the array of buckets comprising the simple hashtable */ + struct hlist_head *ht; + + /** + * resize: After an execbuf completes, we check the load factor + * of the hashtable. If the hashtable is too full, or too empty, + * we schedule a task to resize the hashtable. During the + * resize, the entries are moved between different buckets and + * so we cannot simultaneously read the hashtable as it is + * being resized (unlike rhashtable). Therefore we treat the + * active work as a strong barrier, pausing a subsequent + * execbuf to wait for the resize worker to complete, if + * required. 
+ */ + struct work_struct resize; + } vma_lut; + /** engine: per-engine logical HW state */ struct intel_context { struct i915_vma *state; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d6099d084748..b06f561a268f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -75,37 +75,42 @@ struct i915_execbuffer { unsigned int page; bool use_64bit_reloc : 1; } reloc_cache; - int and; - union { - struct i915_vma **lut; - struct hlist_head *buckets; - }; + int lut_mask; + struct hlist_head *buckets; }; +/* + * As an alternative to creating a hashtable of handle-to-vma for a batch, + * we used the last available reserved field in the execobject[] and stash + * a link from the execobj to its vma. + */ +#define __exec_to_vma(ee) (ee)->rsvd2 +#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee)) + static int eb_create(struct i915_execbuffer *eb) { - eb->lut = NULL; - if (eb->args->flags & I915_EXEC_HANDLE_LUT) { - unsigned int size = eb->args->buffer_count; - size *= sizeof(struct i915_vma *); - eb->lut = kmalloc(size, - GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); - } + if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) { + unsigned int size = 1 + ilog2(eb->args->buffer_count); - if (!eb->lut) { - unsigned int size = eb->args->buffer_count; - unsigned int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; - BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head)); - while (count > 2*size) - count >>= 1; - eb->lut = kzalloc(count * sizeof(struct hlist_head), - GFP_TEMPORARY); - if (!eb->lut) - return -ENOMEM; + do { + eb->buckets = kzalloc(sizeof(struct hlist_head) << size, + GFP_TEMPORARY | + __GFP_NORETRY | + __GFP_NOWARN); + if (eb->buckets) + break; + } while (--size); - eb->and = count - 1; + if (unlikely(!eb->buckets)) { + eb->buckets = kzalloc(sizeof(struct hlist_head), + GFP_TEMPORARY); + if (unlikely(!eb->buckets)) + return 
-ENOMEM; + } + + eb->lut_mask = size; } else { - eb->and = -eb->args->buffer_count; + eb->lut_mask = -eb->args->buffer_count; } return 0; @@ -142,14 +147,160 @@ eb_reset(struct i915_execbuffer *eb) vma->exec_entry = NULL; } - if (eb->and >= 0) - memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); + if (eb->lut_mask >= 0) + memset(eb->buckets, 0, + sizeof(struct hlist_head) << eb->lut_mask); +} + +static bool +eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i) +{ + if (unlikely(vma->exec_entry)) { + DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", + eb->exec[i].handle, i); + return false; + } + list_add_tail(&vma->exec_link, &eb->vmas); + + vma->exec_entry = &eb->exec[i]; + if (eb->lut_mask >= 0) { + vma->exec_handle = eb->exec[i].handle; + hlist_add_head(&vma->exec_node, + &eb->buckets[hash_32(vma->exec_handle, + eb->lut_mask)]); + } + + i915_vma_get(vma); + __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma; + return true; +} + +static inline struct hlist_head * +ht_head(const struct i915_gem_context *ctx, u32 handle) +{ + return &ctx->vma_lut.ht[hash_32(handle, ctx->vma_lut.ht_bits)]; +} + +static inline bool +ht_needs_resize(const struct i915_gem_context *ctx) +{ + return (4*ctx->vma_lut.ht_count > 3*ctx->vma_lut.ht_size || + 4*ctx->vma_lut.ht_count + 1 < ctx->vma_lut.ht_size); +} + +static int +eb_lookup_vmas(struct i915_execbuffer *eb) +{ +#define INTERMEDIATE BIT(0) + const int count = eb->args->buffer_count; + struct i915_vma *vma; + int slow_pass = -1; + int i; + + INIT_LIST_HEAD(&eb->vmas); + + if (unlikely(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS)) + flush_work(&eb->ctx->vma_lut.resize); + GEM_BUG_ON(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS); + + for (i = 0; i < count; i++) { + __exec_to_vma(&eb->exec[i]) = 0; + + hlist_for_each_entry(vma, + ht_head(eb->ctx, eb->exec[i].handle), + ctx_node) { + if (vma->ctx_handle != eb->exec[i].handle) + continue; + + if (!eb_add_vma(eb, 
vma, i)) + return -EINVAL; + + goto next_vma; + } + + if (slow_pass < 0) + slow_pass = i; +next_vma: ; + } + + if (slow_pass < 0) + return 0; + + spin_lock(&eb->file->table_lock); + /* Grab a reference to the object and release the lock so we can lookup + * or create the VMA without using GFP_ATOMIC */ + for (i = slow_pass; i < count; i++) { + struct drm_i915_gem_object *obj; + + if (__exec_to_vma(&eb->exec[i])) + continue; + + obj = to_intel_bo(idr_find(&eb->file->object_idr, + eb->exec[i].handle)); + if (unlikely(!obj)) { + spin_unlock(&eb->file->table_lock); + DRM_DEBUG("Invalid object handle %d at index %d\n", + eb->exec[i].handle, i); + return -ENOENT; + } + + __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj; + } + spin_unlock(&eb->file->table_lock); + + for (i = slow_pass; i < count; i++) { + struct drm_i915_gem_object *obj; + + if ((__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) == 0) + continue; + + /* + * NOTE: We can leak any vmas created here when something fails + * later on. But that's no issue since vma_unbind can deal with + * vmas which are not actually bound. And since only + * lookup_or_create exists as an interface to get at the vma + * from the (obj, vm) we don't run the risk of creating + * duplicated vmas for the same vm. 
+ */ + obj = u64_to_ptr(struct drm_i915_gem_object, + __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE); + vma = i915_vma_instance(obj, eb->vm, NULL); + if (unlikely(IS_ERR(vma))) { + DRM_DEBUG("Failed to lookup VMA\n"); + return PTR_ERR(vma); + } + + /* First come, first served */ + if (!vma->ctx) { + vma->ctx = eb->ctx; + vma->ctx_handle = eb->exec[i].handle; + hlist_add_head(&vma->ctx_node, + ht_head(eb->ctx, eb->exec[i].handle)); + eb->ctx->vma_lut.ht_count++; + if (i915_vma_is_ggtt(vma)) { + GEM_BUG_ON(obj->vma_hashed); + obj->vma_hashed = vma; + } + } + + if (!eb_add_vma(eb, vma, i)) + return -EINVAL; + } + + if (ht_needs_resize(eb->ctx)) { + eb->ctx->vma_lut.ht_size |= I915_CTX_RESIZE_IN_PROGRESS; + queue_work(system_highpri_wq, &eb->ctx->vma_lut.resize); + } + + return 0; +#undef INTERMEDIATE } static struct i915_vma * eb_get_batch(struct i915_execbuffer *eb) { - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_link); + struct i915_vma *vma = + exec_to_vma(&eb->exec[eb->args->buffer_count - 1]); /* * SNA is doing fancy tricks with compressing batch buffers, which leads @@ -166,113 +317,18 @@ eb_get_batch(struct i915_execbuffer *eb) return vma; } -static int -eb_lookup_vmas(struct i915_execbuffer *eb) +static struct i915_vma * +eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) { - struct drm_i915_gem_object *obj; - struct list_head objects; - int i, ret; - - INIT_LIST_HEAD(&eb->vmas); - - INIT_LIST_HEAD(&objects); - spin_lock(&eb->file->table_lock); - /* Grab a reference to the object and release the lock so we can lookup - * or create the VMA without using GFP_ATOMIC */ - for (i = 0; i < eb->args->buffer_count; i++) { - obj = to_intel_bo(idr_find(&eb->file->object_idr, eb->exec[i].handle)); - if (obj == NULL) { - spin_unlock(&eb->file->table_lock); - DRM_DEBUG("Invalid object handle %d at index %d\n", - eb->exec[i].handle, i); - ret = -ENOENT; - goto err; - } - - if (!list_empty(&obj->obj_exec_link)) { - 
spin_unlock(&eb->file->table_lock); - DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", - obj, eb->exec[i].handle, i); - ret = -EINVAL; - goto err; - } - - i915_gem_object_get(obj); - list_add_tail(&obj->obj_exec_link, &objects); - } - spin_unlock(&eb->file->table_lock); - - i = 0; - while (!list_empty(&objects)) { - struct i915_vma *vma; - - obj = list_first_entry(&objects, - struct drm_i915_gem_object, - obj_exec_link); - - /* - * NOTE: We can leak any vmas created here when something fails - * later on. But that's no issue since vma_unbind can deal with - * vmas which are not actually bound. And since only - * lookup_or_create exists as an interface to get at the vma - * from the (obj, vm) we don't run the risk of creating - * duplicated vmas for the same vm. - */ - vma = i915_vma_instance(obj, eb->vm, NULL); - if (unlikely(IS_ERR(vma))) { - DRM_DEBUG("Failed to lookup VMA\n"); - ret = PTR_ERR(vma); - goto err; - } - - /* Transfer ownership from the objects list to the vmas list. */ - list_add_tail(&vma->exec_link, &eb->vmas); - list_del_init(&obj->obj_exec_link); - - vma->exec_entry = &eb->exec[i]; - if (eb->and < 0) { - eb->lut[i] = vma; - } else { - u32 handle = - eb->args->flags & I915_EXEC_HANDLE_LUT ? - i : eb->exec[i].handle; - vma->exec_handle = handle; - hlist_add_head(&vma->exec_node, - &eb->buckets[handle & eb->and]); - } - ++i; - } - - return 0; - - -err: - while (!list_empty(&objects)) { - obj = list_first_entry(&objects, - struct drm_i915_gem_object, - obj_exec_link); - list_del_init(&obj->obj_exec_link); - i915_gem_object_put(obj); - } - /* - * Objects already transfered to the vmas list will be unreferenced by - * eb_destroy. 
- */ - - return ret; -} - -static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) -{ - if (eb->and < 0) { - if (handle >= -eb->and) + if (eb->lut_mask < 0) { + if (handle >= -eb->lut_mask) return NULL; - return eb->lut[handle]; + return exec_to_vma(&eb->exec[handle]); } else { struct hlist_head *head; struct i915_vma *vma; - head = &eb->buckets[handle & eb->and]; + head = &eb->buckets[hash_32(handle, eb->lut_mask)]; hlist_for_each_entry(vma, head, exec_node) { if (vma->exec_handle == handle) return vma; @@ -296,7 +352,7 @@ static void eb_destroy(struct i915_execbuffer *eb) i915_gem_context_put(eb->ctx); - if (eb->buckets) + if (eb->lut_mask >= 0) kfree(eb->buckets); } @@ -916,7 +972,7 @@ static int eb_reserve(struct i915_execbuffer *eb) need_fence = (entry->flags & EXEC_OBJECT_NEEDS_FENCE || needs_unfenced_map) && - i915_gem_object_is_tiled(obj); + i915_gem_object_is_tiled(vma->obj); need_mappable = need_fence || need_reloc_mappable(vma); if (entry->flags & EXEC_OBJECT_PINNED) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index adb482b00271..5b19a4916a4d 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -86,6 +86,7 @@ struct drm_i915_gem_object { * They are also added to @vma_list for easy iteration. */ struct rb_root vma_tree; + struct i915_vma *vma_hashed; /** Stolen memory for this object, instead of being backed by shmem. 
*/ struct drm_mm_node *stolen; @@ -100,9 +101,6 @@ struct drm_i915_gem_object { */ struct list_head userfault_link; - /** Used in execbuf to temporarily hold a ref */ - struct list_head obj_exec_link; - struct list_head batch_pool_link; I915_SELFTEST_DECLARE(struct list_head st_link); diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 16ecd1ab108d..12fc250b47b9 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -99,6 +99,11 @@ __T; \ }) +#define u64_to_ptr(T, x) ({ \ + typecheck(u64, x); \ + (T *)(uintptr_t)(x); \ +}) + #define __mask_next_bit(mask) ({ \ int __idx = ffs(mask) - 1; \ mask &= ~BIT(__idx); \ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bbc8309743a0..ce68194ebff6 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -590,11 +590,31 @@ static void i915_vma_destroy(struct i915_vma *vma) kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); } +void i915_vma_unlink_ctx(struct i915_vma *vma) +{ + struct i915_gem_context *ctx = vma->ctx; + + if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) { + cancel_work_sync(&ctx->vma_lut.resize); + ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; + } + + __hlist_del(&vma->ctx_node); + ctx->vma_lut.ht_count--; + + if (i915_vma_is_ggtt(vma)) + vma->obj->vma_hashed = NULL; + vma->ctx = NULL; +} + void i915_vma_close(struct i915_vma *vma) { GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; + if (vma->ctx) + i915_vma_unlink_ctx(vma); + list_del(&vma->obj_link); rb_erase(&vma->obj_node, &vma->obj->vma_tree); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 11ce83a8adf0..ea98e6e4262f 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -99,6 +99,7 @@ struct i915_vma { struct list_head obj_link; /* Link in the object's VMA list */ struct rb_node obj_node; + struct hlist_node 
obj_hash; /** This vma's place in the execbuf reservation list */ struct list_head exec_link; @@ -110,8 +111,12 @@ struct i915_vma { * Used for performing relocations during execbuffer insertion. */ struct hlist_node exec_node; - unsigned long exec_handle; struct drm_i915_gem_exec_object2 *exec_entry; + u32 exec_handle; + + struct i915_gem_context *ctx; + struct hlist_node ctx_node; + u32 ctx_handle; }; struct i915_vma * @@ -235,6 +240,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); +void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); int __i915_vma_do_pin(struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 8d3a90c3f8ac..f8b9cc212b02 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -40,10 +40,18 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + ctx->vma_lut.ht_bits = VMA_HT_BITS; + ctx->vma_lut.ht_size = BIT(VMA_HT_BITS); + ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size, + sizeof(*ctx->vma_lut.ht), + GFP_KERNEL); + if (!ctx->vma_lut.ht) + goto err_free; + ret = ida_simple_get(&i915->context_hw_ida, 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); if (ret < 0) - goto err_free; + goto err_vma_ht; ctx->hw_id = ret; if (name) { @@ -58,6 +66,8 @@ mock_context(struct drm_i915_private *i915, return ctx; +err_vma_ht: + kvfree(ctx->vma_lut.ht); err_free: kfree(ctx); return NULL; From 507d977ff965682a925ffe479c95136680fcb77b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:17 +0100 Subject: [PATCH 234/341] drm/i915: Pass vma to relocate entry We can simplify our tracking of pending writes in an execbuf to the single bit in the vma->exec_entry->flags, but that requires the 
relocation function knowing the object's vma. Pass it along. Note we have only been using a single bit to track flushing since commit cc889e0f6ce6a63c62db17d702ecfed86d58083f Author: Daniel Vetter Date: Wed Jun 13 20:45:19 2012 +0200 drm/i915: disable flushing_list/gpu_write_list unconditionally flushed all render caches before the breadcrumb and commit 6ac42f4148bc27e5ffd18a9ab0eac57f58822af4 Author: Daniel Vetter Date: Sat Jul 21 12:25:01 2012 +0200 drm/i915: Replace the complex flushing logic with simple invalidate/flush all did away with the explicit GPU domain tracking. This was then codified into the ABI with NO_RELOC in commit ed5982e6ce5f106abcbf071f80730db344a6da42 Author: Daniel Vetter # Oi! Patch stealer! Date: Thu Jan 17 22:23:36 2013 +0100 drm/i915: Allow userspace to hint that the relocations were known Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 102 +++++++++------------ 1 file changed, 42 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b06f561a268f..d7154688eba9 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -622,42 +622,25 @@ repeat: } static int -eb_relocate_entry(struct drm_i915_gem_object *obj, +eb_relocate_entry(struct i915_vma *vma, struct i915_execbuffer *eb, struct drm_i915_gem_relocation_entry *reloc) { - struct drm_gem_object *target_obj; - struct drm_i915_gem_object *target_i915_obj; - struct i915_vma *target_vma; - uint64_t target_offset; + struct i915_vma *target; + u64 target_offset; int ret; /* we've already hold a reference to all valid objects */ - target_vma = eb_get_vma(eb, reloc->target_handle); - if (unlikely(target_vma == NULL)) + target = eb_get_vma(eb, reloc->target_handle); + if (unlikely(!target)) return -ENOENT; - target_i915_obj = target_vma->obj; - target_obj = &target_vma->obj->base; - - target_offset = 
gen8_canonical_addr(target_vma->node.start); - - /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and - * pipe_control writes because the gpu doesn't properly redirect them - * through the ppgtt for non_secure batchbuffers. */ - if (unlikely(IS_GEN6(eb->i915) && - reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { - ret = i915_vma_bind(target_vma, target_i915_obj->cache_level, - PIN_GLOBAL); - if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) - return ret; - } /* Validate that the target is in a valid r/w GPU domain */ if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { DRM_DEBUG("reloc with multiple write domains: " - "obj %p target %d offset %d " + "target %d offset %d " "read %08x write %08x", - obj, reloc->target_handle, + reloc->target_handle, (int) reloc->offset, reloc->read_domains, reloc->write_domain); @@ -666,43 +649,57 @@ eb_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely((reloc->write_domain | reloc->read_domains) & ~I915_GEM_GPU_DOMAINS)) { DRM_DEBUG("reloc with read/write non-GPU domains: " - "obj %p target %d offset %d " + "target %d offset %d " "read %08x write %08x", - obj, reloc->target_handle, + reloc->target_handle, (int) reloc->offset, reloc->read_domains, reloc->write_domain); return -EINVAL; } - target_obj->pending_read_domains |= reloc->read_domains; - target_obj->pending_write_domain |= reloc->write_domain; + if (reloc->write_domain) + target->exec_entry->flags |= EXEC_OBJECT_WRITE; + + /* + * Sandybridge PPGTT errata: We need a global gtt mapping for MI and + * pipe_control writes because the gpu doesn't properly redirect them + * through the ppgtt for non_secure batchbuffers. 
+ */ + if (unlikely(IS_GEN6(eb->i915) && + reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { + ret = i915_vma_bind(target, target->obj->cache_level, + PIN_GLOBAL); + if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) + return ret; + } /* If the relocation already has the right value in it, no * more work needs to be done. */ + target_offset = gen8_canonical_addr(target->node.start); if (target_offset == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { + vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " - "obj %p target %d offset %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->offset, - (int) obj->base.size); + "target %d offset %d size %d.\n", + reloc->target_handle, + (int)reloc->offset, + (int)vma->size); return -EINVAL; } if (unlikely(reloc->offset & 3)) { DRM_DEBUG("Relocation not 4-byte aligned: " - "obj %p target %d offset %d.\n", - obj, reloc->target_handle, - (int) reloc->offset); + "target %d offset %d.\n", + reloc->target_handle, + (int)reloc->offset); return -EINVAL; } - ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset); + ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset); if (ret) return ret; @@ -748,7 +745,7 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) do { u64 offset = r->presumed_offset; - ret = eb_relocate_entry(vma->obj, eb, r); + ret = eb_relocate_entry(vma, eb, r); if (ret) goto out; @@ -794,7 +791,7 @@ eb_relocate_vma_slow(struct i915_vma *vma, int i, ret = 0; for (i = 0; i < entry->relocation_count; i++) { - ret = eb_relocate_entry(vma->obj, eb, &relocs[i]); + ret = eb_relocate_entry(vma, eb, &relocs[i]); if (ret) break; } @@ -827,7 +824,6 @@ eb_reserve_vma(struct i915_vma *vma, struct intel_engine_cs *engine, bool *need_reloc) { - struct drm_i915_gem_object *obj = 
vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; uint64_t flags; int ret; @@ -881,11 +877,6 @@ eb_reserve_vma(struct i915_vma *vma, *need_reloc = true; } - if (entry->flags & EXEC_OBJECT_WRITE) { - obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER; - obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; - } - return 0; } @@ -948,7 +939,6 @@ static int eb_reserve(struct i915_execbuffer *eb) { const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4; const bool needs_unfenced_map = INTEL_INFO(eb->i915)->unfenced_needs_alignment; - struct drm_i915_gem_object *obj; struct i915_vma *vma; struct list_head ordered_vmas; struct list_head pinned_vmas; @@ -961,7 +951,6 @@ static int eb_reserve(struct i915_execbuffer *eb) bool need_fence, need_mappable; vma = list_first_entry(&eb->vmas, struct i915_vma, exec_link); - obj = vma->obj; entry = vma->exec_entry; if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) @@ -982,9 +971,6 @@ static int eb_reserve(struct i915_execbuffer *eb) list_move(&vma->exec_link, &ordered_vmas); } else list_move_tail(&vma->exec_link, &ordered_vmas); - - obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; - obj->base.pending_write_domain = 0; } list_splice(&ordered_vmas, &eb->vmas); list_splice(&pinned_vmas, &eb->vmas); @@ -1170,7 +1156,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb) i915_gem_clflush_object(obj, 0); ret = i915_gem_request_await_object - (eb->request, obj, obj->base.pending_write_domain); + (eb->request, obj, vma->exec_entry->flags & EXEC_OBJECT_WRITE); if (ret) return ret; } @@ -1366,12 +1352,10 @@ eb_move_to_active(struct i915_execbuffer *eb) list_for_each_entry(vma, &eb->vmas, exec_link) { struct drm_i915_gem_object *obj = vma->obj; - obj->base.write_domain = obj->base.pending_write_domain; - if (obj->base.write_domain) - vma->exec_entry->flags |= EXEC_OBJECT_WRITE; - else - obj->base.pending_read_domains |= obj->base.read_domains; - obj->base.read_domains = 
obj->base.pending_read_domains; + obj->base.write_domain = 0; + if (vma->exec_entry->flags & EXEC_OBJECT_WRITE) + obj->base.read_domains = 0; + obj->base.read_domains |= I915_GEM_GPU_DOMAINS; i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags); eb_export_fence(obj, eb->request, vma->exec_entry->flags); @@ -1681,8 +1665,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err; } - /* Set the pending read domains for the batch buffer to COMMAND */ - if (eb.batch->obj->base.pending_write_domain) { + if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; @@ -1719,7 +1702,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } - eb.batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; if (eb.batch_len == 0) eb.batch_len = eb.batch->size - eb.batch_start_offset; From 071750e550af46b5d3a84ad56c2a108c3e136284 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:18 +0100 Subject: [PATCH 235/341] drm/i915: Disable EXEC_OBJECT_ASYNC when doing relocations If we write a relocation into the buffer, we require our own implicit synchronisation added after the start of the execbuf, outside of the user's control. As we may end up clflushing, or doing the patch itself on the GPU, asynchronously we need to look at the implicit serialisation on obj->resv and hence need to disable EXEC_OBJECT_ASYNC for this object. If the user does trigger a stall for relocations, we make sure the stall is complete enough so that the batch is not submitted before we complete those relocations. 
Fixes: 77ae9957897d ("drm/i915: Enable userspace to opt-out of implicit fencing") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jason Ekstrand Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d7154688eba9..9c3f6c40270f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -699,6 +699,16 @@ eb_relocate_entry(struct i915_vma *vma, return -EINVAL; } + /* + * If we write into the object, we need to force the synchronisation + * barrier, either with an asynchronous clflush or if we executed the + * patching using the GPU (though that should be serialised by the + * timeline). To be completely sure, and since we are required to + * do relocations we are already stalling, disable the user's opt + * of our synchronisation. + */ + vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC; + ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset); if (ret) return ret; From 2889caa9232109afc8881f29a2205abeb5709d0c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:19 +0100 Subject: [PATCH 236/341] drm/i915: Eliminate lots of iterations over the execobjects array The major scaling bottleneck in execbuffer is the processing of the execobjects. Creating an auxiliary list is inefficient when compared to using the execobject array we already have allocated. Reservation is then split into phases. As we lookup up the VMA, we try and bind it back into active location. Only if that fails, do we add it to the unbound list for phase 2. In phase 2, we try and add all those objects that could not fit into their previous location, with fallback to retrying all objects and evicting the VM in case of severe fragmentation. 
(This is the same as before, except that phase 1 is now done inline with looking up the VMA to avoid an iteration over the execobject array. In the ideal case, we eliminate the separate reservation phase). During the reservation phase, we only evict from the VM between passes (rather than currently as we try to fit every new VMA). In testing with Unreal Engine's Atlantis demo which stresses the eviction logic on gen7 class hardware, this sped up the framerate by a factor of 2. The second loop amalgamation is between move_to_gpu and move_to_active. As we always submit the request, even if incomplete, we can use the current request to track active VMA as we perform the flushes and synchronisation required. The next big advancement is to avoid copying back to the user any execobjects and relocations that are not changed. v2: Add a Theory of Operation spiel. v3: Fall back to slow relocations in preparation for flushing userptrs. v4: Document struct members, factor out eb_validate_vma(), add a few more comments to explain some magic and hide other magic behind macros. 
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 92 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2122 ++++++++++------- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/i915/i915_vma.h | 1 + .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 16 +- 7 files changed, 1281 insertions(+), 958 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index af2a54672396..7e182dd7e356 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3581,7 +3581,7 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm, int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); -int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); +int i915_gem_evict_vm(struct i915_address_space *vm); /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 204a2d9288ae..a193f1b36c67 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -50,6 +50,29 @@ static bool ggtt_is_idle(struct drm_i915_private *dev_priv) return true; } +static int ggtt_flush(struct drm_i915_private *i915) +{ + int err; + + /* Not everything in the GGTT is tracked via vma (otherwise we + * could evict as required with minimal stalling) so we are forced + * to idle the GPU and explicitly retire outstanding requests in + * the hopes that we can then remove contexts and the like only + * bound by their active reference. 
+ */ + err = i915_gem_switch_to_kernel_context(i915); + if (err) + return err; + + err = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (err) + return err; + + return 0; +} + static bool mark_free(struct drm_mm_scan *scan, struct i915_vma *vma, @@ -175,19 +198,7 @@ search_again: return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; } - /* Not everything in the GGTT is tracked via vma (otherwise we - * could evict as required with minimal stalling) so we are forced - * to idle the GPU and explicitly retire outstanding requests in - * the hopes that we can then remove contexts and the like only - * bound by their active reference. - */ - ret = i915_gem_switch_to_kernel_context(dev_priv); - if (ret) - return ret; - - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + ret = ggtt_flush(dev_priv); if (ret) return ret; @@ -337,10 +348,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, /** * i915_gem_evict_vm - Evict all idle vmas from a vm * @vm: Address space to cleanse - * @do_idle: Boolean directing whether to idle first. * - * This function evicts all idles vmas from a vm. If all unpinned vmas should be - * evicted the @do_idle needs to be set to true. + * This function evicts all vmas from a vm. * * This is used by the execbuf code as a last-ditch effort to defragment the * address space. @@ -348,37 +357,50 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. 
*/ -int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) +int i915_gem_evict_vm(struct i915_address_space *vm) { + struct list_head *phases[] = { + &vm->inactive_list, + &vm->active_list, + NULL + }, **phase; + struct list_head eviction_list; struct i915_vma *vma, *next; int ret; lockdep_assert_held(&vm->i915->drm.struct_mutex); trace_i915_gem_evict_vm(vm); - if (do_idle) { - struct drm_i915_private *dev_priv = vm->i915; - - if (i915_is_ggtt(vm)) { - ret = i915_gem_switch_to_kernel_context(dev_priv); - if (ret) - return ret; - } - - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + /* Switch back to the default context in order to unpin + * the existing context objects. However, such objects only + * pin themselves inside the global GTT and performing the + * switch otherwise is ineffective. + */ + if (i915_is_ggtt(vm)) { + ret = ggtt_flush(vm->i915); if (ret) return ret; - - WARN_ON(!list_empty(&vm->active_list)); } - list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link) - if (!i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); + INIT_LIST_HEAD(&eviction_list); + phase = phases; + do { + list_for_each_entry(vma, *phase, vm_link) { + if (i915_vma_is_pinned(vma)) + continue; - return 0; + __i915_vma_pin(vma); + list_add(&vma->evict_link, &eviction_list); + } + } while (*++phase); + + ret = 0; + list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { + __i915_vma_unpin(vma); + if (ret == 0) + ret = i915_vma_unbind(vma); + } + return ret; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 9c3f6c40270f..a052072fe8b3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -42,41 +42,195 @@ #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) 
-#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) -#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ +#define __EXEC_OBJECT_HAS_PIN BIT(31) +#define __EXEC_OBJECT_HAS_FENCE BIT(30) +#define __EXEC_OBJECT_NEEDS_MAP BIT(29) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ +#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) + +#define __EXEC_HAS_RELOC BIT(31) +#define __EXEC_VALIDATED BIT(30) +#define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) #define __I915_EXEC_ILLEGAL_FLAGS \ (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK) +/** + * DOC: User command execution + * + * Userspace submits commands to be executed on the GPU as an instruction + * stream within a GEM object we call a batchbuffer. This instructions may + * refer to other GEM objects containing auxiliary state such as kernels, + * samplers, render targets and even secondary batchbuffers. Userspace does + * not know where in the GPU memory these objects reside and so before the + * batchbuffer is passed to the GPU for execution, those addresses in the + * batchbuffer and auxiliary objects are updated. This is known as relocation, + * or patching. To try and avoid having to relocate each object on the next + * execution, userspace is told the location of those objects in this pass, + * but this remains just a hint as the kernel may choose a new location for + * any object in the future. + * + * Processing an execbuf ioctl is conceptually split up into a few phases. + * + * 1. Validation - Ensure all the pointers, handles and flags are valid. + * 2. Reservation - Assign GPU address space for every object + * 3. Relocation - Update any addresses to point to the final locations + * 4. Serialisation - Order the request with respect to its dependencies + * 5. Construction - Construct a request to execute the batchbuffer + * 6. 
Submission (at some point in the future execution) + * + * Reserving resources for the execbuf is the most complicated phase. We + * neither want to have to migrate the object in the address space, nor do + * we want to have to update any relocations pointing to this object. Ideally, + * we want to leave the object where it is and for all the existing relocations + * to match. If the object is given a new address, or if userspace thinks the + * object is elsewhere, we have to parse all the relocation entries and update + * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that + * all the target addresses in all of its objects match the value in the + * relocation entries and that they all match the presumed offsets given by the + * list of execbuffer objects. Using this knowledge, we know that if we haven't + * moved any buffers, all the relocation entries are valid and we can skip + * the update. (If userspace is wrong, the likely outcome is an impromptu GPU + * hang.) The requirement for using I915_EXEC_NO_RELOC are: + * + * The addresses written in the objects must match the corresponding + * reloc.presumed_offset which in turn must match the corresponding + * execobject.offset. + * + * Any render targets written to in the batch must be flagged with + * EXEC_OBJECT_WRITE. + * + * To avoid stalling, execobject.offset should match the current + * address of that object within the active context. + * + * The reservation is done is multiple phases. First we try and keep any + * object already bound in its current location - so as long as meets the + * constraints imposed by the new execbuffer. Any object left unbound after the + * first pass is then fitted into any available idle space. If an object does + * not fit, all objects are removed from the reservation and the process rerun + * after sorting the objects into a priority order (more difficult to fit + * objects are tried first). 
Failing that, the entire VM is cleared and we try + * to fit the execbuf once last time before concluding that it simply will not + * fit. + * + * A small complication to all of this is that we allow userspace not only to + * specify an alignment and a size for the object in the address space, but + * we also allow userspace to specify the exact offset. This objects are + * simpler to place (the location is known a priori) all we have to do is make + * sure the space is available. + * + * Once all the objects are in place, patching up the buried pointers to point + * to the final locations is a fairly simple job of walking over the relocation + * entry arrays, looking up the right address and rewriting the value into + * the object. Simple! ... The relocation entries are stored in user memory + * and so to access them we have to copy them into a local buffer. That copy + * has to avoid taking any pagefaults as they may lead back to a GEM object + * requiring the struct_mutex (i.e. recursive deadlock). So once again we split + * the relocation into multiple passes. First we try to do everything within an + * atomic context (avoid the pagefaults) which requires that we never wait. If + * we detect that we may wait, or if we need to fault, then we have to fallback + * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm + * bells yet?) Dropping the mutex means that we lose all the state we have + * built up so far for the execbuf and we must reset any global data. However, + * we do leave the objects pinned in their final locations - which is a + * potential issue for concurrent execbufs. Once we have left the mutex, we can + * allocate and copy all the relocation entries into a large array at our + * leisure, reacquire the mutex, reclaim all the objects and other state and + * then proceed to update any incorrect addresses with the objects. 
+ * + * As we process the relocation entries, we maintain a record of whether the + * object is being written to. Using NORELOC, we expect userspace to provide + * this information instead. We also check whether we can skip the relocation + * by comparing the expected value inside the relocation entry with the target's + * final address. If they differ, we have to map the current object and rewrite + * the 4 or 8 byte pointer within. + * + * Serialising an execbuf is quite simple according to the rules of the GEM + * ABI. Execution within each context is ordered by the order of submission. + * Writes to any GEM object are in order of submission and are exclusive. Reads + * from a GEM object are unordered with respect to other reads, but ordered by + * writes. A write submitted after a read cannot occur before the read, and + * similarly any read submitted after a write cannot occur before the write. + * Writes are ordered between engines such that only one write occurs at any + * time (completing any reads beforehand) - using semaphores where available + * and CPU serialisation otherwise. Other GEM access obey the same rules, any + * write (either via mmaps using set-domain, or via pwrite) must flush all GPU + * reads before starting, and any read (either using set-domain or pread) must + * flush all GPU writes before starting. (Note we only employ a barrier before, + * we currently rely on userspace not concurrently starting a new execution + * whilst reading or writing to an object. This may be an advantage or not + * depending on how much you trust userspace not to shoot themselves in the + * foot.) Serialisation may just result in the request being inserted into + * a DAG awaiting its turn, but most simple is to wait on the CPU until + * all dependencies are resolved. + * + * After all of that, is just a matter of closing the request and handing it to + * the hardware (well, leaving it in a queue to be executed). 
However, we also + * offer the ability for batchbuffers to be run with elevated privileges so + * that they access otherwise hidden registers. (Used to adjust L3 cache etc.) + * Before any batch is given extra privileges we first must check that it + * contains no nefarious instructions, we check that each instruction is from + * our whitelist and all registers are also from an allowed list. We first + * copy the user's batchbuffer to a shadow (so that the user doesn't have + * access to it, either by the CPU or GPU as we scan it) and then parse each + * instruction. If everything is ok, we set a flag telling the hardware to run + * the batchbuffer in trusted mode, otherwise the ioctl is rejected. + */ + struct i915_execbuffer { - struct drm_i915_private *i915; - struct drm_file *file; - struct drm_i915_gem_execbuffer2 *args; - struct drm_i915_gem_exec_object2 *exec; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct i915_address_space *vm; - struct i915_vma *batch; - struct drm_i915_gem_request *request; - u32 batch_start_offset; - u32 batch_len; - unsigned int dispatch_flags; - struct drm_i915_gem_exec_object2 shadow_exec_entry; - bool need_relocs; - struct list_head vmas; + struct drm_i915_private *i915; /** i915 backpointer */ + struct drm_file *file; /** per-file lookup tables and limits */ + struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */ + struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ + + struct intel_engine_cs *engine; /** engine to queue the request to */ + struct i915_gem_context *ctx; /** context for building the request */ + struct i915_address_space *vm; /** GTT and vma for the request */ + + struct drm_i915_gem_request *request; /** our request to build */ + struct i915_vma *batch; /** identity of the batch obj/vma */ + + /** actual size of execobj[] as we may extend it for the cmdparser */ + unsigned int buffer_count; + + /** list of vma not yet bound during reservation phase */ + struct 
list_head unbound; + + /** list of vma that have execobj.relocation_count */ + struct list_head relocs; + + /** + * Track the most recently used object for relocations, as we + * frequently have to perform multiple relocations within the same + * obj/page + */ struct reloc_cache { - struct drm_mm_node node; - unsigned long vaddr; - unsigned int page; + struct drm_mm_node node; /** temporary GTT binding */ + unsigned long vaddr; /** Current kmap address */ + unsigned long page; /** Currently mapped page index */ bool use_64bit_reloc : 1; + bool has_llc : 1; + bool has_fence : 1; + bool needs_unfenced : 1; } reloc_cache; - int lut_mask; - struct hlist_head *buckets; + + u64 invalid_flags; /** Set of execobj.flags that are invalid */ + u32 context_flags; /** Set of execobj.flags to insert from the ctx */ + + u32 batch_start_offset; /** Location within object of batch */ + u32 batch_len; /** Length of batch within object */ + u32 batch_flags; /** Flags composed for emit_bb_start() */ + + /** + * Indicate either the size of the hastable used to resolve + * relocation handles, or if negative that we are using a direct + * index into the execobj[]. + */ + int lut_size; + struct hlist_head *buckets; /** ht for relocation handles */ }; /* @@ -87,11 +241,41 @@ struct i915_execbuffer { #define __exec_to_vma(ee) (ee)->rsvd2 #define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee)) +/* + * Used to convert any address to canonical form. + * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, + * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the + * addresses to be in a canonical form: + * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct + * canonical form [63:48] == [47]." 
+ */ +#define GEN8_HIGH_ADDRESS_BIT 47 +static inline u64 gen8_canonical_addr(u64 address) +{ + return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); +} + +static inline u64 gen8_noncanonical_addr(u64 address) +{ + return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); +} + static int eb_create(struct i915_execbuffer *eb) { - if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) { - unsigned int size = 1 + ilog2(eb->args->buffer_count); + if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { + unsigned int size = 1 + ilog2(eb->buffer_count); + /* + * Without a 1:1 association between relocation handles and + * the execobject[] index, we instead create a hashtable. + * We size it dynamically based on available memory, starting + * first with 1:1 assocative hash and scaling back until + * the allocation succeeds. + * + * Later on we use a positive lut_size to indicate we are + * using this hashtable, and a negative value to indicate a + * direct lookup. + */ do { eb->buckets = kzalloc(sizeof(struct hlist_head) << size, GFP_TEMPORARY | @@ -108,112 +292,411 @@ static int eb_create(struct i915_execbuffer *eb) return -ENOMEM; } - eb->lut_mask = size; + eb->lut_size = size; } else { - eb->lut_mask = -eb->args->buffer_count; + eb->lut_size = -eb->buffer_count; } return 0; } +static bool +eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, + const struct i915_vma *vma) +{ + if (!(entry->flags & __EXEC_OBJECT_HAS_PIN)) + return true; + + if (vma->node.size < entry->pad_to_size) + return true; + + if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) + return true; + + if (entry->flags & EXEC_OBJECT_PINNED && + vma->node.start != entry->offset) + return true; + + if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && + vma->node.start < BATCH_OFFSET_BIAS) + return true; + + if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && + (vma->node.start + vma->node.size - 1) >> 32) + return true; + + return false; +} + +static inline void +eb_pin_vma(struct 
i915_execbuffer *eb, + struct drm_i915_gem_exec_object2 *entry, + struct i915_vma *vma) +{ + u64 flags; + + flags = vma->node.start; + flags |= PIN_USER | PIN_NONBLOCK | PIN_OFFSET_FIXED; + if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT)) + flags |= PIN_GLOBAL; + if (unlikely(i915_vma_pin(vma, 0, 0, flags))) + return; + + if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) { + if (unlikely(i915_vma_get_fence(vma))) { + i915_vma_unpin(vma); + return; + } + + if (i915_vma_pin_fence(vma)) + entry->flags |= __EXEC_OBJECT_HAS_FENCE; + } + + entry->flags |= __EXEC_OBJECT_HAS_PIN; +} + static inline void __eb_unreserve_vma(struct i915_vma *vma, const struct drm_i915_gem_exec_object2 *entry) { + GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN)); + if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE)) i915_vma_unpin_fence(vma); - if (entry->flags & __EXEC_OBJECT_HAS_PIN) - __i915_vma_unpin(vma); + __i915_vma_unpin(vma); } -static void -eb_unreserve_vma(struct i915_vma *vma) +static inline void +eb_unreserve_vma(struct i915_vma *vma, + struct drm_i915_gem_exec_object2 *entry) { - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + if (!(entry->flags & __EXEC_OBJECT_HAS_PIN)) + return; __eb_unreserve_vma(vma, entry); - entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); -} - -static void -eb_reset(struct i915_execbuffer *eb) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &eb->vmas, exec_link) { - eb_unreserve_vma(vma); - i915_vma_put(vma); - vma->exec_entry = NULL; - } - - if (eb->lut_mask >= 0) - memset(eb->buckets, 0, - sizeof(struct hlist_head) << eb->lut_mask); -} - -static bool -eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i) -{ - if (unlikely(vma->exec_entry)) { - DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", - eb->exec[i].handle, i); - return false; - } - list_add_tail(&vma->exec_link, &eb->vmas); - - vma->exec_entry = &eb->exec[i]; - if (eb->lut_mask >= 0) { - 
vma->exec_handle = eb->exec[i].handle; - hlist_add_head(&vma->exec_node, - &eb->buckets[hash_32(vma->exec_handle, - eb->lut_mask)]); - } - - i915_vma_get(vma); - __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma; - return true; -} - -static inline struct hlist_head * -ht_head(const struct i915_gem_context *ctx, u32 handle) -{ - return &ctx->vma_lut.ht[hash_32(handle, ctx->vma_lut.ht_bits)]; -} - -static inline bool -ht_needs_resize(const struct i915_gem_context *ctx) -{ - return (4*ctx->vma_lut.ht_count > 3*ctx->vma_lut.ht_size || - 4*ctx->vma_lut.ht_count + 1 < ctx->vma_lut.ht_size); + entry->flags &= ~__EXEC_OBJECT_RESERVED; } static int -eb_lookup_vmas(struct i915_execbuffer *eb) +eb_validate_vma(struct i915_execbuffer *eb, + struct drm_i915_gem_exec_object2 *entry, + struct i915_vma *vma) +{ + if (unlikely(entry->flags & eb->invalid_flags)) + return -EINVAL; + + if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) + return -EINVAL; + + /* + * Offset can be used as input (EXEC_OBJECT_PINNED), reject + * any non-page-aligned or non-canonical addresses. + */ + if (unlikely(entry->flags & EXEC_OBJECT_PINNED && + entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK))) + return -EINVAL; + + /* pad_to_size was once a reserved field, so sanitize it */ + if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (unlikely(offset_in_page(entry->pad_to_size))) + return -EINVAL; + } else { + entry->pad_to_size = 0; + } + + if (unlikely(vma->exec_entry)) { + DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", + entry->handle, (int)(entry - eb->exec)); + return -EINVAL; + } + + /* + * From drm_mm perspective address space is continuous, + * so from this point we're always using non-canonical + * form internally. 
+ */ + entry->offset = gen8_noncanonical_addr(entry->offset); + + return 0; +} + +static int +eb_add_vma(struct i915_execbuffer *eb, + struct drm_i915_gem_exec_object2 *entry, + struct i915_vma *vma) +{ + int err; + + GEM_BUG_ON(i915_vma_is_closed(vma)); + + if (!(eb->args->flags & __EXEC_VALIDATED)) { + err = eb_validate_vma(eb, entry, vma); + if (unlikely(err)) + return err; + } + + if (eb->lut_size >= 0) { + vma->exec_handle = entry->handle; + hlist_add_head(&vma->exec_node, + &eb->buckets[hash_32(entry->handle, + eb->lut_size)]); + } + + if (entry->relocation_count) + list_add_tail(&vma->reloc_link, &eb->relocs); + + if (!eb->reloc_cache.has_fence) { + entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; + } else { + if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || + eb->reloc_cache.needs_unfenced) && + i915_gem_object_is_tiled(vma->obj)) + entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; + } + + if (!(entry->flags & EXEC_OBJECT_PINNED)) + entry->flags |= eb->context_flags; + + /* + * Stash a pointer from the vma to execobj, so we can query its flags, + * size, alignment etc as provided by the user. Also we stash a pointer + * to the vma inside the execobj so that we can use a direct lookup + * to find the right target VMA when doing relocations. 
+ */ + vma->exec_entry = entry; + __exec_to_vma(entry) = (uintptr_t)i915_vma_get(vma); + + err = 0; + if (vma->node.size) + eb_pin_vma(eb, entry, vma); + if (eb_vma_misplaced(entry, vma)) { + eb_unreserve_vma(vma, entry); + + list_add_tail(&vma->exec_link, &eb->unbound); + if (drm_mm_node_allocated(&vma->node)) + err = i915_vma_unbind(vma); + } else { + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + } + return err; +} + +static inline int use_cpu_reloc(const struct reloc_cache *cache, + const struct drm_i915_gem_object *obj) +{ + if (!i915_gem_object_has_struct_page(obj)) + return false; + + if (DBG_USE_CPU_RELOC) + return DBG_USE_CPU_RELOC > 0; + + return (cache->has_llc || + obj->cache_dirty || + obj->cache_level != I915_CACHE_NONE); +} + +static int eb_reserve_vma(const struct i915_execbuffer *eb, + struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + u64 flags; + int err; + + flags = PIN_USER | PIN_NONBLOCK; + if (entry->flags & EXEC_OBJECT_NEEDS_GTT) + flags |= PIN_GLOBAL; + + /* + * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to the first 4GBs for unflagged objects. 
+ */ + if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) + flags |= PIN_ZONE_4G; + + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) + flags |= PIN_MAPPABLE; + + if (entry->flags & EXEC_OBJECT_PINNED) { + flags |= entry->offset | PIN_OFFSET_FIXED; + flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */ + } else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) { + flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + } + + err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags); + if (err) + return err; + + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + + entry->flags |= __EXEC_OBJECT_HAS_PIN; + GEM_BUG_ON(eb_vma_misplaced(entry, vma)); + + if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) { + err = i915_vma_get_fence(vma); + if (unlikely(err)) { + i915_vma_unpin(vma); + return err; + } + + if (i915_vma_pin_fence(vma)) + entry->flags |= __EXEC_OBJECT_HAS_FENCE; + } + + return 0; +} + +static int eb_reserve(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + struct list_head last; + struct i915_vma *vma; + unsigned int i, pass; + int err; + + /* + * Attempt to pin all of the buffers into the GTT. + * This is done in 3 phases: + * + * 1a. Unbind all objects that do not match the GTT constraints for + * the execbuffer (fenceable, mappable, alignment etc). + * 1b. Increment pin count for already bound objects. + * 2. Bind new objects. + * 3. Decrement pin count. + * + * This avoid unnecessary unbinding of later objects in order to make + * room for the earlier objects *unless* we need to defragment. 
+ */ + + pass = 0; + err = 0; + do { + list_for_each_entry(vma, &eb->unbound, exec_link) { + err = eb_reserve_vma(eb, vma); + if (err) + break; + } + if (err != -ENOSPC) + return err; + + /* Resort *all* the objects into priority order */ + INIT_LIST_HEAD(&eb->unbound); + INIT_LIST_HEAD(&last); + for (i = 0; i < count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + + if (entry->flags & EXEC_OBJECT_PINNED && + entry->flags & __EXEC_OBJECT_HAS_PIN) + continue; + + vma = exec_to_vma(entry); + eb_unreserve_vma(vma, entry); + + if (entry->flags & EXEC_OBJECT_PINNED) + list_add(&vma->exec_link, &eb->unbound); + else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) + list_add_tail(&vma->exec_link, &eb->unbound); + else + list_add_tail(&vma->exec_link, &last); + } + list_splice_tail(&last, &eb->unbound); + + switch (pass++) { + case 0: + break; + + case 1: + /* Too fragmented, unbind everything and retry */ + err = i915_gem_evict_vm(eb->vm); + if (err) + return err; + break; + + default: + return -ENOSPC; + } + } while (1); +} + +static inline struct hlist_head * +ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle) +{ + return &lut->ht[hash_32(handle, lut->ht_bits)]; +} + +static inline bool +ht_needs_resize(const struct i915_gem_context_vma_lut *lut) +{ + return (4*lut->ht_count > 3*lut->ht_size || + 4*lut->ht_count + 1 < lut->ht_size); +} + +static unsigned int eb_batch_index(const struct i915_execbuffer *eb) +{ + return eb->buffer_count - 1; +} + +static int eb_select_context(struct i915_execbuffer *eb) +{ + struct i915_gem_context *ctx; + + ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); + + if (unlikely(i915_gem_context_is_banned(ctx))) { + DRM_DEBUG("Context %u tried to submit while banned\n", + ctx->user_handle); + return -EIO; + } + + eb->ctx = i915_gem_context_get(ctx); + eb->vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &eb->i915->ggtt.base; + + eb->context_flags = 0; + if (ctx->flags & CONTEXT_NO_ZEROMAP) + eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return 0; +} + +static int eb_lookup_vmas(struct i915_execbuffer *eb) { #define INTERMEDIATE BIT(0) - const int count = eb->args->buffer_count; + const unsigned int count = eb->buffer_count; + struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut; struct i915_vma *vma; + struct idr *idr; + unsigned int i; int slow_pass = -1; - int i; + int err; - INIT_LIST_HEAD(&eb->vmas); + INIT_LIST_HEAD(&eb->relocs); + INIT_LIST_HEAD(&eb->unbound); - if (unlikely(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS)) - flush_work(&eb->ctx->vma_lut.resize); - GEM_BUG_ON(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS); + if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)) + flush_work(&lut->resize); + GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS); for (i = 0; i < count; i++) { __exec_to_vma(&eb->exec[i]) = 0; hlist_for_each_entry(vma, - ht_head(eb->ctx, eb->exec[i].handle), + ht_head(lut, eb->exec[i].handle), ctx_node) { if (vma->ctx_handle != eb->exec[i].handle) continue; - if (!eb_add_vma(eb, vma, i)) - return -EINVAL; + err = eb_add_vma(eb, &eb->exec[i], vma); + if (unlikely(err)) + return err; goto next_vma; } @@ -224,24 +707,27 @@ next_vma: ; } if (slow_pass < 0) - return 0; + goto out; spin_lock(&eb->file->table_lock); - /* Grab a reference to the object and release the lock so we can lookup - * or create the VMA without using GFP_ATOMIC */ + /* + * Grab a reference to the object and release the lock so we can lookup + * or create the VMA without using GFP_ATOMIC + */ + idr = &eb->file->object_idr; for (i = slow_pass; i < count; i++) { struct drm_i915_gem_object *obj; if (__exec_to_vma(&eb->exec[i])) continue; - obj = to_intel_bo(idr_find(&eb->file->object_idr, - eb->exec[i].handle)); + obj = to_intel_bo(idr_find(idr, eb->exec[i].handle)); if (unlikely(!obj)) { 
spin_unlock(&eb->file->table_lock); DRM_DEBUG("Invalid object handle %d at index %d\n", eb->exec[i].handle, i); - return -ENOENT; + err = -ENOENT; + goto err; } __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj; @@ -251,7 +737,7 @@ next_vma: ; for (i = slow_pass; i < count; i++) { struct drm_i915_gem_object *obj; - if ((__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) == 0) + if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)) continue; /* @@ -262,12 +748,13 @@ next_vma: ; * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - obj = u64_to_ptr(struct drm_i915_gem_object, + obj = u64_to_ptr(typeof(*obj), __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE); vma = i915_vma_instance(obj, eb->vm, NULL); if (unlikely(IS_ERR(vma))) { DRM_DEBUG("Failed to lookup VMA\n"); - return PTR_ERR(vma); + err = PTR_ERR(vma); + goto err; } /* First come, first served */ @@ -275,32 +762,31 @@ next_vma: ; vma->ctx = eb->ctx; vma->ctx_handle = eb->exec[i].handle; hlist_add_head(&vma->ctx_node, - ht_head(eb->ctx, eb->exec[i].handle)); - eb->ctx->vma_lut.ht_count++; + ht_head(lut, eb->exec[i].handle)); + lut->ht_count++; + lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS; if (i915_vma_is_ggtt(vma)) { GEM_BUG_ON(obj->vma_hashed); obj->vma_hashed = vma; } } - if (!eb_add_vma(eb, vma, i)) - return -EINVAL; + err = eb_add_vma(eb, &eb->exec[i], vma); + if (unlikely(err)) + goto err; } - if (ht_needs_resize(eb->ctx)) { - eb->ctx->vma_lut.ht_size |= I915_CTX_RESIZE_IN_PROGRESS; - queue_work(system_highpri_wq, &eb->ctx->vma_lut.resize); + if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) { + if (ht_needs_resize(lut)) + queue_work(system_highpri_wq, &lut->resize); + else + lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; } - return 0; -#undef INTERMEDIATE -} - -static struct i915_vma * -eb_get_batch(struct i915_execbuffer *eb) -{ - struct i915_vma *vma = - exec_to_vma(&eb->exec[eb->args->buffer_count - 1]); +out: + /* take note of the batch buffer before we might reorder 
the lists */ + i = eb_batch_index(eb); + eb->batch = exec_to_vma(&eb->exec[i]); /* * SNA is doing fancy tricks with compressing batch buffers, which leads @@ -311,24 +797,36 @@ eb_get_batch(struct i915_execbuffer *eb) * Note that actual hangs have only been observed on gen7, but for * paranoia do it everywhere. */ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED)) + eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS; + if (eb->reloc_cache.has_fence) + eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE; - return vma; + eb->args->flags |= __EXEC_VALIDATED; + return eb_reserve(eb); + +err: + for (i = slow_pass; i < count; i++) { + if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) + __exec_to_vma(&eb->exec[i]) = 0; + } + lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; + return err; +#undef INTERMEDIATE } static struct i915_vma * -eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) +eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) { - if (eb->lut_mask < 0) { - if (handle >= -eb->lut_mask) + if (eb->lut_size < 0) { + if (handle >= -eb->lut_size) return NULL; return exec_to_vma(&eb->exec[handle]); } else { struct hlist_head *head; struct i915_vma *vma; - head = &eb->buckets[hash_32(handle, eb->lut_mask)]; + head = &eb->buckets[hash_32(handle, eb->lut_size)]; hlist_for_each_entry(vma, head, exec_node) { if (vma->exec_handle == handle) return vma; @@ -337,61 +835,46 @@ eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_destroy(struct i915_execbuffer *eb) +static void eb_release_vmas(const struct i915_execbuffer *eb) { - struct i915_vma *vma; + const unsigned int count = eb->buffer_count; + unsigned int i; - list_for_each_entry(vma, &eb->vmas, exec_link) { - if (!vma->exec_entry) + for (i = 0; i < count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct i915_vma *vma = exec_to_vma(entry); + + if (!vma) 
continue; - __eb_unreserve_vma(vma, vma->exec_entry); + GEM_BUG_ON(vma->exec_entry != entry); vma->exec_entry = NULL; + + eb_unreserve_vma(vma, entry); + i915_vma_put(vma); } +} - i915_gem_context_put(eb->ctx); +static void eb_reset_vmas(const struct i915_execbuffer *eb) +{ + eb_release_vmas(eb); + if (eb->lut_size >= 0) + memset(eb->buckets, 0, + sizeof(struct hlist_head) << eb->lut_size); +} - if (eb->lut_mask >= 0) +static void eb_destroy(const struct i915_execbuffer *eb) +{ + if (eb->lut_size >= 0) kfree(eb->buckets); } -static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) -{ - if (!i915_gem_object_has_struct_page(obj)) - return false; - - if (DBG_USE_CPU_RELOC) - return DBG_USE_CPU_RELOC > 0; - - return (HAS_LLC(to_i915(obj->base.dev)) || - obj->cache_dirty || - obj->cache_level != I915_CACHE_NONE); -} - -/* Used to convert any address to canonical form. - * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, - * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the - * addresses to be in a canonical form: - * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct - * canonical form [63:48] == [47]." 
- */ -#define GEN8_HIGH_ADDRESS_BIT 47 -static inline uint64_t gen8_canonical_addr(uint64_t address) -{ - return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); -} - -static inline uint64_t gen8_noncanonical_addr(uint64_t address) -{ - return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1); -} - -static inline uint64_t +static inline u64 relocation_target(const struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) + const struct i915_vma *target) { - return gen8_canonical_addr((int)reloc->delta + target_offset); + return gen8_canonical_addr((int)reloc->delta + target->node.start); } static void reloc_cache_init(struct reloc_cache *cache, @@ -400,6 +883,9 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->page = -1; cache->vaddr = 0; /* Must be a variable in the struct to allow GCC to unroll. */ + cache->has_llc = HAS_LLC(i915); + cache->has_fence = INTEL_GEN(i915) < 4; + cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); cache->node.allocated = false; } @@ -458,7 +944,7 @@ static void reloc_cache_reset(struct reloc_cache *cache) static void *reloc_kmap(struct drm_i915_gem_object *obj, struct reloc_cache *cache, - int page) + unsigned long page) { void *vaddr; @@ -466,11 +952,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, kunmap_atomic(unmask_page(cache->vaddr)); } else { unsigned int flushes; - int ret; + int err; - ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); - if (ret) - return ERR_PTR(ret); + err = i915_gem_obj_prepare_shmem_write(obj, &flushes); + if (err) + return ERR_PTR(err); BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); @@ -490,7 +976,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, static void *reloc_iomap(struct drm_i915_gem_object *obj, struct reloc_cache *cache, - int page) + unsigned long page) { struct i915_ggtt *ggtt = cache_to_ggtt(cache); unsigned long offset; @@ -500,31 
+986,31 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); } else { struct i915_vma *vma; - int ret; + int err; - if (use_cpu_reloc(obj)) + if (use_cpu_reloc(cache, obj)) return NULL; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ERR_PTR(ret); + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return ERR_PTR(err); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONBLOCK); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); - ret = drm_mm_insert_node_in_range + err = drm_mm_insert_node_in_range (&ggtt->base.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); - if (ret) /* no inactive aperture space, use cpu reloc */ + if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { - ret = i915_vma_put_fence(vma); - if (ret) { + err = i915_vma_put_fence(vma); + if (err) { i915_vma_unpin(vma); - return ERR_PTR(ret); + return ERR_PTR(err); } cache->node.start = vma->node.start; @@ -552,7 +1038,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, static void *reloc_vaddr(struct drm_i915_gem_object *obj, struct reloc_cache *cache, - int page) + unsigned long page) { void *vaddr; @@ -579,7 +1065,8 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; - /* Writes to the same cacheline are serialised by the CPU + /* + * Writes to the same cacheline are serialised by the CPU * (including clflush). 
On the write path, we only require * that it hits memory in an orderly fashion and place * mb barriers at the start and end of the relocation phase @@ -591,25 +1078,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } -static int -relocate_entry(struct drm_i915_gem_object *obj, +static u64 +relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, - struct reloc_cache *cache, - u64 target_offset) + struct i915_execbuffer *eb, + const struct i915_vma *target) { + struct drm_i915_gem_object *obj = vma->obj; u64 offset = reloc->offset; - bool wide = cache->use_64bit_reloc; + u64 target_offset = relocation_target(reloc, target); + bool wide = eb->reloc_cache.use_64bit_reloc; void *vaddr; - target_offset = relocation_target(reloc, target_offset); repeat: - vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); + vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); clflush_write32(vaddr + offset_in_page(offset), lower_32_bits(target_offset), - cache->vaddr); + eb->reloc_cache.vaddr); if (wide) { offset += sizeof(u32); @@ -618,17 +1106,16 @@ repeat: goto repeat; } - return 0; + return target->node.start | UPDATE; } -static int -eb_relocate_entry(struct i915_vma *vma, - struct i915_execbuffer *eb, - struct drm_i915_gem_relocation_entry *reloc) +static u64 +eb_relocate_entry(struct i915_execbuffer *eb, + struct i915_vma *vma, + const struct drm_i915_gem_relocation_entry *reloc) { struct i915_vma *target; - u64 target_offset; - int ret; + int err; /* we've already hold a reference to all valid objects */ target = eb_get_vma(eb, reloc->target_handle); @@ -658,27 +1145,30 @@ eb_relocate_entry(struct i915_vma *vma, return -EINVAL; } - if (reloc->write_domain) + if (reloc->write_domain) { target->exec_entry->flags |= EXEC_OBJECT_WRITE; - /* - * Sandybridge PPGTT errata: We need a global gtt mapping for MI and - * pipe_control writes because the gpu 
doesn't properly redirect them - * through the ppgtt for non_secure batchbuffers. - */ - if (unlikely(IS_GEN6(eb->i915) && - reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { - ret = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); - if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) - return ret; + /* + * Sandybridge PPGTT errata: We need a global gtt mapping + * for MI and pipe_control writes because the gpu doesn't + * properly redirect them through the ppgtt for non_secure + * batchbuffers. + */ + if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && + IS_GEN6(eb->i915)) { + err = i915_vma_bind(target, target->obj->cache_level, + PIN_GLOBAL); + if (WARN_ONCE(err, + "Unexpected failure to bind target VMA!")) + return err; + } } - /* If the relocation already has the right value in it, no + /* + * If the relocation already has the right value in it, no * more work needs to be done. */ - target_offset = gen8_canonical_addr(target->node.start); - if (target_offset == reloc->presumed_offset) + if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... 
*/ @@ -709,35 +1199,39 @@ eb_relocate_entry(struct i915_vma *vma, */ vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC; - ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset); - if (ret) - return ret; - /* and update the user's relocation entry */ - reloc->presumed_offset = target_offset; - return 0; + return relocate_entry(vma, reloc, eb, target); } -static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) +static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) - struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; - struct drm_i915_gem_relocation_entry __user *user_relocs; - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int remain, ret = 0; - - user_relocs = u64_to_user_ptr(entry->relocs_ptr); + struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; + struct drm_i915_gem_relocation_entry __user *urelocs; + const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + unsigned int remain; + urelocs = u64_to_user_ptr(entry->relocs_ptr); remain = entry->relocation_count; - while (remain) { - struct drm_i915_gem_relocation_entry *r = stack_reloc; - unsigned long unwritten; - unsigned int count; + if (unlikely(remain > N_RELOC(ULONG_MAX))) + return -EINVAL; - count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc)); - remain -= count; + /* + * We must check that the entire relocation array is safe + * to read. However, if the array is not writable the user loses + * the updated relocation values. 
+ */ + if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(urelocs)))) + return -EFAULT; - /* This is the fast path and we cannot handle a pagefault + do { + struct drm_i915_gem_relocation_entry *r = stack; + unsigned int count = + min_t(unsigned int, remain, ARRAY_SIZE(stack)); + unsigned int copied; + + /* + * This is the fast path and we cannot handle a pagefault * whilst holding the struct mutex lest the user pass in the * relocations contained within a mmaped bo. For in such a case * we, the page fault handler would call i915_gem_fault() and @@ -745,342 +1239,154 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) * this is bad and so lockdep complains vehemently. */ pagefault_disable(); - unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])); + copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); pagefault_enable(); - if (unlikely(unwritten)) { - ret = -EFAULT; + if (unlikely(copied)) { + remain = -EFAULT; goto out; } + remain -= count; do { - u64 offset = r->presumed_offset; + u64 offset = eb_relocate_entry(eb, vma, r); - ret = eb_relocate_entry(vma, eb, r); - if (ret) + if (likely(offset == 0)) { + } else if ((s64)offset < 0) { + remain = (int)offset; goto out; - - if (r->presumed_offset != offset) { - pagefault_disable(); - unwritten = __put_user(r->presumed_offset, - &user_relocs->presumed_offset); - pagefault_enable(); - if (unlikely(unwritten)) { - /* Note that reporting an error now - * leaves everything in an inconsistent - * state as we have *already* changed - * the relocation value inside the - * object. As we have not changed the - * reloc.presumed_offset or will not - * change the execobject.offset, on the - * call we may not rewrite the value - * inside the object, leaving it - * dangling and causing a GPU hang. 
- */ - ret = -EFAULT; - goto out; - } + } else { + /* + * Note that reporting an error now + * leaves everything in an inconsistent + * state as we have *already* changed + * the relocation value inside the + * object. As we have not changed the + * reloc.presumed_offset or will not + * change the execobject.offset, on the + * call we may not rewrite the value + * inside the object, leaving it + * dangling and causing a GPU hang. Unless + * userspace dynamically rebuilds the + * relocations on each execbuf rather than + * presume a static tree. + * + * We did previously check if the relocations + * were writable (access_ok), an error now + * would be a strange race with mprotect, + * having already demonstrated that we + * can read from this userspace address. + */ + offset = gen8_canonical_addr(offset & ~UPDATE); + __put_user(offset, + &urelocs[r-stack].presumed_offset); } - - user_relocs++; - r++; - } while (--count); - } - + } while (r++, --count); + urelocs += ARRAY_SIZE(stack); + } while (remain); out: reloc_cache_reset(&eb->reloc_cache); - return ret; -#undef N_RELOC + return remain; } static int -eb_relocate_vma_slow(struct i915_vma *vma, - struct i915_execbuffer *eb, - struct drm_i915_gem_relocation_entry *relocs) +eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) { const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int i, ret = 0; + struct drm_i915_gem_relocation_entry *relocs = + u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + unsigned int i; + int err; for (i = 0; i < entry->relocation_count; i++) { - ret = eb_relocate_entry(vma, eb, &relocs[i]); - if (ret) - break; - } - reloc_cache_reset(&eb->reloc_cache); - return ret; -} + u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); -static int eb_relocate(struct i915_execbuffer *eb) -{ - struct i915_vma *vma; - int ret = 0; - - list_for_each_entry(vma, &eb->vmas, exec_link) { - ret = eb_relocate_vma(vma, eb); - if (ret) - break; - } - - return ret; -} - -static bool 
only_mappable_for_reloc(unsigned int flags) -{ - return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) == - __EXEC_OBJECT_NEEDS_MAP; -} - -static int -eb_reserve_vma(struct i915_vma *vma, - struct intel_engine_cs *engine, - bool *need_reloc) -{ - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - uint64_t flags; - int ret; - - flags = PIN_USER; - if (entry->flags & EXEC_OBJECT_NEEDS_GTT) - flags |= PIN_GLOBAL; - - if (!drm_mm_node_allocated(&vma->node)) { - /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, - * limit address to the first 4GBs for unflagged objects. - */ - if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0) - flags |= PIN_ZONE_4G; - if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) - flags |= PIN_GLOBAL | PIN_MAPPABLE; - if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) - flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; - if (entry->flags & EXEC_OBJECT_PINNED) - flags |= entry->offset | PIN_OFFSET_FIXED; - if ((flags & PIN_MAPPABLE) == 0) - flags |= PIN_HIGH; - } - - ret = i915_vma_pin(vma, - entry->pad_to_size, - entry->alignment, - flags); - if ((ret == -ENOSPC || ret == -E2BIG) && - only_mappable_for_reloc(entry->flags)) - ret = i915_vma_pin(vma, - entry->pad_to_size, - entry->alignment, - flags & ~PIN_MAPPABLE); - if (ret) - return ret; - - entry->flags |= __EXEC_OBJECT_HAS_PIN; - - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - ret = i915_vma_get_fence(vma); - if (ret) - return ret; - - if (i915_vma_pin_fence(vma)) - entry->flags |= __EXEC_OBJECT_HAS_FENCE; - } - - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start; - *need_reloc = true; - } - - return 0; -} - -static bool -need_reloc_mappable(struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - - if (entry->relocation_count == 0) - return false; - - if (!i915_vma_is_ggtt(vma)) - return false; - - /* See also use_cpu_reloc() */ - if (HAS_LLC(to_i915(vma->obj->base.dev))) - return false; - - if 
(vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return false; - - return true; -} - -static bool -eb_vma_misplaced(struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - - WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && - !i915_vma_is_ggtt(vma)); - - if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) - return true; - - if (vma->node.size < entry->pad_to_size) - return true; - - if (entry->flags & EXEC_OBJECT_PINNED && - vma->node.start != entry->offset) - return true; - - if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && - vma->node.start < BATCH_OFFSET_BIAS) - return true; - - /* avoid costly ping-pong once a batch bo ended up non-mappable */ - if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && - !i915_vma_is_map_and_fenceable(vma)) - return !only_mappable_for_reloc(entry->flags); - - if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && - (vma->node.start + vma->node.size - 1) >> 32) - return true; - - return false; -} - -static int eb_reserve(struct i915_execbuffer *eb) -{ - const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4; - const bool needs_unfenced_map = INTEL_INFO(eb->i915)->unfenced_needs_alignment; - struct i915_vma *vma; - struct list_head ordered_vmas; - struct list_head pinned_vmas; - int retry; - - INIT_LIST_HEAD(&ordered_vmas); - INIT_LIST_HEAD(&pinned_vmas); - while (!list_empty(&eb->vmas)) { - struct drm_i915_gem_exec_object2 *entry; - bool need_fence, need_mappable; - - vma = list_first_entry(&eb->vmas, struct i915_vma, exec_link); - entry = vma->exec_entry; - - if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) - entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - if (!has_fenced_gpu_access) - entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; - need_fence = - (entry->flags & EXEC_OBJECT_NEEDS_FENCE || - needs_unfenced_map) && - i915_gem_object_is_tiled(vma->obj); - need_mappable = need_fence || need_reloc_mappable(vma); - - if (entry->flags & EXEC_OBJECT_PINNED) - list_move_tail(&vma->exec_link, 
&pinned_vmas); - else if (need_mappable) { - entry->flags |= __EXEC_OBJECT_NEEDS_MAP; - list_move(&vma->exec_link, &ordered_vmas); - } else - list_move_tail(&vma->exec_link, &ordered_vmas); - } - list_splice(&ordered_vmas, &eb->vmas); - list_splice(&pinned_vmas, &eb->vmas); - - /* Attempt to pin all of the buffers into the GTT. - * This is done in 3 phases: - * - * 1a. Unbind all objects that do not match the GTT constraints for - * the execbuffer (fenceable, mappable, alignment etc). - * 1b. Increment pin count for already bound objects. - * 2. Bind new objects. - * 3. Decrement pin count. - * - * This avoid unnecessary unbinding of later objects in order to make - * room for the earlier objects *unless* we need to defragment. - */ - retry = 0; - do { - int ret = 0; - - /* Unbind any ill-fitting objects or pin. */ - list_for_each_entry(vma, &eb->vmas, exec_link) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - if (eb_vma_misplaced(vma)) - ret = i915_vma_unbind(vma); - else - ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); - if (ret) - goto err; + if ((s64)offset < 0) { + err = (int)offset; + goto err; } - - /* Bind fresh objects */ - list_for_each_entry(vma, &eb->vmas, exec_link) { - if (drm_mm_node_allocated(&vma->node)) - continue; - - ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); - if (ret) - goto err; - } - + } + err = 0; err: - if (ret != -ENOSPC || retry++) - return ret; - - /* Decrement pin count for bound objects */ - list_for_each_entry(vma, &eb->vmas, exec_link) - eb_unreserve_vma(vma); - - ret = i915_gem_evict_vm(eb->vm, true); - if (ret) - return ret; - } while (1); + reloc_cache_reset(&eb->reloc_cache); + return err; } -static int -eb_relocate_slow(struct i915_execbuffer *eb) +static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) { - const unsigned int count = eb->args->buffer_count; - struct drm_device *dev = &eb->i915->drm; - struct drm_i915_gem_relocation_entry *reloc; - struct i915_vma *vma; - 
int *reloc_offset; - int i, total, ret; + const char __user *addr, *end; + unsigned long size; + char __maybe_unused c; - /* We may process another execbuffer during the unlock... */ - eb_reset(eb); - mutex_unlock(&dev->struct_mutex); + size = entry->relocation_count; + if (size == 0) + return 0; - total = 0; - for (i = 0; i < count; i++) - total += eb->exec[i].relocation_count; + if (size > N_RELOC(ULONG_MAX)) + return -EINVAL; - reloc_offset = kvmalloc_array(count, sizeof(*reloc_offset), GFP_KERNEL); - reloc = kvmalloc_array(total, sizeof(*reloc), GFP_KERNEL); - if (reloc == NULL || reloc_offset == NULL) { - kvfree(reloc); - kvfree(reloc_offset); - mutex_lock(&dev->struct_mutex); - return -ENOMEM; + addr = u64_to_user_ptr(entry->relocs_ptr); + size *= sizeof(struct drm_i915_gem_relocation_entry); + if (!access_ok(VERIFY_READ, addr, size)) + return -EFAULT; + + end = addr + size; + for (; addr < end; addr += PAGE_SIZE) { + int err = __get_user(c, addr); + if (err) + return err; } + return __get_user(c, end - 1); +} + +static int eb_copy_relocations(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + int err; - total = 0; for (i = 0; i < count; i++) { - struct drm_i915_gem_relocation_entry __user *user_relocs; - u64 invalid_offset = (u64)-1; - int j; + const unsigned int nreloc = eb->exec[i].relocation_count; + struct drm_i915_gem_relocation_entry __user *urelocs; + struct drm_i915_gem_relocation_entry *relocs; + unsigned long size; + unsigned long copied; - user_relocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); + if (nreloc == 0) + continue; - if (copy_from_user(reloc+total, user_relocs, - eb->exec[i].relocation_count * sizeof(*reloc))) { - ret = -EFAULT; - mutex_lock(&dev->struct_mutex); + err = check_relocations(&eb->exec[i]); + if (err) + goto err; + + urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); + size = nreloc * sizeof(*relocs); + + relocs = kvmalloc_array(size, 1, GFP_TEMPORARY); + if (!relocs) { + 
kvfree(relocs); + err = -ENOMEM; goto err; } - /* As we do not update the known relocation offsets after + /* copy_from_user is limited to < 4GiB */ + copied = 0; + do { + unsigned int len = + min_t(u64, BIT_ULL(31), size - copied); + + if (__copy_from_user((char *)relocs + copied, + (char *)urelocs + copied, + len)) { + kvfree(relocs); + err = -EFAULT; + goto err; + } + + copied += len; + } while (copied < size); + + /* + * As we do not update the known relocation offsets after * relocating (due to the complexities in lock handling), * we need to mark them as invalid now so that we force the * relocation processing next time. Just in case the target @@ -1089,65 +1395,201 @@ eb_relocate_slow(struct i915_execbuffer *eb) * happened we would make the mistake of assuming that the * relocations were valid. */ - for (j = 0; j < eb->exec[i].relocation_count; j++) { - if (__copy_to_user(&user_relocs[j].presumed_offset, - &invalid_offset, - sizeof(invalid_offset))) { - ret = -EFAULT; - mutex_lock(&dev->struct_mutex); - goto err; - } - } + user_access_begin(); + for (copied = 0; copied < nreloc; copied++) + unsafe_put_user(-1, + &urelocs[copied].presumed_offset, + end_user); +end_user: + user_access_end(); - reloc_offset[i] = total; - total += eb->exec[i].relocation_count; + eb->exec[i].relocs_ptr = (uintptr_t)relocs; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) { + return 0; + +err: + while (i--) { + struct drm_i915_gem_relocation_entry *relocs = + u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); + if (eb->exec[i].relocation_count) + kvfree(relocs); + } + return err; +} + +static int eb_prefault_relocations(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + + if (unlikely(i915.prefault_disable)) + return 0; + + for (i = 0; i < count; i++) { + int err; + + err = check_relocations(&eb->exec[i]); + if (err) + return err; + } + + return 0; +} + +static noinline int eb_relocate_slow(struct i915_execbuffer *eb) 
+{ + struct drm_device *dev = &eb->i915->drm; + bool have_copy = false; + struct i915_vma *vma; + int err = 0; + +repeat: + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } + + /* We may process another execbuffer during the unlock... */ + eb_reset_vmas(eb); + mutex_unlock(&dev->struct_mutex); + + /* + * We take 3 passes through the slowpatch. + * + * 1 - we try to just prefault all the user relocation entries and + * then attempt to reuse the atomic pagefault disabled fast path again. + * + * 2 - we copy the user entries to a local buffer here outside of the + * local and allow ourselves to wait upon any rendering before + * relocations + * + * 3 - we already have a local copy of the relocation entries, but + * were interrupted (EAGAIN) whilst waiting for the objects, try again. + */ + if (!err) { + err = eb_prefault_relocations(eb); + } else if (!have_copy) { + err = eb_copy_relocations(eb); + have_copy = err == 0; + } else { + cond_resched(); + err = 0; + } + if (err) { mutex_lock(&dev->struct_mutex); - goto err; + goto out; + } + + err = i915_mutex_lock_interruptible(dev); + if (err) { + mutex_lock(&dev->struct_mutex); + goto out; } /* reacquire the objects */ - ret = eb_lookup_vmas(eb); - if (ret) + err = eb_lookup_vmas(eb); + if (err) goto err; - ret = eb_reserve(eb); - if (ret) - goto err; - - list_for_each_entry(vma, &eb->vmas, exec_link) { - int idx = vma->exec_entry - eb->exec; - - ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[idx]); - if (ret) - goto err; + list_for_each_entry(vma, &eb->relocs, reloc_link) { + if (!have_copy) { + pagefault_disable(); + err = eb_relocate_vma(eb, vma); + pagefault_enable(); + if (err) + goto repeat; + } else { + err = eb_relocate_vma_slow(eb, vma); + if (err) + goto err; + } } - /* Leave the user relocations as are, this is the painfully slow path, + /* + * Leave the user relocations as are, this is the painfully slow path, * and we want to avoid the complication of dropping the lock whilst * 
having buffers reserved in the aperture and so causing spurious * ENOSPC for random operations. */ err: - kvfree(reloc); - kvfree(reloc_offset); - return ret; + if (err == -EAGAIN) + goto repeat; + +out: + if (have_copy) { + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + const struct drm_i915_gem_exec_object2 *entry = + &eb->exec[i]; + struct drm_i915_gem_relocation_entry *relocs; + + if (!entry->relocation_count) + continue; + + relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + kvfree(relocs); + } + } + + return err ?: have_copy; } -static int -eb_move_to_gpu(struct i915_execbuffer *eb) +static int eb_relocate(struct i915_execbuffer *eb) { - struct i915_vma *vma; - int ret; + if (eb_lookup_vmas(eb)) + goto slow; - list_for_each_entry(vma, &eb->vmas, exec_link) { + /* The objects are in their final locations, apply the relocations. */ + if (eb->args->flags & __EXEC_HAS_RELOC) { + struct i915_vma *vma; + + list_for_each_entry(vma, &eb->relocs, reloc_link) { + if (eb_relocate_vma(eb, vma)) + goto slow; + } + } + + return 0; + +slow: + return eb_relocate_slow(eb); +} + +static void eb_export_fence(struct drm_i915_gem_object *obj, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct reservation_object *resv = obj->resv; + + /* + * Ignore errors from failing to allocate the new fence, we can't + * handle an error right now. Worst case should be missed + * synchronisation leading to rendering corruption. 
+ */ + reservation_object_lock(resv, NULL); + if (flags & EXEC_OBJECT_WRITE) + reservation_object_add_excl_fence(resv, &req->fence); + else if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + reservation_object_unlock(resv); +} + +static int eb_move_to_gpu(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + int err; + + for (i = 0; i < count; i++) { + const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct i915_vma *vma = exec_to_vma(entry); struct drm_i915_gem_object *obj = vma->obj; - if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { + if (entry->flags & EXEC_OBJECT_CAPTURE) { struct i915_gem_capture_list *capture; capture = kmalloc(sizeof(*capture), GFP_KERNEL); @@ -1159,18 +1601,32 @@ eb_move_to_gpu(struct i915_execbuffer *eb) eb->request->capture_list = capture; } - if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) - continue; + if (entry->flags & EXEC_OBJECT_ASYNC) + goto skip_flushes; if (unlikely(obj->cache_dirty && !obj->cache_coherent)) i915_gem_clflush_object(obj, 0); - ret = i915_gem_request_await_object - (eb->request, obj, vma->exec_entry->flags & EXEC_OBJECT_WRITE); - if (ret) - return ret; + err = i915_gem_request_await_object + (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE); + if (err) + return err; + +skip_flushes: + i915_vma_move_to_active(vma, eb->request, entry->flags); + __eb_unreserve_vma(vma, entry); + vma->exec_entry = NULL; } + for (i = 0; i < count; i++) { + const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct i915_vma *vma = exec_to_vma(entry); + + eb_export_fence(vma->obj, eb->request, entry->flags); + i915_vma_put(vma); + } + eb->exec = NULL; + /* Unconditionally flush any chipset caches (for streaming writes). 
*/ i915_gem_chipset_flush(eb->i915); @@ -1178,8 +1634,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb) return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); } -static bool -i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) { if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) return false; @@ -1201,103 +1656,6 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return true; } -static int -validate_exec_list(struct drm_device *dev, - struct drm_i915_gem_exec_object2 *exec, - int count) -{ - unsigned relocs_total = 0; - unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry); - unsigned invalid_flags; - int i; - - /* INTERNAL flags must not overlap with external ones */ - BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - - invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; - if (USES_FULL_PPGTT(dev)) - invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - - for (i = 0; i < count; i++) { - char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr); - int length; /* limited by fault_in_pages_readable() */ - - if (exec[i].flags & invalid_flags) - return -EINVAL; - - /* Offset can be used as input (EXEC_OBJECT_PINNED), reject - * any non-page-aligned or non-canonical addresses. - */ - if (exec[i].flags & EXEC_OBJECT_PINNED) { - if (exec[i].offset != - gen8_canonical_addr(exec[i].offset & PAGE_MASK)) - return -EINVAL; - } - - /* From drm_mm perspective address space is continuous, - * so from this point we're always using non-canonical - * form internally. 
- */ - exec[i].offset = gen8_noncanonical_addr(exec[i].offset); - - if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) - return -EINVAL; - - /* pad_to_size was once a reserved field, so sanitize it */ - if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { - if (offset_in_page(exec[i].pad_to_size)) - return -EINVAL; - } else { - exec[i].pad_to_size = 0; - } - - /* First check for malicious input causing overflow in - * the worst case where we need to allocate the entire - * relocation tree as a single array. - */ - if (exec[i].relocation_count > relocs_max - relocs_total) - return -EINVAL; - relocs_total += exec[i].relocation_count; - - length = exec[i].relocation_count * - sizeof(struct drm_i915_gem_relocation_entry); - /* - * We must check that the entire relocation array is safe - * to read, but since we may need to update the presumed - * offsets during execution, check for full write access. - */ - if (!access_ok(VERIFY_WRITE, ptr, length)) - return -EFAULT; - - if (likely(!i915.prefault_disable)) { - if (fault_in_pages_readable(ptr, length)) - return -EFAULT; - } - } - - return 0; -} - -static int eb_select_context(struct i915_execbuffer *eb) -{ - unsigned int ctx_id = i915_execbuffer2_get_context_id(*eb->args); - struct i915_gem_context *ctx; - - ctx = i915_gem_context_lookup(eb->file->driver_priv, ctx_id); - if (unlikely(IS_ERR(ctx))) - return PTR_ERR(ctx); - - if (unlikely(i915_gem_context_is_banned(ctx))) { - DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); - return -EIO; - } - - eb->ctx = i915_gem_context_get(ctx); - eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base; - - return 0; -} - void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned int flags) @@ -1308,7 +1666,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, lockdep_assert_held(&req->i915->drm.struct_mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - /* Add a reference if we're newly entering the active list. 
+ /* + * Add a reference if we're newly entering the active list. * The order in which we add operations to the retirement queue is * vital here: mark_active adds to the start of the callback list, * such that subsequent callbacks are called first. Therefore we @@ -1336,44 +1695,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, i915_gem_active_set(&vma->last_fence, req); } -static void eb_export_fence(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - unsigned int flags) -{ - struct reservation_object *resv = obj->resv; - - /* Ignore errors from failing to allocate the new fence, we can't - * handle an error right now. Worst case should be missed - * synchronisation leading to rendering corruption. - */ - reservation_object_lock(resv, NULL); - if (flags & EXEC_OBJECT_WRITE) - reservation_object_add_excl_fence(resv, &req->fence); - else if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &req->fence); - reservation_object_unlock(resv); -} - -static void -eb_move_to_active(struct i915_execbuffer *eb) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &eb->vmas, exec_link) { - struct drm_i915_gem_object *obj = vma->obj; - - obj->base.write_domain = 0; - if (vma->exec_entry->flags & EXEC_OBJECT_WRITE) - obj->base.read_domains = 0; - obj->base.read_domains |= I915_GEM_GPU_DOMAINS; - - i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags); - eb_export_fence(obj, eb->request, vma->exec_entry->flags); - } -} - -static int -i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) +static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { u32 *cs; int i; @@ -1383,16 +1705,16 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return -EINVAL; } - cs = intel_ring_begin(req, 4 * 3); + cs = intel_ring_begin(req, 4 * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); + *cs++ = MI_LOAD_REGISTER_IMM(4); for (i = 0; i < 4; i++) { - *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = 
i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); *cs++ = 0; } - + *cs++ = MI_NOOP; intel_ring_advance(req, cs); return 0; @@ -1402,24 +1724,24 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; - int ret; + int err; shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_ALIGN(eb->batch_len)); if (IS_ERR(shadow_batch_obj)) return ERR_CAST(shadow_batch_obj); - ret = intel_engine_cmd_parser(eb->engine, + err = intel_engine_cmd_parser(eb->engine, eb->batch->obj, shadow_batch_obj, eb->batch_start_offset, eb->batch_len, is_master); - if (ret) { - if (ret == -EACCES) /* unhandled chained batch */ + if (err) { + if (err == -EACCES) /* unhandled chained batch */ vma = NULL; else - vma = ERR_PTR(ret); + vma = ERR_PTR(err); goto out; } @@ -1428,10 +1750,10 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) goto out; vma->exec_entry = - memset(&eb->shadow_exec_entry, 0, sizeof(*vma->exec_entry)); + memset(&eb->exec[eb->buffer_count++], + 0, sizeof(*vma->exec_entry)); vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; - i915_gem_object_get(shadow_batch_obj); - list_add_tail(&vma->exec_link, &eb->vmas); + __exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma); out: i915_gem_object_unpin_pages(shadow_batch_obj); @@ -1439,41 +1761,37 @@ out: } static void -add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) +add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) { req->file_priv = file->driver_priv; list_add_tail(&req->client_link, &req->file_priv->mm.request_list); } -static int -execbuf_submit(struct i915_execbuffer *eb) +static int eb_submit(struct i915_execbuffer *eb) { - int ret; + int err; - ret = eb_move_to_gpu(eb); - if (ret) - return ret; + err = eb_move_to_gpu(eb); + if (err) + return err; - ret = i915_switch_context(eb->request); - if (ret) - return ret; + err = 
i915_switch_context(eb->request); + if (err) + return err; if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(eb->request); - if (ret) - return ret; + err = i915_reset_gen7_sol_offsets(eb->request); + if (err) + return err; } - ret = eb->engine->emit_bb_start(eb->request, + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, eb->batch_len, - eb->dispatch_flags); - if (ret) - return ret; - - eb_move_to_active(eb); + eb->batch_flags); + if (err) + return err; return 0; } @@ -1564,34 +1882,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; int out_fence_fd = -1; - int ret; + int err; - if (!i915_gem_check_execbuffer(args)) - return -EINVAL; - - ret = validate_exec_list(dev, exec, args->buffer_count); - if (ret) - return ret; + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & + ~__EXEC_OBJECT_UNKNOWN_FLAGS); eb.i915 = to_i915(dev); eb.file = file; eb.args = args; + if (!(args->flags & I915_EXEC_NO_RELOC)) + args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; - eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; + eb.ctx = NULL; + eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; + if (USES_FULL_PPGTT(eb.i915)) + eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT; reloc_cache_init(&eb.reloc_cache, eb.i915); + eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; - eb.dispatch_flags = 0; + eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) return -EPERM; - eb.dispatch_flags |= I915_DISPATCH_SECURE; + eb.batch_flags |= I915_DISPATCH_SECURE; } if (args->flags & I915_EXEC_IS_PINNED) - eb.dispatch_flags |= I915_DISPATCH_PINNED; + eb.batch_flags |= I915_DISPATCH_PINNED; eb.engine = eb_select_engine(eb.i915, file, args); if (!eb.engine) @@ -1608,7 +1928,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, return 
-EINVAL; } - eb.dispatch_flags |= I915_DISPATCH_RS; + eb.batch_flags |= I915_DISPATCH_RS; } if (args->flags & I915_EXEC_FENCE_IN) { @@ -1620,71 +1940,53 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_FENCE_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { - ret = out_fence_fd; + err = out_fence_fd; goto err_in_fence; } } - /* Take a local wakeref for preparing to dispatch the execbuf as + if (eb_create(&eb)) + return -ENOMEM; + + /* + * Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. Upon first dispatch, we acquire another prolonged * wakeref that we hold until the GPU has been idle for at least * 100ms. */ intel_runtime_pm_get(eb.i915); + err = i915_mutex_lock_interruptible(dev); + if (err) + goto err_rpm; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto pre_mutex_err; + err = eb_select_context(&eb); + if (unlikely(err)) + goto err_unlock; - ret = eb_select_context(&eb); - if (ret) { - mutex_unlock(&dev->struct_mutex); - goto pre_mutex_err; - } + err = eb_relocate(&eb); + if (err) + /* + * If the user expects the execobject.offset and + * reloc.presumed_offset to be an exact match, + * as for using NO_RELOC, then we cannot update + * the execobject.offset until we have completed + * relocation. + */ + args->flags &= ~__EXEC_HAS_RELOC; + if (err < 0) + goto err_vma; - if (eb_create(&eb)) { - i915_gem_context_put(eb.ctx); - mutex_unlock(&dev->struct_mutex); - ret = -ENOMEM; - goto pre_mutex_err; - } - - /* Look up object handles */ - ret = eb_lookup_vmas(&eb); - if (ret) - goto err; - - /* take note of the batch buffer before we might reorder the lists */ - eb.batch = eb_get_batch(&eb); - - /* Move the objects en-masse into the GTT, evicting if necessary. */ - ret = eb_reserve(&eb); - if (ret) - goto err; - - /* The objects are in their final locations, apply the relocations. 
*/ - if (eb.need_relocs) - ret = eb_relocate(&eb); - if (ret) { - if (ret == -EFAULT) { - ret = eb_relocate_slow(&eb); - BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - } - if (ret) - goto err; - } - - if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) { + if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); - ret = -EINVAL; - goto err; + err = -EINVAL; + goto err_vma; } if (eb.batch_start_offset > eb.batch->size || eb.batch_len > eb.batch->size - eb.batch_start_offset) { DRM_DEBUG("Attempting to use out-of-bounds batch\n"); - ret = -EINVAL; - goto err; + err = -EINVAL; + goto err_vma; } if (eb.engine->needs_cmd_parser && eb.batch_len) { @@ -1692,8 +1994,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, vma = eb_parse(&eb, drm_is_current_master(file)); if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err; + err = PTR_ERR(vma); + goto err_vma; } if (vma) { @@ -1706,7 +2008,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, * specifically don't want that set on batches the * command parser has accepted. */ - eb.dispatch_flags |= I915_DISPATCH_SECURE; + eb.batch_flags |= I915_DISPATCH_SECURE; eb.batch_start_offset = 0; eb.batch = vma; } @@ -1715,11 +2017,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (eb.batch_len == 0) eb.batch_len = eb.batch->size - eb.batch_start_offset; - /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (eb.dispatch_flags & I915_DISPATCH_SECURE) { - struct drm_i915_gem_object *obj = eb.batch->obj; + if (eb.batch_flags & I915_DISPATCH_SECURE) { struct i915_vma *vma; /* @@ -1732,10 +2034,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, * fitting due to fragmentation. * So this is actually safe. 
*/ - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err; + err = PTR_ERR(vma); + goto err_vma; } eb.batch = vma; @@ -1744,25 +2046,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, /* Allocate a request for this batch buffer nice and early. */ eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); if (IS_ERR(eb.request)) { - ret = PTR_ERR(eb.request); + err = PTR_ERR(eb.request); goto err_batch_unpin; } if (in_fence) { - ret = i915_gem_request_await_dma_fence(eb.request, in_fence); - if (ret < 0) + err = i915_gem_request_await_dma_fence(eb.request, in_fence); + if (err < 0) goto err_request; } if (out_fence_fd != -1) { out_fence = sync_file_create(&eb.request->fence); if (!out_fence) { - ret = -ENOMEM; + err = -ENOMEM; goto err_request; } } - /* Whilst this request exists, batch_obj will be on the + /* + * Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the * inactive_list and lose its active reference. Hence we do not need @@ -1770,14 +2073,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ eb.request->batch = eb.batch; - trace_i915_gem_request_queue(eb.request, eb.dispatch_flags); - ret = execbuf_submit(&eb); + trace_i915_gem_request_queue(eb.request, eb.batch_flags); + err = eb_submit(&eb); err_request: - __i915_add_request(eb.request, ret == 0); + __i915_add_request(eb.request, err == 0); add_to_client(eb.request, file); if (out_fence) { - if (ret == 0) { + if (err == 0) { fd_install(out_fence_fd, out_fence->file); args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ args->rsvd2 |= (u64)out_fence_fd << 32; @@ -1788,28 +2091,22 @@ err_request: } err_batch_unpin: - /* - * FIXME: We crucially rely upon the active tracking for the (ppgtt) - * batch vma for correctness. 
For less ugly and less fragility this - * needs to be adjusted to also track the ggtt batch vma properly as - * active. - */ - if (eb.dispatch_flags & I915_DISPATCH_SECURE) + if (eb.batch_flags & I915_DISPATCH_SECURE) i915_vma_unpin(eb.batch); -err: - /* the request owns the ref now */ - eb_destroy(&eb); +err_vma: + if (eb.exec) + eb_release_vmas(&eb); + i915_gem_context_put(eb.ctx); +err_unlock: mutex_unlock(&dev->struct_mutex); - -pre_mutex_err: - /* intel_gpu_busy should also get a ref, so it will free when the device - * is really idle. */ +err_rpm: intel_runtime_pm_put(eb.i915); + eb_destroy(&eb); if (out_fence_fd != -1) put_unused_fd(out_fence_fd); err_in_fence: dma_fence_put(in_fence); - return ret; + return err; } /* @@ -1820,20 +2117,38 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file) { + const size_t sz = sizeof(struct drm_i915_gem_exec_object2); struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_i915_gem_exec_object2 *exec2_list = NULL; - int ret, i; + unsigned int i; + int err; - if (args->buffer_count < 1) { - DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); + if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { + DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); return -EINVAL; } + exec2.buffers_ptr = args->buffers_ptr; + exec2.buffer_count = args->buffer_count; + exec2.batch_start_offset = args->batch_start_offset; + exec2.batch_len = args->batch_len; + exec2.DR1 = args->DR1; + exec2.DR4 = args->DR4; + exec2.num_cliprects = args->num_cliprects; + exec2.cliprects_ptr = args->cliprects_ptr; + exec2.flags = I915_EXEC_RENDER; + i915_execbuffer2_set_context_id(exec2, 0); + + if (!i915_gem_check_execbuffer(&exec2)) + return -EINVAL; + /* Copy in the exec list from userland */ - exec_list = kvmalloc_array(sizeof(*exec_list), args->buffer_count, GFP_KERNEL); - exec2_list = 
kvmalloc_array(sizeof(*exec2_list), args->buffer_count, GFP_KERNEL); + exec_list = kvmalloc_array(args->buffer_count, sizeof(*exec_list), + __GFP_NOWARN | GFP_TEMPORARY); + exec2_list = kvmalloc_array(args->buffer_count + 1, sz, + __GFP_NOWARN | GFP_TEMPORARY); if (exec_list == NULL || exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %d buffers\n", args->buffer_count); @@ -1841,12 +2156,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, kvfree(exec2_list); return -ENOMEM; } - ret = copy_from_user(exec_list, + err = copy_from_user(exec_list, u64_to_user_ptr(args->buffers_ptr), sizeof(*exec_list) * args->buffer_count); - if (ret != 0) { + if (err) { DRM_DEBUG("copy %d exec entries failed %d\n", - args->buffer_count, ret); + args->buffer_count, err); kvfree(exec_list); kvfree(exec2_list); return -EFAULT; @@ -1864,99 +2179,94 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2_list[i].flags = 0; } - exec2.buffers_ptr = args->buffers_ptr; - exec2.buffer_count = args->buffer_count; - exec2.batch_start_offset = args->batch_start_offset; - exec2.batch_len = args->batch_len; - exec2.DR1 = args->DR1; - exec2.DR4 = args->DR4; - exec2.num_cliprects = args->num_cliprects; - exec2.cliprects_ptr = args->cliprects_ptr; - exec2.flags = I915_EXEC_RENDER; - i915_execbuffer2_set_context_id(exec2, 0); - - ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); - if (!ret) { + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); + if (exec2.flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); /* Copy the new buffer offsets back to the user's exec list. 
*/ for (i = 0; i < args->buffer_count; i++) { + if (!(exec2_list[i].offset & UPDATE)) + continue; + exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset); - ret = __copy_to_user(&user_exec_list[i].offset, - &exec2_list[i].offset, - sizeof(user_exec_list[i].offset)); - if (ret) { - ret = -EFAULT; - DRM_DEBUG("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); + gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); + exec2_list[i].offset &= PIN_OFFSET_MASK; + if (__copy_to_user(&user_exec_list[i].offset, + &exec2_list[i].offset, + sizeof(user_exec_list[i].offset))) break; - } } } kvfree(exec_list); kvfree(exec2_list); - return ret; + return err; } int i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_file *file) { + const size_t sz = sizeof(struct drm_i915_gem_exec_object2); struct drm_i915_gem_execbuffer2 *args = data; - struct drm_i915_gem_exec_object2 *exec2_list = NULL; - int ret; + struct drm_i915_gem_exec_object2 *exec2_list; + int err; - if (args->buffer_count < 1 || - args->buffer_count > UINT_MAX / sizeof(*exec2_list)) { + if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); return -EINVAL; } - exec2_list = kvmalloc_array(args->buffer_count, - sizeof(*exec2_list), - GFP_TEMPORARY); + if (!i915_gem_check_execbuffer(args)) + return -EINVAL; + + /* Allocate an extra slot for use by the command parser */ + exec2_list = kvmalloc_array(args->buffer_count + 1, sz, + __GFP_NOWARN | GFP_TEMPORARY); if (exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %d buffers\n", args->buffer_count); return -ENOMEM; } - ret = copy_from_user(exec2_list, - u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec2_list) * args->buffer_count); - if (ret != 0) { - DRM_DEBUG("copy %d exec entries failed %d\n", - args->buffer_count, ret); + if (copy_from_user(exec2_list, + u64_to_user_ptr(args->buffers_ptr), + 
sizeof(*exec2_list) * args->buffer_count)) { + DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count); kvfree(exec2_list); return -EFAULT; } - ret = i915_gem_do_execbuffer(dev, file, args, exec2_list); - if (!ret) { - /* Copy the new buffer offsets back to the user's exec list. */ - struct drm_i915_gem_exec_object2 __user *user_exec_list = - u64_to_user_ptr(args->buffers_ptr); - int i; + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); + /* + * Now that we have begun execution of the batchbuffer, we ignore + * any new error after this point. Also given that we have already + * updated the associated relocations, we try to write out the current + * object locations irrespective of any error. + */ + if (args->flags & __EXEC_HAS_RELOC) { + struct drm_i915_gem_exec_object2 __user *user_exec_list = + u64_to_user_ptr(args->buffers_ptr); + unsigned int i; + + /* Copy the new buffer offsets back to the user's exec list. */ + user_access_begin(); for (i = 0; i < args->buffer_count; i++) { + if (!(exec2_list[i].offset & UPDATE)) + continue; + exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset); - ret = __copy_to_user(&user_exec_list[i].offset, - &exec2_list[i].offset, - sizeof(user_exec_list[i].offset)); - if (ret) { - ret = -EFAULT; - DRM_DEBUG("failed to copy %d exec entries " - "back to user\n", - args->buffer_count); - break; - } + gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); + unsafe_put_user(exec2_list[i].offset, + &user_exec_list[i].offset, + end_user); } +end_user: + user_access_end(); } + args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; kvfree(exec2_list); - return ret; + return err; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index ce68194ebff6..9e6a47323362 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -463,7 +463,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) size, obj->base.size, flags & PIN_MAPPABLE ? 
"mappable" : "total", end); - return -E2BIG; + return -ENOSPC; } ret = i915_gem_object_pin_pages(obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index ea98e6e4262f..04d7a5da70fd 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -103,6 +103,7 @@ struct i915_vma { /** This vma's place in the execbuf reservation list */ struct list_head exec_link; + struct list_head reloc_link; /** This vma's place in the eviction list */ struct list_head evict_link; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 14e9c2fbc4e6..5ea373221f49 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -304,7 +304,7 @@ static int igt_evict_vm(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ - err = i915_gem_evict_vm(&ggtt->base, false); + err = i915_gem_evict_vm(&ggtt->base); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -313,7 +313,7 @@ static int igt_evict_vm(void *arg) unpin_ggtt(i915); - err = i915_gem_evict_vm(&ggtt->base, false); + err = i915_gem_evict_vm(&ggtt->base); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index ad56566e24db..fb9072d5877f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -224,14 +224,6 @@ static bool assert_pin_valid(const struct i915_vma *vma, return true; } -__maybe_unused -static bool assert_pin_e2big(const struct i915_vma *vma, - const struct pin_mode *mode, - int result) -{ - return result == -E2BIG; -} - __maybe_unused static bool assert_pin_enospc(const struct i915_vma *vma, const struct pin_mode *mode, @@ -255,7 +247,6 @@ static int igt_vma_pin1(void *arg) #define VALID(sz, fl) { .size = (sz), .flags = (fl), 
.assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } #define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } #define INVALID(sz, fl) __INVALID(sz, fl, assert_pin_einval, EINVAL) -#define TOOBIG(sz, fl) __INVALID(sz, fl, assert_pin_e2big, E2BIG) #define NOSPACE(sz, fl) __INVALID(sz, fl, assert_pin_enospc, ENOSPC) VALID(0, PIN_GLOBAL), VALID(0, PIN_GLOBAL | PIN_MAPPABLE), @@ -276,11 +267,11 @@ static int igt_vma_pin1(void *arg) VALID(8192, PIN_GLOBAL), VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), - TOOBIG(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + NOSPACE(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL), VALID(i915->ggtt.base.total, PIN_GLOBAL), - TOOBIG(i915->ggtt.base.total + 4096, PIN_GLOBAL), - TOOBIG(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), + NOSPACE(i915->ggtt.base.total + 4096, PIN_GLOBAL), + NOSPACE(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), @@ -300,7 +291,6 @@ static int igt_vma_pin1(void *arg) #endif { }, #undef NOSPACE -#undef TOOBIG #undef INVALID #undef __INVALID #undef VALID From dade2a6165fd7bd825958e9b0ad04678f53f0f60 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:20 +0100 Subject: [PATCH 237/341] drm/i915: Store a persistent reference for an object in the execbuffer cache If we take a reference to the object/vma when it is first used in an execbuf, we can keep that reference until the object's file-local handle is closed. Thereby saving a frequent ref/unref pair. 
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_context.c | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 35 +++++++++++++++------- drivers/gpu/drm/i915/i915_vma.c | 2 ++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 23f74014e158..39ed58a21fc1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -152,6 +152,7 @@ static void vma_lut_free(struct i915_gem_context *ctx) hlist_for_each_entry(vma, &lut->ht[i], ctx_node) { vma->obj->vma_hashed = NULL; vma->ctx = NULL; + i915_vma_put(vma); } } kvfree(lut->ht); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a052072fe8b3..d91386f0e840 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -42,11 +42,12 @@ #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ -#define __EXEC_OBJECT_HAS_PIN BIT(31) -#define __EXEC_OBJECT_HAS_FENCE BIT(30) -#define __EXEC_OBJECT_NEEDS_MAP BIT(29) -#define __EXEC_OBJECT_NEEDS_BIAS BIT(28) -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ +#define __EXEC_OBJECT_HAS_REF BIT(31) +#define __EXEC_OBJECT_HAS_PIN BIT(30) +#define __EXEC_OBJECT_HAS_FENCE BIT(29) +#define __EXEC_OBJECT_NEEDS_MAP BIT(28) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(27) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */ #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) @@ -465,7 +466,7 @@ eb_add_vma(struct i915_execbuffer *eb, * to find the right target VMA when doing relocations. 
*/ vma->exec_entry = entry; - __exec_to_vma(entry) = (uintptr_t)i915_vma_get(vma); + __exec_to_vma(entry) = (uintptr_t)vma; err = 0; if (vma->node.size) @@ -769,11 +770,19 @@ next_vma: ; GEM_BUG_ON(obj->vma_hashed); obj->vma_hashed = vma; } + + i915_vma_get(vma); } err = eb_add_vma(eb, &eb->exec[i], vma); if (unlikely(err)) goto err; + + /* Only after we validated the user didn't use our bits */ + if (vma->ctx != eb->ctx) { + i915_vma_get(vma); + eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF; + } } if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) { @@ -850,9 +859,14 @@ static void eb_release_vmas(const struct i915_execbuffer *eb) GEM_BUG_ON(vma->exec_entry != entry); vma->exec_entry = NULL; - eb_unreserve_vma(vma, entry); + if (entry->flags & __EXEC_OBJECT_HAS_PIN) + __eb_unreserve_vma(vma, entry); - i915_vma_put(vma); + if (entry->flags & __EXEC_OBJECT_HAS_REF) + i915_vma_put(vma); + + entry->flags &= + ~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF); } } @@ -1623,7 +1637,8 @@ skip_flushes: struct i915_vma *vma = exec_to_vma(entry); eb_export_fence(vma->obj, eb->request, entry->flags); - i915_vma_put(vma); + if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF)) + i915_vma_put(vma); } eb->exec = NULL; @@ -1752,7 +1767,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) vma->exec_entry = memset(&eb->exec[eb->buffer_count++], 0, sizeof(*vma->exec_entry)); - vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; + vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; __exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma); out: diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9e6a47323362..f5c57dff288e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -605,6 +605,8 @@ void i915_vma_unlink_ctx(struct i915_vma *vma) if (i915_vma_is_ggtt(vma)) vma->obj->vma_hashed = NULL; vma->ctx = NULL; + + i915_vma_put(vma); } void i915_vma_close(struct i915_vma *vma) 
From 616d9cee4fdc4a377c03be8fd6efa5df4fcd0d81 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:21 +0100 Subject: [PATCH 238/341] drm/i915: First try the previous execbuffer location When choosing a slot for an execbuffer, we ideally want to use the same address as last time (so that we don't have to rebind it) and the same address as expected by the user (so that we don't have to fix up any relocations pointing to it). If we first try to bind at the incoming execbuffer->offset from the user, or at the currently bound offset, that should hopefully achieve the goal of avoiding the rebind cost and the relocation penalty. However, if the object is not currently bound there, we don't want to arbitrarily unbind an object in our chosen position and so choose to rebind/relocate the incoming object instead. After we report the new position back to the user, on the next pass the relocations should have settled down. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++++---- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d91386f0e840..3eaf07dfbe62 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -336,10 +336,15 @@ eb_pin_vma(struct i915_execbuffer *eb, { u64 flags; - flags = vma->node.start; - flags |= PIN_USER | PIN_NONBLOCK | PIN_OFFSET_FIXED; + if (vma->node.size) + flags = vma->node.start; + else + flags = entry->offset & PIN_OFFSET_MASK; + + flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT)) flags |= PIN_GLOBAL; + if (unlikely(i915_vma_pin(vma, 0, 0, flags))) return; @@ -469,8 +474,7 @@ eb_add_vma(struct i915_execbuffer *eb, __exec_to_vma(entry) = (uintptr_t)vma; err = 0; - if
(vma->node.size) - eb_pin_vma(eb, entry, vma); + eb_pin_vma(eb, entry, vma); if (eb_vma_misplaced(entry, vma)) { eb_unreserve_vma(vma, entry); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4ff854e6413c..205dd91d3601 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3402,6 +3402,9 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, if (err != -ENOSPC) return err; + if (flags & PIN_NOEVICT) + return -ENOSPC; + err = i915_gem_evict_for_node(vm, node, flags); if (err == 0) err = drm_mm_reserve_node(&vm->mm, node); @@ -3516,6 +3519,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, if (err != -ENOSPC) return err; + if (flags & PIN_NOEVICT) + return -ENOSPC; + /* No free space, pick a slot at random. * * There is a pathological case here using a GTT shared between diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index da9aa9f706e7..1b2a56c3e5d3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -589,6 +589,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_MAPPABLE BIT(1) #define PIN_ZONE_4G BIT(2) #define PIN_NONFAULT BIT(3) +#define PIN_NOEVICT BIT(4) #define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ #define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ From 8a2421bd0d9abb3a599969d88e1910a2a101eccc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:22 +0100 Subject: [PATCH 239/341] drm/i915: Wait upon userptr get-user-pages within execbuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simply hides the EAGAIN caused by userptr when userspace causes resource contention. However, it is quite beneficial with highly contended userptr users as we avoid repeating the setup costs and kernel-user context switches. 
Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 10 +++++++++- drivers/gpu/drm/i915/i915_gem.c | 4 +++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++ drivers/gpu/drm/i915/i915_gem_userptr.c | 18 +++++++++++++++--- 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a534412a5551..e33a2ed9244c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -579,6 +579,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_context_fini(dev_priv); + i915_gem_cleanup_userptr(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7e182dd7e356..e750be52b04b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1453,6 +1453,13 @@ struct i915_gem_mm { /** LRU list of objects with fence regs on them. */ struct list_head fence_list; + /** + * Workqueue to fault in userptr pages, flushed by the execbuf + * when required but otherwise left to userspace to try again + * on EAGAIN. 
+ */ + struct workqueue_struct *userptr_wq; + u64 unordered_timeline; /* the indicator for dispatch video commands on two BSD rings */ @@ -3228,7 +3235,8 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void i915_gem_init_userptr(struct drm_i915_private *dev_priv); +int i915_gem_init_userptr(struct drm_i915_private *dev_priv); +void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); int i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fcdc452f28bb..96b344901a7b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4804,7 +4804,9 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - i915_gem_init_userptr(dev_priv); + ret = i915_gem_init_userptr(dev_priv); + if (ret) + goto out_unlock; ret = i915_gem_init_ggtt(dev_priv); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3eaf07dfbe62..f4b02ef3987f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1499,6 +1499,9 @@ repeat: goto out; } + /* A frequent cause for EAGAIN are currently unavailable client pages */ + flush_workqueue(eb->i915->mm.userptr_wq); + err = i915_mutex_lock_interruptible(dev); if (err) { mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 05c36f663550..ccd09e8419f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -378,7 +378,7 @@ __i915_mm_struct_free(struct kref *kref) mutex_unlock(&mm->i915->mm_lock); 
INIT_WORK(&mm->work, __i915_mm_struct_free__worker); - schedule_work(&mm->work); + queue_work(mm->i915->mm.userptr_wq, &mm->work); } static void @@ -598,7 +598,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) get_task_struct(work->task); INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); - schedule_work(&work->work); + queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work); return ERR_PTR(-EAGAIN); } @@ -830,8 +830,20 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file return 0; } -void i915_gem_init_userptr(struct drm_i915_private *dev_priv) +int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { mutex_init(&dev_priv->mm_lock); hash_init(dev_priv->mm_structs); + + dev_priv->mm.userptr_wq = + alloc_workqueue("i915-userptr-acquire", WQ_HIGHPRI, 0); + if (!dev_priv->mm.userptr_wq) + return -ENOMEM; + + return 0; +} + +void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv) +{ + destroy_workqueue(dev_priv->mm.userptr_wq); } From 1a71cf2fa646799d4397a49b223549d8617fece0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:23 +0100 Subject: [PATCH 240/341] drm/i915: Allow execbuffer to use the first object as the batch Currently, the last object in the execlist is always the batch. However, when building the batch buffer we often know the batch object first and if we can use the first slot in the execlist we can emit relocation instructions relative to it immediately and avoid a separate pass to adjust the relocations to point to the last execlist slot.
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++++- include/uapi/drm/i915_drm.h | 19 +++++++++++++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e33a2ed9244c..3c2af70034cf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -367,6 +367,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_ASYNC: case I915_PARAM_HAS_EXEC_FENCE: case I915_PARAM_HAS_EXEC_CAPTURE: + case I915_PARAM_HAS_EXEC_BATCH_FIRST: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f4b02ef3987f..e262133a7cf5 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -645,7 +645,10 @@ ht_needs_resize(const struct i915_gem_context_vma_lut *lut) static unsigned int eb_batch_index(const struct i915_execbuffer *eb) { - return eb->buffer_count - 1; + if (eb->args->flags & I915_EXEC_BATCH_FIRST) + return 0; + else + return eb->buffer_count - 1; } static int eb_select_context(struct i915_execbuffer *eb) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 15bc9f78ba4d..7ccbd6a2bbe0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -418,7 +418,6 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 -/* Query the mask of slices available for this system */ #define I915_PARAM_SLICE_MASK 46 /* Assuming it's uniform for each slice, this queries the mask of subslices @@ -426,6 +425,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_SUBSLICE_MASK 47 +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports 
supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. + */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + typedef struct drm_i915_getparam { __s32 param; /* @@ -912,7 +917,17 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_OUT (1<<17) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1)) +/* + * Traditionally the execbuf ioctl has only considered the final element in + * the execobject[] to be the executable batch. Often though, the client + * will known the batch object prior to construction and being able to place + * it into the execobject[] array first can simplify the relocation tracking. + * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the + * execobject[] as the * batch instead (the default is to use the last + * element). + */ +#define I915_EXEC_BATCH_FIRST (1<<18) +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ From 7dd4f6729f9243bd7046c6f04c107a456bda38eb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:24 +0100 Subject: [PATCH 241/341] drm/i915: Async GPU relocation processing If the user requires patching of their batch or auxiliary buffers, we currently make the alterations on the cpu. If they are active on the GPU at the time, we wait under the struct_mutex for them to finish executing before we rewrite the contents. This happens if shared relocation trees are used between different contexts with separate address space (and the buffers then have different addresses in each), the 3D state will need to be adjusted between execution on each context. 
However, we don't need to use the CPU to do the relocation patching, as we could queue commands to the GPU to perform it and use fences to serialise the operation with the current activity and future - so the operation on the GPU appears just as atomic as performing it immediately. Performing the relocation rewrites on the GPU is not free, in terms of pure throughput, the number of relocations/s is about halved - but more importantly so is the time under the struct_mutex. v2: Break out the request/batch allocation for clearer error flow. v3: A few asserts to ensure rq ordering is maintained Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 1 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 227 ++++++++++++++++++++- 2 files changed, 220 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 96b344901a7b..7dcac3bfb771 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4397,7 +4397,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) { - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_close(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e262133a7cf5..2f7a2d2510fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -40,7 +40,12 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" -#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ +enum { + FORCE_CPU_RELOC = 1, + FORCE_GTT_RELOC, + FORCE_GPU_RELOC, +#define DBG_FORCE_RELOC 0 /* choose one of the above! 
*/ +}; #define __EXEC_OBJECT_HAS_REF BIT(31) #define __EXEC_OBJECT_HAS_PIN BIT(30) @@ -212,10 +217,15 @@ struct i915_execbuffer { struct drm_mm_node node; /** temporary GTT binding */ unsigned long vaddr; /** Current kmap address */ unsigned long page; /** Currently mapped page index */ + unsigned int gen; /** Cached value of INTEL_GEN */ bool use_64bit_reloc : 1; bool has_llc : 1; bool has_fence : 1; bool needs_unfenced : 1; + + struct drm_i915_gem_request *rq; + u32 *rq_cmd; + unsigned int rq_size; } reloc_cache; u64 invalid_flags; /** Set of execobj.flags that are invalid */ @@ -496,8 +506,11 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache, if (!i915_gem_object_has_struct_page(obj)) return false; - if (DBG_USE_CPU_RELOC) - return DBG_USE_CPU_RELOC > 0; + if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) + return true; + + if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) + return false; return (cache->has_llc || obj->cache_dirty || @@ -887,6 +900,8 @@ static void eb_reset_vmas(const struct i915_execbuffer *eb) static void eb_destroy(const struct i915_execbuffer *eb) { + GEM_BUG_ON(eb->reloc_cache.rq); + if (eb->lut_size >= 0) kfree(eb->buckets); } @@ -904,11 +919,14 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->page = -1; cache->vaddr = 0; /* Must be a variable in the struct to allow GCC to unroll. 
*/ + cache->gen = INTEL_GEN(i915); cache->has_llc = HAS_LLC(i915); - cache->has_fence = INTEL_GEN(i915) < 4; - cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); + cache->has_fence = cache->gen < 4; + cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.allocated = false; + cache->rq = NULL; + cache->rq_size = 0; } static inline void *unmask_page(unsigned long p) @@ -930,10 +948,24 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } +static void reloc_gpu_flush(struct reloc_cache *cache) +{ + GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(cache->rq->batch->obj); + i915_gem_chipset_flush(cache->rq->i915); + + __i915_add_request(cache->rq, true); + cache->rq = NULL; +} + static void reloc_cache_reset(struct reloc_cache *cache) { void *vaddr; + if (cache->rq) + reloc_gpu_flush(cache); + if (!cache->vaddr) return; @@ -1099,6 +1131,121 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } +static int __reloc_gpu_alloc(struct i915_execbuffer *eb, + struct i915_vma *vma, + unsigned int len) +{ + struct reloc_cache *cache = &eb->reloc_cache; + struct drm_i915_gem_object *obj; + struct drm_i915_gem_request *rq; + struct i915_vma *batch; + u32 *cmd; + int err; + + GEM_BUG_ON(vma->obj->base.write_domain & I915_GEM_DOMAIN_CPU); + + obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + cmd = i915_gem_object_pin_map(obj, + cache->has_llc ? 
I915_MAP_WB : I915_MAP_WC); + i915_gem_object_unpin_pages(obj); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + err = i915_gem_object_set_to_wc_domain(obj, false); + if (err) + goto err_unmap; + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_unmap; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + if (err) + goto err_unmap; + + rq = i915_gem_request_alloc(eb->engine, eb->ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = i915_gem_request_await_object(rq, vma->obj, true); + if (err) + goto err_request; + + err = eb->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + err = eb->engine->emit_bb_start(rq, + batch->node.start, PAGE_SIZE, + cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); + if (err) + goto err_request; + + GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true)); + i915_vma_move_to_active(batch, rq, 0); + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + i915_vma_unpin(batch); + + i915_vma_move_to_active(vma, rq, true); + reservation_object_lock(vma->obj->resv, NULL); + reservation_object_add_excl_fence(vma->obj->resv, &rq->fence); + reservation_object_unlock(vma->obj->resv); + + rq->batch = batch; + + cache->rq = rq; + cache->rq_cmd = cmd; + cache->rq_size = 0; + + /* Return with batch mapping (cmd) still pinned */ + return 0; + +err_request: + i915_add_request(rq); +err_unpin: + i915_vma_unpin(batch); +err_unmap: + i915_gem_object_unpin_map(obj); + return err; +} + +static u32 *reloc_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + unsigned int len) +{ + struct reloc_cache *cache = &eb->reloc_cache; + u32 *cmd; + + if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) + reloc_gpu_flush(cache); + + if (unlikely(!cache->rq)) { + int err; + + err = 
__reloc_gpu_alloc(eb, vma, len); + if (unlikely(err)) + return ERR_PTR(err); + } + + cmd = cache->rq_cmd + cache->rq_size; + cache->rq_size += len; + + return cmd; +} + static u64 relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, @@ -1111,6 +1258,67 @@ relocate_entry(struct i915_vma *vma, bool wide = eb->reloc_cache.use_64bit_reloc; void *vaddr; + if (!eb->reloc_cache.vaddr && + (DBG_FORCE_RELOC == FORCE_GPU_RELOC || + !reservation_object_test_signaled_rcu(obj->resv, true))) { + const unsigned int gen = eb->reloc_cache.gen; + unsigned int len; + u32 *batch; + u64 addr; + + if (wide) + len = offset & 7 ? 8 : 5; + else if (gen >= 4) + len = 4; + else if (gen >= 3) + len = 3; + else /* On gen2 MI_STORE_DWORD_IMM uses a physical address */ + goto repeat; + + batch = reloc_gpu(eb, vma, len); + if (IS_ERR(batch)) + goto repeat; + + addr = gen8_canonical_addr(vma->node.start + offset); + if (wide) { + if (offset & 7) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_offset); + + addr = gen8_canonical_addr(addr + 4); + + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = upper_32_bits(target_offset); + } else { + *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_offset); + *batch++ = upper_32_bits(target_offset); + } + } else if (gen >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_offset; + } else if (gen >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_offset; + } else { + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *batch++ = addr; + *batch++ = target_offset; + } + + goto out; + } + repeat: vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> 
PAGE_SHIFT); if (IS_ERR(vaddr)) @@ -1127,6 +1335,7 @@ repeat: goto repeat; } +out: return target->node.start | UPDATE; } @@ -1189,7 +1398,8 @@ eb_relocate_entry(struct i915_execbuffer *eb, * If the relocation already has the right value in it, no * more work needs to be done. */ - if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset) + if (!DBG_FORCE_RELOC && + gen8_canonical_addr(target->node.start) == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ @@ -1915,7 +2125,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.i915 = to_i915(dev); eb.file = file; eb.args = args; - if (!(args->flags & I915_EXEC_NO_RELOC)) + if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; eb.ctx = NULL; @@ -2068,6 +2278,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch = vma; } + /* All GPU relocation batches must be submitted prior to the user rq */ + GEM_BUG_ON(eb.reloc_cache.rq); + /* Allocate a request for this batch buffer nice and early. */ eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); if (IS_ERR(eb.request)) { From 95ff7c7dd7098860bc131c7dec0ad76ca61e796a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:25 +0100 Subject: [PATCH 242/341] drm/i915: Stash a pointer to the obj's resv in the vma During execbuf, a mandatory step is that we add this request (this fence) to each object's reservation_object. Inside execbuf, we track the vma, and to add the fence to the reservation_object then means having to first chase the obj, incurring another cache miss. We can reduce the number of cache misses by stashing a pointer to the reservation_object in the vma itself. 
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170616140525.6394-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++----------- drivers/gpu/drm/i915/i915_vma.c | 1 + drivers/gpu/drm/i915/i915_vma.h | 3 ++- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2f7a2d2510fc..eb46dfa374a7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1192,17 +1192,17 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true)); + GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); i915_vma_move_to_active(batch, rq, 0); - reservation_object_lock(obj->resv, NULL); - reservation_object_add_excl_fence(obj->resv, &rq->fence); - reservation_object_unlock(obj->resv); + reservation_object_lock(batch->resv, NULL); + reservation_object_add_excl_fence(batch->resv, &rq->fence); + reservation_object_unlock(batch->resv); i915_vma_unpin(batch); i915_vma_move_to_active(vma, rq, true); - reservation_object_lock(vma->obj->resv, NULL); - reservation_object_add_excl_fence(vma->obj->resv, &rq->fence); - reservation_object_unlock(vma->obj->resv); + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); rq->batch = batch; @@ -1252,7 +1252,6 @@ relocate_entry(struct i915_vma *vma, struct i915_execbuffer *eb, const struct i915_vma *target) { - struct drm_i915_gem_object *obj = vma->obj; u64 offset = reloc->offset; u64 target_offset = relocation_target(reloc, target); bool wide = eb->reloc_cache.use_64bit_reloc; @@ -1260,7 +1259,7 @@ relocate_entry(struct i915_vma *vma, if (!eb->reloc_cache.vaddr && (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - 
!reservation_object_test_signaled_rcu(obj->resv, true))) { + !reservation_object_test_signaled_rcu(vma->resv, true))) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; u32 *batch; @@ -1320,7 +1319,7 @@ relocate_entry(struct i915_vma *vma, } repeat: - vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT); + vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -1793,11 +1792,11 @@ slow: return eb_relocate_slow(eb); } -static void eb_export_fence(struct drm_i915_gem_object *obj, +static void eb_export_fence(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned int flags) { - struct reservation_object *resv = obj->resv; + struct reservation_object *resv = vma->resv; /* * Ignore errors from failing to allocate the new fence, we can't @@ -1856,7 +1855,7 @@ skip_flushes: const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; struct i915_vma *vma = exec_to_vma(entry); - eb_export_fence(vma->obj, eb->request, entry->flags); + eb_export_fence(vma, eb->request, entry->flags); if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF)) i915_vma_put(vma); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index f5c57dff288e..532c709febbd 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -90,6 +90,7 @@ vma_create(struct drm_i915_gem_object *obj, init_request_active(&vma->last_fence, NULL); vma->vm = vm; vma->obj = obj; + vma->resv = obj->resv; vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 04d7a5da70fd..4a673fc1a432 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -50,6 +50,7 @@ struct i915_vma { struct drm_i915_gem_object *obj; struct i915_address_space *vm; struct drm_i915_fence_reg *fence; + struct reservation_object *resv; /** Alias of obj->resv */ struct 
sg_table *pages; void __iomem *iomap; u64 size; @@ -111,8 +112,8 @@ struct i915_vma { /** * Used for performing relocations during execbuffer insertion. */ - struct hlist_node exec_node; struct drm_i915_gem_exec_object2 *exec_entry; + struct hlist_node exec_node; u32 exec_handle; struct i915_gem_context *ctx; From 469f87e158628fe66dcbbce9dd5e7b7acfe934a9 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Thu, 15 Jun 2017 10:29:29 +0800 Subject: [PATCH 243/341] ip_tunnel: fix potential issue in ip_tunnel_rcv When ip_tunnel_rcv fails, the tun_dst won't be freed, so call dst_release to free it in error code path. Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") Acked-by: Eric Dumazet Acked-by: Pravin B Shelar Tested-by: Zhang Shengju Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index b436d0775631..129d1a3616f8 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -446,6 +446,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, return 0; drop: + if (tun_dst) + dst_release((struct dst_entry *)tun_dst); kfree_skb(skb); return 0; } From f1925ca50deb48eddafc01bc12c2a17bfbf54425 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Thu, 15 Jun 2017 10:29:30 +0800 Subject: [PATCH 244/341] ip6_tunnel: fix potential issue in __ip6_tnl_rcv When __ip6_tnl_rcv fails, the tun_dst won't be freed, so call dst_release to free it in error code path. Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") CC: Alexei Starovoitov Tested-by: Zhang Shengju Signed-off-by: Haishuang Yan Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c3581973f5d7..035c0496b92a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -858,6 +858,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, return 0; drop: + if (tun_dst) + dst_release((struct dst_entry *)tun_dst); kfree_skb(skb); return 0; } From 76371d2e3ad1f84426a30ebcd8c3b9b98f4c724f Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 16 Jun 2017 10:46:37 -0700 Subject: [PATCH 245/341] decnet: always not take dst->__refcnt when inserting dst into hash table In the existing dn_route.c code, dn_route_output_slow() takes dst->__refcnt before calling dn_insert_route() while dn_route_input_slow() does not take dst->__refcnt before calling dn_insert_route(). This makes the whole routing code very buggy. In dn_dst_check_expire(), dnrt_free() is called when rt expires. This makes the routes inserted by dn_route_output_slow() not able to be freed as the refcnt is not released. In dn_dst_gc(), dnrt_drop() is called to release rt which could potentially cause the dst->__refcnt to be dropped to -1. In dn_run_flush(), dst_free() is called to release all the dst. Again, it makes the dst inserted by dn_route_output_slow() not able to be released and also, it does not wait on the rcu and could potentially cause crash in the path where other users still refer to this dst. This patch makes sure both input and output path do not take dst->__refcnt before calling dn_insert_route() and also makes sure dnrt_free()/dst_free() is called when removing dst from the hash table. The only difference between those 2 calls is that dnrt_free() waits on the rcu while dst_free() does not. Signed-off-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- net/decnet/dn_route.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 4b9518a0d248..6f95612b4d32 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -188,12 +188,6 @@ static inline void dnrt_free(struct dn_route *rt) call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } -static inline void dnrt_drop(struct dn_route *rt) -{ - dst_release(&rt->dst); - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - static void dn_dst_check_expire(unsigned long dummy) { int i; @@ -248,7 +242,7 @@ static int dn_dst_gc(struct dst_ops *ops) } *rtp = rt->dst.dn_next; rt->dst.dn_next = NULL; - dnrt_drop(rt); + dnrt_free(rt); break; } spin_unlock_bh(&dn_rt_hash_table[i].lock); @@ -350,7 +344,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou dst_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); - dnrt_drop(rt); + dst_free(&rt->dst); *rp = rth; return 0; } @@ -380,7 +374,7 @@ static void dn_run_flush(unsigned long dummy) for(; rt; rt = next) { next = rcu_dereference_raw(rt->dst.dn_next); RCU_INIT_POINTER(rt->dst.dn_next, NULL); - dst_free((struct dst_entry *)rt); + dnrt_free(rt); } nothing_to_declare: @@ -1187,7 +1181,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST); + rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST); if (rt == NULL) goto e_nobufs; From 28e0f4eef6cb3413a8eb943cd9e366793bfa7e87 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 16 Jun 2017 12:36:14 -0700 Subject: [PATCH 246/341] drm/i915: Store 9 bits of PCI Device ID for platforms with a LP PCH Although we use 9 bits of Device ID for identifying PCH, only 8 bits are stored in dev_priv->pch_id. This makes HAS_PCH_CNP_LP() and HAS_PCH_SPT_LP() incorrect. Fix this by storing all the 9 bits for the platforms with LP PCH. 
v2: Drop PCH_LPT_LP change (Imre) Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Imre Deak Fixes: commit ec7e0bb35f8d ("drm/i915/cnp: Add PCI ID for Cannonpoint LP PCH") Reported-by: Imre Deak Reviewed-by: Imre Deak Signed-off-by: Dhinakaran Pandiyan Signed-off-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1497641774-29104-1-git-send-email-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3c2af70034cf..ee2325b180e7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -176,24 +176,26 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) unsigned short id_ext = pch->device & INTEL_PCH_DEVICE_ID_MASK_EXT; - dev_priv->pch_id = id; - if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) { + dev_priv->pch_id = id; dev_priv->pch_type = PCH_IBX; DRM_DEBUG_KMS("Found Ibex Peak PCH\n"); WARN_ON(!IS_GEN5(dev_priv)); } else if (id == INTEL_PCH_CPT_DEVICE_ID_TYPE) { + dev_priv->pch_id = id; dev_priv->pch_type = PCH_CPT; DRM_DEBUG_KMS("Found CougarPoint PCH\n"); WARN_ON(!(IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv))); } else if (id == INTEL_PCH_PPT_DEVICE_ID_TYPE) { /* PantherPoint is CPT compatible */ + dev_priv->pch_id = id; dev_priv->pch_type = PCH_CPT; DRM_DEBUG_KMS("Found PantherPoint PCH\n"); WARN_ON(!(IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv))); } else if (id == INTEL_PCH_LPT_DEVICE_ID_TYPE) { + dev_priv->pch_id = id; dev_priv->pch_type = PCH_LPT; DRM_DEBUG_KMS("Found LynxPoint PCH\n"); WARN_ON(!IS_HASWELL(dev_priv) && @@ -201,6 +203,7 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) WARN_ON(IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)); } else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { + dev_priv->pch_id = id; dev_priv->pch_type = PCH_LPT; DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); WARN_ON(!IS_HASWELL(dev_priv) && @@ -208,26 +211,31 @@ static 
void intel_detect_pch(struct drm_i915_private *dev_priv) WARN_ON(!IS_HSW_ULT(dev_priv) && !IS_BDW_ULT(dev_priv)); } else if (id == INTEL_PCH_SPT_DEVICE_ID_TYPE) { + dev_priv->pch_id = id; dev_priv->pch_type = PCH_SPT; DRM_DEBUG_KMS("Found SunrisePoint PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); } else if (id_ext == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) { + dev_priv->pch_id = id_ext; dev_priv->pch_type = PCH_SPT; DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); } else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) { + dev_priv->pch_id = id; dev_priv->pch_type = PCH_KBP; DRM_DEBUG_KMS("Found KabyPoint PCH\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); } else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) { + dev_priv->pch_id = id; dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found CannonPoint PCH\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); } else if (id_ext == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) { + dev_priv->pch_id = id_ext; dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found CannonPoint LP PCH\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && @@ -239,6 +247,7 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) PCI_SUBVENDOR_ID_REDHAT_QUMRANET && pch->subsystem_device == PCI_SUBDEVICE_ID_QEMU)) { + dev_priv->pch_id = id; dev_priv->pch_type = intel_virt_detect_pch(dev_priv); } else From 46c26662d2fb8377f5d387cf2e5ee3246af780b7 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 16 Jun 2017 15:49:58 -0700 Subject: [PATCH 247/341] drm/i915/cfl: Introduce Coffee Lake workarounds. Coffee Lake inherits most of the Kabylake production workarounds. v2: Fix typo on commit message and remove WaDisableKillLogic and GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC, since as Mika pointed out they shouldn't be here for cfl according to BSpec.
Cc: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1497653398-15722-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 81 +++++++++++++++++++------- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 205dd91d3601..61fc7e90a7da 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1884,7 +1884,7 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index bc38bd128b76..a4487c5b7e37 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -814,26 +814,27 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; int ret; - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - /* WaDisableKillLogic:bxt,skl,kbl */ + /* WaDisableKillLogic:bxt,skl,kbl,cfl */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | 
ECOCHK_DIS_TLB); - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + if (!IS_COFFEELAKE(dev_priv)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) @@ -851,18 +852,18 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) */ } - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk */ - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_ENABLE_YV12_BUGFIX | GEN9_ENABLE_GPGPU_PREEMPTION); - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl */ + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); @@ -871,7 +872,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, PIXEL_MASK_CAMMING_DISABLE); - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 
HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); @@ -889,39 +890,41 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) * a TLB invalidation occurs during a PSD flush. */ - /* WaForceEnableNonCoherent:skl,bxt,kbl */ + /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FORCE_NON_COHERENT); /* WaDisableHDCInvalidation:skl,bxt,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); + if (!IS_COFFEELAKE(dev_priv)) + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv) || IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - /* WaOCLCoherentLineFlush:skl,bxt,kbl */ + /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) return ret; - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */ ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); if (ret) return ret; @@ -1140,6 +1143,38 @@ static int glk_init_workarounds(struct intel_engine_cs *engine) return 0; } +static int cfl_init_workarounds(struct 
intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaEnableGapsTsvCreditFix:cfl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaToEnableHwFixForPushConstHWBug:cfl */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableGafsUnitClkGating:cfl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:cfl */ + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + /* WaInPlaceDecompressionHang:cfl */ + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + return 0; +} + int init_workarounds_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1162,6 +1197,8 @@ int init_workarounds_ring(struct intel_engine_cs *engine) err = kbl_init_workarounds(engine); else if (IS_GEMINILAKE(dev_priv)) err = glk_init_workarounds(engine); + else if (IS_COFFEELAKE(dev_priv)) + err = cfl_init_workarounds(engine); else err = 0; if (err) From 0e08270a1f01bceae17d32a0d75aad2388bd1ba2 Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Tue, 13 Jun 2017 16:52:54 -0600 Subject: [PATCH 248/341] drm/msm: Separate locking of buffer resources from struct_mutex Buffer object specific resources like pages, domains, sg list need not be protected with struct_mutex. They can be protected with a buffer object level lock. This simplifies locking and makes it easier to avoid potential recursive locking scenarios for SVM involving mmap_sem and struct_mutex. This also removes unnecessary serialization when creating buffer objects, and also between buffer object creation and GPU command submission. 
Signed-off-by: Sushmita Susheelendra [robclark: squash in handling new locking for shrinker] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 8 +- drivers/gpu/drm/msm/adreno/a5xx_power.c | 8 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 +- drivers/gpu/drm/msm/dsi/dsi_host.c | 4 +- drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 2 +- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 2 - drivers/gpu/drm/msm/msm_drv.c | 1 + drivers/gpu/drm/msm/msm_drv.h | 9 +- drivers/gpu/drm/msm/msm_fbdev.c | 6 +- drivers/gpu/drm/msm/msm_gem.c | 288 ++++++++++++++--------- drivers/gpu/drm/msm/msm_gem.h | 22 ++ drivers/gpu/drm/msm/msm_gem_shrinker.c | 16 +- drivers/gpu/drm/msm/msm_gem_submit.c | 6 +- drivers/gpu/drm/msm/msm_gem_vma.c | 10 +- drivers/gpu/drm/msm/msm_gpu.c | 4 +- drivers/gpu/drm/msm/msm_rd.c | 4 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- 17 files changed, 245 insertions(+), 151 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index f6a9eec71fec..b4b54f1c24bc 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -297,18 +297,18 @@ static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, struct drm_gem_object *bo; void *ptr; - bo = msm_gem_new(drm, fw->size - 4, MSM_BO_UNCACHED); + bo = msm_gem_new_locked(drm, fw->size - 4, MSM_BO_UNCACHED); if (IS_ERR(bo)) return bo; - ptr = msm_gem_get_vaddr_locked(bo); + ptr = msm_gem_get_vaddr(bo); if (!ptr) { drm_gem_object_unreference(bo); return ERR_PTR(-ENOMEM); } if (iova) { - int ret = msm_gem_get_iova_locked(bo, gpu->aspace, iova); + int ret = msm_gem_get_iova(bo, gpu->aspace, iova); if (ret) { drm_gem_object_unreference(bo); @@ -318,7 +318,7 @@ static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, memcpy(ptr, &fw->data[4], fw->size - 4); - msm_gem_put_vaddr_locked(bo); + msm_gem_put_vaddr(bo); return bo; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c 
b/drivers/gpu/drm/msm/adreno/a5xx_power.c index feb7f4fd42fb..87af6eea0483 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -294,15 +294,15 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) */ bosize = (cmds_size + (cmds_size / TYPE4_MAX_PAYLOAD) + 1) << 2; - a5xx_gpu->gpmu_bo = msm_gem_new(drm, bosize, MSM_BO_UNCACHED); + a5xx_gpu->gpmu_bo = msm_gem_new_locked(drm, bosize, MSM_BO_UNCACHED); if (IS_ERR(a5xx_gpu->gpmu_bo)) goto err; - if (msm_gem_get_iova_locked(a5xx_gpu->gpmu_bo, gpu->aspace, + if (msm_gem_get_iova(a5xx_gpu->gpmu_bo, gpu->aspace, &a5xx_gpu->gpmu_iova)) goto err; - ptr = msm_gem_get_vaddr_locked(a5xx_gpu->gpmu_bo); + ptr = msm_gem_get_vaddr(a5xx_gpu->gpmu_bo); if (!ptr) goto err; @@ -321,7 +321,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) cmds_size -= _size; } - msm_gem_put_vaddr_locked(a5xx_gpu->gpmu_bo); + msm_gem_put_vaddr(a5xx_gpu->gpmu_bo); a5xx_gpu->gpmu_dwords = dwords; goto out; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 6fa694e6ae8c..f1ab2703674a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -64,7 +64,7 @@ int adreno_hw_init(struct msm_gpu *gpu) DBG("%s", gpu->name); - ret = msm_gem_get_iova_locked(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); + ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); if (ret) { gpu->rb_iova = 0; dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); @@ -397,10 +397,8 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, return ret; } - mutex_lock(&drm->struct_mutex); adreno_gpu->memptrs_bo = msm_gem_new(drm, sizeof(*adreno_gpu->memptrs), MSM_BO_UNCACHED); - mutex_unlock(&drm->struct_mutex); if (IS_ERR(adreno_gpu->memptrs_bo)) { ret = PTR_ERR(adreno_gpu->memptrs_bo); adreno_gpu->memptrs_bo = NULL; diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 
2e7077194b21..9e9c5696bc03 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -982,18 +982,16 @@ static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) uint64_t iova; if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) { - mutex_lock(&dev->struct_mutex); msm_host->tx_gem_obj = msm_gem_new(dev, size, MSM_BO_UNCACHED); if (IS_ERR(msm_host->tx_gem_obj)) { ret = PTR_ERR(msm_host->tx_gem_obj); pr_err("%s: failed to allocate gem, %d\n", __func__, ret); msm_host->tx_gem_obj = NULL; - mutex_unlock(&dev->struct_mutex); return ret; } - ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, + ret = msm_gem_get_iova(msm_host->tx_gem_obj, priv->kms->aspace, &iova); mutex_unlock(&dev->struct_mutex); if (ret) { diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 59153a4ebd18..615e1def64d9 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -374,7 +374,7 @@ static void update_cursor(struct drm_crtc *crtc) if (next_bo) { /* take a obj ref + iova ref when we start scanning out: */ drm_gem_object_reference(next_bo); - msm_gem_get_iova_locked(next_bo, kms->aspace, &iova); + msm_gem_get_iova(next_bo, kms->aspace, &iova); /* enable cursor: */ mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_SIZE(dma), diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 3d96687a1b39..bcd1f5cac72c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -528,9 +528,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) goto fail; } - mutex_lock(&dev->struct_mutex); mdp4_kms->blank_cursor_bo = msm_gem_new(dev, SZ_16K, MSM_BO_WC); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(mdp4_kms->blank_cursor_bo)) { ret = PTR_ERR(mdp4_kms->blank_cursor_bo); dev_err(dev->dev, "could not allocate blank-cursor bo: %d\n", ret); diff --git a/drivers/gpu/drm/msm/msm_drv.c 
b/drivers/gpu/drm/msm/msm_drv.c index 506de3862c18..f49f6ac5585c 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -336,6 +336,7 @@ static int msm_init_vram(struct drm_device *dev) priv->vram.size = size; drm_mm_init(&priv->vram.mm, 0, (size >> PAGE_SHIFT) - 1); + spin_lock_init(&priv->vram.lock); attrs |= DMA_ATTR_NO_KERNEL_MAPPING; attrs |= DMA_ATTR_WRITE_COMBINE; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 1d47ec467ded..fc8d24f7c084 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -149,6 +149,7 @@ struct msm_drm_private { * and position mm_node->start is in # of pages: */ struct drm_mm mm; + spinlock_t lock; /* Protects drm_mm node allocation/removal */ } vram; struct notifier_block vmap_notifier; @@ -198,8 +199,6 @@ int msm_gem_mmap_obj(struct drm_gem_object *obj, int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); int msm_gem_fault(struct vm_fault *vmf); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); -int msm_gem_get_iova_locked(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace, uint64_t *iova); int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova); uint64_t msm_gem_iova(struct drm_gem_object *obj, @@ -221,13 +220,9 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); -void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj); void *msm_gem_get_vaddr(struct drm_gem_object *obj); -void msm_gem_put_vaddr_locked(struct drm_gem_object *obj); void msm_gem_put_vaddr(struct drm_gem_object *obj); int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); -void msm_gem_purge(struct drm_gem_object *obj); -void msm_gem_vunmap(struct drm_gem_object *obj); int msm_gem_sync_object(struct 
drm_gem_object *obj, struct msm_fence_context *fctx, bool exclusive); void msm_gem_move_to_active(struct drm_gem_object *obj, @@ -240,6 +235,8 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, uint32_t size, uint32_t flags, uint32_t *handle); struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags); +struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, + uint32_t size, uint32_t flags); struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 803ed272dc6d..5ecf4ff9a059 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -97,10 +97,8 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, /* allocate backing bo */ size = mode_cmd.pitches[0] * mode_cmd.height; DBG("allocating %d bytes for fb %d", size, dev->primary->index); - mutex_lock(&dev->struct_mutex); fbdev->bo = msm_gem_new(dev, size, MSM_BO_SCANOUT | MSM_BO_WC | MSM_BO_STOLEN); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(fbdev->bo)) { ret = PTR_ERR(fbdev->bo); fbdev->bo = NULL; @@ -126,7 +124,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, * in panic (ie. 
lock-safe, etc) we could avoid pinning the * buffer now: */ - ret = msm_gem_get_iova_locked(fbdev->bo, priv->kms->aspace, &paddr); + ret = msm_gem_get_iova(fbdev->bo, priv->kms->aspace, &paddr); if (ret) { dev_err(dev->dev, "failed to get buffer obj iova: %d\n", ret); goto fail_unlock; @@ -155,7 +153,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, dev->mode_config.fb_base = paddr; - fbi->screen_base = msm_gem_get_vaddr_locked(fbdev->bo); + fbi->screen_base = msm_gem_get_vaddr(fbdev->bo); if (IS_ERR(fbi->screen_base)) { ret = PTR_ERR(fbi->screen_base); goto fail_unlock; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 9951c78ee215..65f35544c1ec 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -26,6 +26,9 @@ #include "msm_gpu.h" #include "msm_mmu.h" +static void msm_gem_vunmap_locked(struct drm_gem_object *obj); + + static dma_addr_t physaddr(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -41,8 +44,7 @@ static bool use_pages(struct drm_gem_object *obj) } /* allocate pages from VRAM carveout, used when no IOMMU: */ -static struct page **get_pages_vram(struct drm_gem_object *obj, - int npages) +static struct page **get_pages_vram(struct drm_gem_object *obj, int npages) { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_drm_private *priv = obj->dev->dev_private; @@ -54,7 +56,9 @@ static struct page **get_pages_vram(struct drm_gem_object *obj, if (!p) return ERR_PTR(-ENOMEM); + spin_lock(&priv->vram.lock); ret = drm_mm_insert_node(&priv->vram.mm, msm_obj->vram_node, npages); + spin_unlock(&priv->vram.lock); if (ret) { kvfree(p); return ERR_PTR(ret); @@ -69,7 +73,6 @@ static struct page **get_pages_vram(struct drm_gem_object *obj, return p; } -/* called with dev->struct_mutex held */ static struct page **get_pages(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -109,6 +112,18 @@ static struct page 
**get_pages(struct drm_gem_object *obj) return msm_obj->pages; } +static void put_pages_vram(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct msm_drm_private *priv = obj->dev->dev_private; + + spin_lock(&priv->vram.lock); + drm_mm_remove_node(msm_obj->vram_node); + spin_unlock(&priv->vram.lock); + + kvfree(msm_obj->pages); +} + static void put_pages(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -125,10 +140,8 @@ static void put_pages(struct drm_gem_object *obj) if (use_pages(obj)) drm_gem_put_pages(obj, msm_obj->pages, true, false); - else { - drm_mm_remove_node(msm_obj->vram_node); - kvfree(msm_obj->pages); - } + else + put_pages_vram(obj); msm_obj->pages = NULL; } @@ -136,11 +149,18 @@ static void put_pages(struct drm_gem_object *obj) struct page **msm_gem_get_pages(struct drm_gem_object *obj) { - struct drm_device *dev = obj->dev; + struct msm_gem_object *msm_obj = to_msm_bo(obj); struct page **p; - mutex_lock(&dev->struct_mutex); + + mutex_lock(&msm_obj->lock); + + if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { + mutex_unlock(&msm_obj->lock); + return ERR_PTR(-EBUSY); + } + p = get_pages(obj); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&msm_obj->lock); return p; } @@ -195,28 +215,25 @@ int msm_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; - struct drm_device *dev = obj->dev; - struct msm_drm_private *priv = dev->dev_private; + struct msm_gem_object *msm_obj = to_msm_bo(obj); struct page **pages; unsigned long pfn; pgoff_t pgoff; int ret; - /* This should only happen if userspace tries to pass a mmap'd - * but unfaulted gem bo vaddr into submit ioctl, triggering - * a page fault while struct_mutex is already held. This is - * not a valid use-case so just bail. + /* + * vm_ops.open/drm_gem_mmap_obj and close get and put + * a reference on obj. So, we dont need to hold one here. 
*/ - if (priv->struct_mutex_task == current) - return VM_FAULT_SIGBUS; - - /* Make sure we don't parallel update on a fault, nor move or remove - * something from beneath our feet - */ - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = mutex_lock_interruptible(&msm_obj->lock); if (ret) goto out; + if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { + mutex_unlock(&msm_obj->lock); + return VM_FAULT_SIGBUS; + } + /* make sure we have pages attached now */ pages = get_pages(obj); if (IS_ERR(pages)) { @@ -235,7 +252,7 @@ int msm_gem_fault(struct vm_fault *vmf) ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); out_unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&msm_obj->lock); out: switch (ret) { case -EAGAIN: @@ -259,9 +276,10 @@ out: static uint64_t mmap_offset(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; + struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + WARN_ON(!mutex_is_locked(&msm_obj->lock)); /* Make it mmapable */ ret = drm_gem_create_mmap_offset(obj); @@ -277,9 +295,11 @@ static uint64_t mmap_offset(struct drm_gem_object *obj) uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj) { uint64_t offset; - mutex_lock(&obj->dev->struct_mutex); + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + mutex_lock(&msm_obj->lock); offset = mmap_offset(obj); - mutex_unlock(&obj->dev->struct_mutex); + mutex_unlock(&msm_obj->lock); return offset; } @@ -289,6 +309,8 @@ static struct msm_gem_vma *add_vma(struct drm_gem_object *obj, struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma; + WARN_ON(!mutex_is_locked(&msm_obj->lock)); + vma = kzalloc(sizeof(*vma), GFP_KERNEL); if (!vma) return ERR_PTR(-ENOMEM); @@ -306,7 +328,7 @@ static struct msm_gem_vma *lookup_vma(struct drm_gem_object *obj, struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma; - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + 
WARN_ON(!mutex_is_locked(&msm_obj->lock)); list_for_each_entry(vma, &msm_obj->vmas, list) { if (vma->aspace == aspace) @@ -325,13 +347,14 @@ static void del_vma(struct msm_gem_vma *vma) kfree(vma); } +/* Called with msm_obj->lock locked */ static void put_iova(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma, *tmp; - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + WARN_ON(!mutex_is_locked(&msm_obj->lock)); list_for_each_entry_safe(vma, tmp, &msm_obj->vmas, list) { msm_gem_unmap_vma(vma->aspace, vma, msm_obj->sgt); @@ -339,21 +362,20 @@ put_iova(struct drm_gem_object *obj) } } -/* should be called under struct_mutex.. although it can be called - * from atomic context without struct_mutex to acquire an extra - * iova ref if you know one is already held. - * - * That means when I do eventually need to add support for unpinning - * the refcnt counter needs to be atomic_t. - */ -int msm_gem_get_iova_locked(struct drm_gem_object *obj, +/* get iova, taking a reference. Should have a matching put */ +int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova) { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma; int ret = 0; - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + mutex_lock(&msm_obj->lock); + + if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { + mutex_unlock(&msm_obj->lock); + return -EBUSY; + } vma = lookup_vma(obj, aspace); @@ -377,24 +399,14 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, } *iova = vma->iova; + + mutex_unlock(&msm_obj->lock); return 0; fail: del_vma(vma); - return ret; -} - -/* get iova, taking a reference. 
Should have a matching put */ -int msm_gem_get_iova(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace, uint64_t *iova) -{ - int ret; - - mutex_lock(&obj->dev->struct_mutex); - ret = msm_gem_get_iova_locked(obj, aspace, iova); - mutex_unlock(&obj->dev->struct_mutex); - + mutex_unlock(&msm_obj->lock); return ret; } @@ -404,11 +416,12 @@ int msm_gem_get_iova(struct drm_gem_object *obj, uint64_t msm_gem_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace) { + struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma; - mutex_lock(&obj->dev->struct_mutex); + mutex_lock(&msm_obj->lock); vma = lookup_vma(obj, aspace); - mutex_unlock(&obj->dev->struct_mutex); + mutex_unlock(&msm_obj->lock); WARN_ON(!vma); return vma ? vma->iova : 0; @@ -455,45 +468,57 @@ fail: return ret; } -void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj) -{ - struct msm_gem_object *msm_obj = to_msm_bo(obj); - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); - if (!msm_obj->vaddr) { - struct page **pages = get_pages(obj); - if (IS_ERR(pages)) - return ERR_CAST(pages); - msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT, - VM_MAP, pgprot_writecombine(PAGE_KERNEL)); - if (msm_obj->vaddr == NULL) - return ERR_PTR(-ENOMEM); - } - msm_obj->vmap_count++; - return msm_obj->vaddr; -} - void *msm_gem_get_vaddr(struct drm_gem_object *obj) -{ - void *ret; - mutex_lock(&obj->dev->struct_mutex); - ret = msm_gem_get_vaddr_locked(obj); - mutex_unlock(&obj->dev->struct_mutex); - return ret; -} - -void msm_gem_put_vaddr_locked(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); - WARN_ON(msm_obj->vmap_count < 1); + int ret = 0; + + mutex_lock(&msm_obj->lock); + + if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { + mutex_unlock(&msm_obj->lock); + return ERR_PTR(-EBUSY); + } + + /* increment vmap_count *before* vmap() call, so shrinker can + * check vmap_count (is_vunmapable()) 
outside of msm_obj->lock. + * This guarantees that we won't try to msm_gem_vunmap() this + * same object from within the vmap() call (while we already + * hold msm_obj->lock) + */ + msm_obj->vmap_count++; + + if (!msm_obj->vaddr) { + struct page **pages = get_pages(obj); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto fail; + } + msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT, + VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + if (msm_obj->vaddr == NULL) { + ret = -ENOMEM; + goto fail; + } + } + + mutex_unlock(&msm_obj->lock); + return msm_obj->vaddr; + +fail: msm_obj->vmap_count--; + mutex_unlock(&msm_obj->lock); + return ERR_PTR(ret); } void msm_gem_put_vaddr(struct drm_gem_object *obj) { - mutex_lock(&obj->dev->struct_mutex); - msm_gem_put_vaddr_locked(obj); - mutex_unlock(&obj->dev->struct_mutex); + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + mutex_lock(&msm_obj->lock); + WARN_ON(msm_obj->vmap_count < 1); + msm_obj->vmap_count--; + mutex_unlock(&msm_obj->lock); } /* Update madvise status, returns true if not purged, else @@ -503,15 +528,21 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + mutex_lock(&msm_obj->lock); + WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); if (msm_obj->madv != __MSM_MADV_PURGED) msm_obj->madv = madv; - return (msm_obj->madv != __MSM_MADV_PURGED); + madv = msm_obj->madv; + + mutex_unlock(&msm_obj->lock); + + return (madv != __MSM_MADV_PURGED); } -void msm_gem_purge(struct drm_gem_object *obj) +void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass) { struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -520,9 +551,11 @@ void msm_gem_purge(struct drm_gem_object *obj) WARN_ON(!is_purgeable(msm_obj)); WARN_ON(obj->import_attach); + mutex_lock_nested(&msm_obj->lock, subclass); + put_iova(obj); - msm_gem_vunmap(obj); + msm_gem_vunmap_locked(obj); put_pages(obj); @@ -540,12 +573,16 @@ void 
msm_gem_purge(struct drm_gem_object *obj) invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1); + + mutex_unlock(&msm_obj->lock); } -void msm_gem_vunmap(struct drm_gem_object *obj) +static void msm_gem_vunmap_locked(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + WARN_ON(!mutex_is_locked(&msm_obj->lock)); + if (!msm_obj->vaddr || WARN_ON(!is_vunmapable(msm_obj))) return; @@ -553,6 +590,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj) msm_obj->vaddr = NULL; } +void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + mutex_lock_nested(&msm_obj->lock, subclass); + msm_gem_vunmap_locked(obj); + mutex_unlock(&msm_obj->lock); +} + /* must be called before _move_to_active().. */ int msm_gem_sync_object(struct drm_gem_object *obj, struct msm_fence_context *fctx, bool exclusive) @@ -674,7 +720,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) uint64_t off = drm_vma_node_start(&obj->vma_node); const char *madv; - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + mutex_lock(&msm_obj->lock); switch (msm_obj->madv) { case __MSM_MADV_PURGED: @@ -715,6 +761,8 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) if (fence) describe_fence(fence, "Exclusive", m); rcu_read_unlock(); + + mutex_unlock(&msm_obj->lock); } void msm_gem_describe_objects(struct list_head *list, struct seq_file *m) @@ -747,6 +795,8 @@ void msm_gem_free_object(struct drm_gem_object *obj) list_del(&msm_obj->mm_list); + mutex_lock(&msm_obj->lock); + put_iova(obj); if (obj->import_attach) { @@ -761,7 +811,7 @@ void msm_gem_free_object(struct drm_gem_object *obj) drm_prime_gem_destroy(obj, msm_obj->sgt); } else { - msm_gem_vunmap(obj); + msm_gem_vunmap_locked(obj); put_pages(obj); } @@ -770,6 +820,7 @@ void msm_gem_free_object(struct drm_gem_object *obj) drm_gem_object_release(obj); + mutex_unlock(&msm_obj->lock); kfree(msm_obj); } 
@@ -780,14 +831,8 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, struct drm_gem_object *obj; int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - obj = msm_gem_new(dev, size, flags); - mutex_unlock(&dev->struct_mutex); - if (IS_ERR(obj)) return PTR_ERR(obj); @@ -802,13 +847,12 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, static int msm_gem_new_impl(struct drm_device *dev, uint32_t size, uint32_t flags, struct reservation_object *resv, - struct drm_gem_object **obj) + struct drm_gem_object **obj, + bool struct_mutex_locked) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - switch (flags & MSM_BO_CACHE_MASK) { case MSM_BO_UNCACHED: case MSM_BO_CACHED: @@ -824,6 +868,8 @@ static int msm_gem_new_impl(struct drm_device *dev, if (!msm_obj) return -ENOMEM; + mutex_init(&msm_obj->lock); + msm_obj->flags = flags; msm_obj->madv = MSM_MADV_WILLNEED; @@ -837,23 +883,28 @@ static int msm_gem_new_impl(struct drm_device *dev, INIT_LIST_HEAD(&msm_obj->submit_entry); INIT_LIST_HEAD(&msm_obj->vmas); - list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + if (struct_mutex_locked) { + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + } else { + mutex_lock(&dev->struct_mutex); + list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + mutex_unlock(&dev->struct_mutex); + } *obj = &msm_obj->base; return 0; } -struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags) +static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, + uint32_t size, uint32_t flags, bool struct_mutex_locked) { struct msm_drm_private *priv = dev->dev_private; struct drm_gem_object *obj = NULL; bool use_vram = false; int ret; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - size = PAGE_ALIGN(size); if (!iommu_present(&platform_bus_type)) 
@@ -870,7 +921,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, if (size == 0) return ERR_PTR(-EINVAL); - ret = msm_gem_new_impl(dev, size, flags, NULL, &obj); + ret = msm_gem_new_impl(dev, size, flags, NULL, &obj, struct_mutex_locked); if (ret) goto fail; @@ -904,10 +955,22 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, return obj; fail: - drm_gem_object_unreference(obj); + drm_gem_object_unreference_unlocked(obj); return ERR_PTR(ret); } +struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, + uint32_t size, uint32_t flags) +{ + return _msm_gem_new(dev, size, flags, true); +} + +struct drm_gem_object *msm_gem_new(struct drm_device *dev, + uint32_t size, uint32_t flags) +{ + return _msm_gem_new(dev, size, flags, false); +} + struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt) { @@ -924,11 +987,7 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, size = PAGE_ALIGN(dmabuf->size); - /* Take mutex so we can modify the inactive list in msm_gem_new_impl */ - mutex_lock(&dev->struct_mutex); - ret = msm_gem_new_impl(dev, size, MSM_BO_WC, dmabuf->resv, &obj); - mutex_unlock(&dev->struct_mutex); - + ret = msm_gem_new_impl(dev, size, MSM_BO_WC, dmabuf->resv, &obj, false); if (ret) goto fail; @@ -937,17 +996,22 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, npages = size / PAGE_SIZE; msm_obj = to_msm_bo(obj); + mutex_lock(&msm_obj->lock); msm_obj->sgt = sgt; msm_obj->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!msm_obj->pages) { + mutex_unlock(&msm_obj->lock); ret = -ENOMEM; goto fail; } ret = drm_prime_sg_to_page_addr_arrays(sgt, msm_obj->pages, NULL, npages); - if (ret) + if (ret) { + mutex_unlock(&msm_obj->lock); goto fail; + } + mutex_unlock(&msm_obj->lock); return obj; fail: diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 112eb63b5908..91c210d2359c 100644 --- 
a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -31,6 +31,7 @@ struct msm_gem_address_space { * and position mm_node->start is in # of pages: */ struct drm_mm mm; + spinlock_t lock; /* Protects drm_mm node allocation/removal */ struct msm_mmu *mmu; struct kref kref; }; @@ -89,6 +90,7 @@ struct msm_gem_object { * an IOMMU. Also used for stolen/splashscreen buffer. */ struct drm_mm_node *vram_node; + struct mutex lock; /* Protects resources associated with bo */ }; #define to_msm_bo(x) container_of(x, struct msm_gem_object, base) @@ -99,6 +101,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj) static inline bool is_purgeable(struct msm_gem_object *msm_obj) { + WARN_ON(!mutex_is_locked(&msm_obj->base.dev->struct_mutex)); return (msm_obj->madv == MSM_MADV_DONTNEED) && msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach; } @@ -108,6 +111,25 @@ static inline bool is_vunmapable(struct msm_gem_object *msm_obj) return (msm_obj->vmap_count == 0) && msm_obj->vaddr; } +/* The shrinker can be triggered while we hold objA->lock, and need + * to grab objB->lock to purge it. Lockdep just sees these as a single + * class of lock, so we use subclasses to teach it the difference. + * + * OBJ_LOCK_NORMAL is implicit (ie. normal mutex_lock() call), and + * OBJ_LOCK_SHRINKER is used by shrinker. + * + * It is *essential* that we never go down paths that could trigger the + * shrinker for a purgable object. This is ensured by checking that + * msm_obj->madv == MSM_MADV_WILLNEED. + */ +enum msm_gem_lock { + OBJ_LOCK_NORMAL, + OBJ_LOCK_SHRINKER, +}; + +void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass); +void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); + /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, * associated with the cmdstream submission for synchronization (and * make it easier to unwind when things go wrong, etc). 
This only diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index ab1dd020eb04..b72d8e6cd51d 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -20,6 +20,18 @@ static bool msm_gem_shrinker_lock(struct drm_device *dev, bool *unlock) { + /* NOTE: we are *closer* to being able to get rid of + * mutex_trylock_recursive().. the msm_gem code itself does + * not need struct_mutex, although codepaths that can trigger + * shrinker are still called in code-paths that hold the + * struct_mutex. + * + * Also, msm_obj->madv is protected by struct_mutex. + * + * The next step is probably split out a seperate lock for + * protecting inactive_list, so that shrinker does not need + * struct_mutex. + */ switch (mutex_trylock_recursive(&dev->struct_mutex)) { case MUTEX_TRYLOCK_FAILED: return false; @@ -77,7 +89,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) if (freed >= sc->nr_to_scan) break; if (is_purgeable(msm_obj)) { - msm_gem_purge(&msm_obj->base); + msm_gem_purge(&msm_obj->base, OBJ_LOCK_SHRINKER); freed += msm_obj->base.size >> PAGE_SHIFT; } } @@ -106,7 +118,7 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { if (is_vunmapable(msm_obj)) { - msm_gem_vunmap(&msm_obj->base); + msm_gem_vunmap(&msm_obj->base, OBJ_LOCK_SHRINKER); /* since we don't know any better, lets bail after a few * and if necessary the shrinker will be invoked again. 
* Seems better than unmapping *everything* diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index c8d01df993da..179cfc60b6ca 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -245,7 +245,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) uint64_t iova; /* if locking succeeded, pin bo: */ - ret = msm_gem_get_iova_locked(&msm_obj->base, + ret = msm_gem_get_iova(&msm_obj->base, submit->gpu->aspace, &iova); if (ret) @@ -301,7 +301,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob /* For now, just map the entire thing. Eventually we probably * to do it page-by-page, w/ kmap() if not vmap()d.. */ - ptr = msm_gem_get_vaddr_locked(&obj->base); + ptr = msm_gem_get_vaddr(&obj->base); if (IS_ERR(ptr)) { ret = PTR_ERR(ptr); @@ -359,7 +359,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob } out: - msm_gem_put_vaddr_locked(&obj->base); + msm_gem_put_vaddr(&obj->base); return ret; } diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index f285d7e210db..c36321bc8714 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -50,7 +50,9 @@ msm_gem_unmap_vma(struct msm_gem_address_space *aspace, aspace->mmu->funcs->unmap(aspace->mmu, vma->iova, sgt, size); } + spin_lock(&aspace->lock); drm_mm_remove_node(&vma->node); + spin_unlock(&aspace->lock); vma->iova = 0; @@ -63,10 +65,15 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace, { int ret; - if (WARN_ON(drm_mm_node_allocated(&vma->node))) + spin_lock(&aspace->lock); + if (WARN_ON(drm_mm_node_allocated(&vma->node))) { + spin_unlock(&aspace->lock); return 0; + } ret = drm_mm_insert_node(&aspace->mm, &vma->node, npages); + spin_unlock(&aspace->lock); + if (ret) return ret; @@ -94,6 +101,7 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain, if (!aspace) return 
ERR_PTR(-ENOMEM); + spin_lock_init(&aspace->lock); aspace->name = name; aspace->mmu = msm_iommu_new(dev, domain); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 2d5c9afbcdbe..9f3dbc236ab3 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -497,7 +497,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, /* submit takes a reference to the bo and iova until retired: */ drm_gem_object_reference(&msm_obj->base); - msm_gem_get_iova_locked(&msm_obj->base, + msm_gem_get_iova(&msm_obj->base, submit->gpu->aspace, &iova); if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) @@ -661,9 +661,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, } /* Create ringbuffer: */ - mutex_lock(&drm->struct_mutex); gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - mutex_unlock(&drm->struct_mutex); if (IS_ERR(gpu->rb)) { ret = PTR_ERR(gpu->rb); gpu->rb = NULL; diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 0e81faab2c50..0366b8092f97 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -268,7 +268,7 @@ static void snapshot_buf(struct msm_rd_state *rd, struct msm_gem_object *obj = submit->bos[idx].obj; const char *buf; - buf = msm_gem_get_vaddr_locked(&obj->base); + buf = msm_gem_get_vaddr(&obj->base); if (IS_ERR(buf)) return; @@ -283,7 +283,7 @@ static void snapshot_buf(struct msm_rd_state *rd, (uint32_t[3]){ iova, size, iova >> 32 }, 12); rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size); - msm_gem_put_vaddr_locked(&obj->base); + msm_gem_put_vaddr(&obj->base); } /* called under struct_mutex */ diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 67b34e069abf..791bca3c6a9c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -40,7 +40,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) goto fail; } - ring->start = 
msm_gem_get_vaddr_locked(ring->bo); + ring->start = msm_gem_get_vaddr(ring->bo); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); goto fail; From 51c9fbe69486c9143877f5d26a575b16588eb08a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Jun 2017 08:59:11 -0400 Subject: [PATCH 249/341] bus: SIMPLE_PM_BUS does not depend on ARCH_RENESAS In fact, it is needed for PCI to work on msm8996 (and probably other things). No idea why it was depending on renesas but that doesn't make any sense. So drop the dependency. Signed-off-by: Rob Clark Acked-by: Bjorn Andersson Acked-by: Geert Uytterhoeven Reviewed-by: Simon Horman Acked-by: Arnd Bergmann --- drivers/bus/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 0a52da439abf..b83c5351376c 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -121,7 +121,6 @@ config QCOM_EBI2 config SIMPLE_PM_BUS bool "Simple Power-Managed Bus Driver" depends on OF && PM - depends on ARCH_RENESAS || COMPILE_TEST help Driver for transparent busses that don't need a real driver, but where the bus controller is part of a PM domain, or under the control From 23ac7cba73bb2c6e80f9cdebeb39dc3dad34ebb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Jun 2017 23:49:17 -0400 Subject: [PATCH 250/341] fix signedness of timestamps on ufs1 Signed-off-by: Al Viro --- fs/ufs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 9f4590261134..7b1b810a8ab1 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -578,9 +578,9 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode)); inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size); - inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); - inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec); - inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec); + 
inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); + inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec); + inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec); inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; From c0ef65d2928249e822b813beb41b6c1478c556ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Jun 2017 23:54:47 -0400 Subject: [PATCH 251/341] ufs_iget(): fail with -ESTALE on deleted inode Signed-off-by: Al Viro --- fs/ufs/inode.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7b1b810a8ab1..f36d6a53687d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -566,10 +566,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink)); - if (inode->i_nlink == 0) { - ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); - return -1; - } + if (inode->i_nlink == 0) + return -ESTALE; /* * Linux now has 32-bit uid and gid, so we can support EFT. @@ -614,10 +612,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink)); - if (inode->i_nlink == 0) { - ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); - return -1; - } + if (inode->i_nlink == 0) + return -ESTALE; /* * Linux now has 32-bit uid and gid, so we can support EFT. 
@@ -657,7 +653,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct buffer_head * bh; struct inode *inode; - int err; + int err = -EIO; UFSD("ENTER, ino %lu\n", ino); @@ -692,9 +688,10 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) err = ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); } - + brelse(bh); if (err) goto bad_inode; + inode->i_version++; ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -703,15 +700,13 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) ufs_set_inode_ops(inode); - brelse(bh); - UFSD("EXIT\n"); unlock_new_inode(inode); return inode; bad_inode: iget_failed(inode); - return ERR_PTR(-EIO); + return ERR_PTR(err); } static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) From 57db7e4a2d92c2d3dfbca4ef8057849b2682436b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 13 Jun 2017 04:31:16 -0500 Subject: [PATCH 252/341] signal: Only reschedule timers on signals timers have sent Thomas Gleixner wrote: > The CRIU support added a 'feature' which allows a user space task to send > arbitrary (kernel) signals to itself. The changelog says: > > The kernel prevents sending of siginfo with positive si_code, because > these codes are reserved for kernel. I think we can allow a task to > send such a siginfo to itself. This operation should not be dangerous. > > Quite contrary to that claim, it turns out that it is outright dangerous > for signals with info->si_code == SI_TIMER. The following code sequence in > a user space task allows to crash the kernel: > > id = timer_create(CLOCK_XXX, ..... 
signo = SIGX); > timer_set(id, ....); > info->si_signo = SIGX; > info->si_code = SI_TIMER: > info->_sifields._timer._tid = id; > info->_sifields._timer._sys_private = 2; > rt_[tg]sigqueueinfo(..., SIGX, info); > sigemptyset(&sigset); > sigaddset(&sigset, SIGX); > rt_sigtimedwait(sigset, info); > > For timers based on CLOCK_PROCESS_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID this > results in a kernel crash because sigwait() dequeues the signal and the > dequeue code observes: > > info->si_code == SI_TIMER && info->_sifields._timer._sys_private != 0 > > which triggers the following callchain: > > do_schedule_next_timer() -> posix_cpu_timer_schedule() -> arm_timer() > > arm_timer() executes a list_add() on the timer, which is already armed via > the timer_set() syscall. That's a double list add which corrupts the posix > cpu timer list. As a consequence the kernel crashes on the next operation > touching the posix cpu timer list. > > Posix clocks which are internally implemented based on hrtimers are not > affected by this because hrtimer_start() can handle already armed timers > nicely, but it's a reliable way to trigger the WARN_ON() in > hrtimer_forward(), which complains about calling that function on an > already armed timer. This problem has existed since the posix timer code was merged into 2.5.63. A few releases earlier in 2.5.60 ptrace gained the ability to inject not just a signal (which linux has supported since 1.0) but the full siginfo of a signal. The core problem is that the code will reschedule in response to signals getting dequeued not just for signals the timers sent but for other signals that happen to have a si_code of SI_TIMER. Avoid this confusion by testing to see if the queued signal was preallocated as all timer signals are preallocated, and so far only the timer code preallocates signals.
Move the check for if a timer needs to be rescheduled up into collect_signal where the preallocation check must be performed, and pass the result back to dequeue_signal where the code reschedules timers. This makes it clear why the code cares about preallocated timers. Cc: stable@vger.kernel.org Reported-by: Thomas Gleixner History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Reference: 66dd34ad31e5 ("signal: allow to send any siginfo to itself") Reference: 1669ce53e2ff ("Add PTRACE_GETSIGINFO and PTRACE_SETSIGINFO") Fixes: db8b50ba75f2 ("[PATCH] POSIX clocks & timers") Signed-off-by: "Eric W. Biederman" --- kernel/signal.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index ca92bcfeb322..45b4c1ffe14e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig) return !tsk->ptrace; } -static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) +static void collect_signal(int sig, struct sigpending *list, siginfo_t *info, + bool *resched_timer) { struct sigqueue *q, *first = NULL; @@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) still_pending: list_del_init(&first->list); copy_siginfo(info, &first->info); + + *resched_timer = + (first->flags & SIGQUEUE_PREALLOC) && + (info->si_code == SI_TIMER) && + (info->si_sys_private); + __sigqueue_free(first); } else { /* @@ -548,12 +555,12 @@ still_pending: } static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, - siginfo_t *info) + siginfo_t *info, bool *resched_timer) { int sig = next_signal(pending, mask); if (sig) - collect_signal(sig, pending, info); + collect_signal(sig, pending, info, resched_timer); return sig; } @@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, */ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, 
siginfo_t *info) { + bool resched_timer = false; int signr; /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ - signr = __dequeue_signal(&tsk->pending, mask, info); + signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, - mask, info); + mask, info, &resched_timer); #ifdef CONFIG_POSIX_TIMERS /* * itimer signal ? @@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) current->jobctl |= JOBCTL_STOP_DEQUEUED; } #ifdef CONFIG_POSIX_TIMERS - if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { + if (resched_timer) { /* * Release the siglock to ensure proper locking order * of timer locks outside of siglocks. Note, we leave From 77e9ce327d9b607cd6e57c0f4524a654dc59c4b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 17 Jun 2017 15:44:06 -0400 Subject: [PATCH 253/341] ufs: fix the logics for tail relocation * original hysteresis loop got broken by typo back in 2002; now it never switches out of OPTTIME state. Fixed. * critical levels for switching from OPTTIME to OPTSPACE and back ought to be calculated once, at mount time. * we should use mul_u64_u32_div() for those calculations, now that ->s_dsize is 64bit. * to quote Kirk McKusick (in 1995 FreeBSD commit message): The threshold for switching from time-space and space-time is too small when minfree is 5%...so make it stay at space in this case. 
Signed-off-by: Al Viro --- fs/ufs/balloc.c | 22 ++++++---------------- fs/ufs/super.c | 9 +++++++++ fs/ufs/ufs_fs.h | 2 ++ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 0315fea1d589..f80be4c5df9d 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -455,24 +455,14 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, /* * allocate new block and move data */ - switch (fs32_to_cpu(sb, usb1->fs_optim)) { - case UFS_OPTSPACE: + if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) { request = newcount; - if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree - > uspi->s_dsize * uspi->s_minfree / (2 * 100)) - break; - usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); - break; - default: - usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); - - case UFS_OPTTIME: + if (uspi->cs_total.cs_nffree < uspi->s_space_to_time) + usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); + } else { request = uspi->s_fpb; - if (uspi->cs_total.cs_nffree < uspi->s_dsize * - (uspi->s_minfree - 2) / 100) - break; - usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); - break; + if (uspi->cs_total.cs_nffree > uspi->s_time_to_space) + usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE); } result = ufs_alloc_fragments (inode, cgno, goal, request, err); if (result) { diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 34656c7a8e22..f211b662dd92 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1211,6 +1211,15 @@ magic_found: uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize, uspi->s_minfree, 100); + if (uspi->s_minfree <= 5) { + uspi->s_time_to_space = ~0ULL; + uspi->s_space_to_time = 0; + usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE); + } else { + uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1; + uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize, + uspi->s_minfree - 2, 100) - 1; + } /* * Compute another frequently used values diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h index 823d55a37586..150eef6f1233 100644 --- a/fs/ufs/ufs_fs.h 
+++ b/fs/ufs/ufs_fs.h @@ -792,6 +792,8 @@ struct ufs_sb_private_info { __s32 fs_magic; /* filesystem magic */ unsigned int s_dirblksize; __u64 s_root_blocks; + __u64 s_time_to_space; + __u64 s_space_to_time; }; /* From 779f19ac9d5858a2c159030c0c166f7da46b74ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 18 Jun 2017 15:10:26 -0700 Subject: [PATCH 254/341] Input: soc_button_array - fix leaking the ACPI button descriptor buffer We are passing a buffer with ACPI_ALLOCATE_BUFFER set to acpi_evaluate_object, so we must free it when we are done with it. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index e37d37273182..f600f3a7a3c6 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev) if (!btns_desc) { dev_err(dev, "ACPI Button Descriptors not found\n"); - return ERR_PTR(-ENODEV); + button_info = ERR_PTR(-ENODEV); + goto out; } /* The first package describes the collection */ @@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev) } if (collection_uid == -1) { dev_err(dev, "Invalid Button Collection Descriptor\n"); - return ERR_PTR(-ENODEV); + button_info = ERR_PTR(-ENODEV); + goto out; } /* There are package.count - 1 buttons + 1 terminating empty entry */ button_info = devm_kcalloc(dev, btns_desc->package.count, sizeof(*button_info), GFP_KERNEL); - if (!button_info) - return ERR_PTR(-ENOMEM); + if (!button_info) { + button_info = ERR_PTR(-ENOMEM); + goto out; + } /* Parse the button descriptors */ for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) { if (soc_button_parse_btn_desc(dev, &btns_desc->package.elements[i], collection_uid, - 
&button_info[btn])) - return ERR_PTR(-ENODEV); + &button_info[btn])) { + button_info = ERR_PTR(-ENODEV); + goto out; + } } +out: + kfree(buf.pointer); return button_info; } From 46f8cd9d2fc1e4e8b82b53a0007f6c92e80c930b Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sat, 17 Jun 2017 11:38:05 +0800 Subject: [PATCH 255/341] ip6_tunnel: Correct tos value in collect_md mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same as ip_gre, geneve and vxlan, use key->tos as traffic class value. CC: Peter Dawson Fixes: 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets") Signed-off-by: Haishuang Yan Acked-by: Peter Dawson Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 035c0496b92a..8c6c3c8e7eef 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1248,7 +1248,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; - dsfield = ip6_tclass(key->label); + dsfield = key->tos; } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; @@ -1319,7 +1319,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; - dsfield = ip6_tclass(key->label); + dsfield = key->tos; } else { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ From 9ddb8e1743cdde5ce01234cb1c563f601086b5e3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 19 Jun 2017 09:31:38 +0200 Subject: [PATCH 256/341] drm/i915: Update DRIVER_DATE to 20170619 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git
a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e750be52b04b..e2d2b785cb65 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170529" -#define DRIVER_TIMESTAMP 1496041258 +#define DRIVER_DATE "20170619" +#define DRIVER_TIMESTAMP 1497857498 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From a21ef715fbb8210c50b1d684145f8acdf2339596 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Jun 2017 14:11:29 +0100 Subject: [PATCH 257/341] drm/i915: Differentiate between sw write location into ring and last hw read We need to keep track of the last location we ask the hw to read up to (RING_TAIL) separately from our last write location into the ring, so that in the event of a GPU reset we do not tell the HW to proceed into a partially written request (which can happen if that request is waiting for an external signal before being executed). 
v2: Refactor intel_ring_reset() (Mika) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144 Testcase: igt/gem_exec_fence/await-hang Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Fixes: d55ac5bf97c6 ("drm/i915: Defer transfer onto execution timeline to actual hw submission") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170425130049.26147-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala (cherry picked from commit e6ba9992de6c63fe86c028b4876338e1cb7dac34) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170615131129.3061-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +-- drivers/gpu/drm/i915/intel_lrc.c | 6 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 41 ++++++++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 19 ++++++++-- 5 files changed, 48 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 5ddbc9499775..a74d0ac737cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GPU processing the request, we never over-estimate the * position of the head. 
*/ - req->head = req->ring->tail; + req->head = req->ring->emit; /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1642fff9cf13..ab5140ba108d 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, GEM_BUG_ON(freespace < wqi_size); /* The GuC firmware wants the tail index in QWords, not bytes */ - tail = rq->tail; - assert_ring_tail_valid(rq->ring, rq->tail); - tail >>= 3; + tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dac4e003c1f3..62f44d3e7c43 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; - assert_ring_tail_valid(rq->ring, rq->tail); - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. 
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) ce->state->obj->mm.dirty = true; i915_gem_object_unpin_map(ce->state->obj); - ce->ring->head = ce->ring->tail = 0; - intel_ring_update_space(ce->ring); + intel_ring_reset(ce->ring, 0); } } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 66a2b8b83972..513a0f4b469b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size) void intel_ring_update_space(struct intel_ring *ring) { - ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); + ring->space = __intel_ring_space(ring->head, ring->emit, ring->size); } static int @@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); - assert_ring_tail_valid(request->ring, request->tail); - I915_WRITE_TAIL(request->engine, request->tail); + I915_WRITE_TAIL(request->engine, + intel_ring_set_tail(request->ring, request->tail)); } static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) @@ -1316,11 +1316,23 @@ err: return PTR_ERR(addr); } +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + GEM_BUG_ON(!list_empty(&ring->request_list)); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + void intel_ring_unpin(struct intel_ring *ring) { GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); + /* Discard any unused bytes beyond that submitted to hw. 
*/ + intel_ring_reset(ring, ring->tail); + if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else @@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; + /* Restart from the beginning of the rings for convenience */ for_each_engine(engine, dev_priv, id) - engine->buffer->head = engine->buffer->tail; + intel_ring_reset(engine->buffer, 0); } static int ring_request_alloc(struct drm_i915_gem_request *request) @@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) unsigned space; /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ring->tail, + space = __intel_ring_space(target->postfix, ring->emit, ring->size); if (space >= bytes) break; @@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; - int remain_actual = ring->size - ring->tail; - int remain_usable = ring->effective_size - ring->tail; + int remain_actual = ring->size - ring->emit; + int remain_usable = ring->effective_size - ring->emit; int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; @@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (unlikely(need_wrap)) { GEM_BUG_ON(remain_actual > ring->space); - GEM_BUG_ON(ring->tail + remain_actual > ring->size); + GEM_BUG_ON(ring->emit + remain_actual > ring->size); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->tail, 0, remain_actual); - ring->tail = 0; + memset(ring->vaddr + ring->emit, 0, remain_actual); + ring->emit = 0; ring->space -= remain_actual; } - GEM_BUG_ON(ring->tail > ring->size - bytes); - cs = ring->vaddr + ring->tail; - ring->tail += bytes; + GEM_BUG_ON(ring->emit > ring->size - bytes); + cs 
= ring->vaddr + ring->emit; + ring->emit += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); @@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { int num_dwords = - (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); u32 *cs; if (num_dwords == 0) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a82a0807f64d..f7144fe09613 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -145,6 +145,7 @@ struct intel_ring { u32 head; u32 tail; + u32 emit; int space; int size; @@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); +void intel_ring_reset(struct intel_ring *ring, u32 tail); +void intel_ring_update_space(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); @@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); + GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); } static inline u32 @@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) GEM_BUG_ON(tail >= ring->size); } -void intel_ring_update_space(struct intel_ring *ring); +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. 
The tail may be + * read by i915_gem_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); From b8d5a9ccfba5fc084b50b00b9f5b587a8e64b72c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Jun 2017 12:03:46 +0100 Subject: [PATCH 258/341] drm/i915: Encourage our shrinker more when our shmemfs allocations fails Commit 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") made the bold decision to try and avoid the oomkiller by reporting -ENOMEM to userspace if our allocation failed after attempting to free enough buffer objects. In short, it appears we were giving up too easily (even before we start wondering if one pass of reclaim is as strong as we would like). Part of the problem is that if we only shrink just enough pages for our expected allocation, the likelihood of those pages becoming available to us is less than 100% To counter-act that we ask for twice the number of pages to be made available. Furthermore, we allow the shrinker to pull pages from the active list in later passes. v2: Be a little more cautious in paging out gfx buffers, and leave that to a more balanced approach from shrink_slab(). Important when combined with "drm/i915: Start writeback from the shrinker" as anything shrunk is immediately swapped out and so should be more conservative. 
Fixes: 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Daniel Vetter Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170609110350.1767-1-chris@chris-wilson.co.uk (cherry picked from commit 4846bf0ca8cb4304dde6140eff33a92b3fe8ef24) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 50 +++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 462031cbd77f..c93f27b981f5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment; + gfp_t noreclaim; int ret; - gfp_t gfp; /* Assert that the object is not currently in any GPU domain. 
As it * wasn't in the GTT, there shouldn't be any way it could have been in @@ -2315,22 +2315,31 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; - gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); - gfp |= __GFP_NORETRY | __GFP_NOWARN; + noreclaim = mapping_gfp_constraint(mapping, + ~(__GFP_IO | __GFP_RECLAIM)); + noreclaim |= __GFP_NORETRY | __GFP_NOWARN; + sg = st->sgl; st->nents = 0; for (i = 0; i < page_count; i++) { - page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (unlikely(IS_ERR(page))) { - i915_gem_shrink(dev_priv, - page_count, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); + const unsigned int shrink[] = { + I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, + 0, + }, *s = shrink; + gfp_t gfp = noreclaim; + + do { page = shmem_read_mapping_page_gfp(mapping, i, gfp); - } - if (unlikely(IS_ERR(page))) { - gfp_t reclaim; + if (likely(!IS_ERR(page))) + break; + + if (!*s) { + ret = PTR_ERR(page); + goto err_sg; + } + + i915_gem_shrink(dev_priv, 2 * page_count, *s++); + cond_resched(); /* We've tried hard to allocate the memory by reaping * our own buffer, now let the real VM do its job and @@ -2340,15 +2349,13 @@ rebuild_st: * defer the oom here by reporting the ENOMEM back * to userspace. 
*/ - reclaim = mapping_gfp_mask(mapping); - reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ - - page = shmem_read_mapping_page_gfp(mapping, i, reclaim); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto err_sg; + if (!*s) { + /* reclaim and warn, but no oom */ + gfp = mapping_gfp_mask(mapping); + gfp |= __GFP_NORETRY; } - } + } while (1); + if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { @@ -4222,6 +4229,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) mapping = obj->base.filp->f_mapping; mapping_set_gfp_mask(mapping, mask); + GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); i915_gem_object_init(obj, &i915_gem_object_ops); From ce2c58724f7d07e76dadfeba53d6877a9e67341d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Jun 2017 12:03:47 +0100 Subject: [PATCH 259/341] drm/i915: Remove __GFP_NORETRY from our buffer allocator I tried __GFP_NORETRY in the belief that __GFP_RECLAIM was effective. It struggles with handling reclaim of our dirty buffers and relies on reclaim via kswapd. As a result, a single pass of direct reclaim is unreliable when i915 occupies the majority of available memory, and the only means of effectively waiting on kswapd to make progress is by not setting the __GFP_NORETRY flag and looping. That leaves us with the dilemma of invoking the oomkiller instead of propagating the allocation failure back to userspace where it can be handled more gracefully (one hopes). In the future we may have __GFP_MAYFAIL to allow repeats up until we genuinely run out of memory and the oomkiller would have been invoked. Until then, let the oomkiller wreak havoc. v2: Stop playing with side-effects of gfp flags and await __GFP_MAYFAIL v3: Update comments that direct reclaim only appears to be ignoring our dirty buffers!
Fixes: 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") Testcase: igt/gem_tiled_swapping Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: Michal Hocko Link: http://patchwork.freedesktop.org/patch/msgid/20170609110350.1767-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit eaf41801559a687cc7511c04dc712984765c9dd7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c93f27b981f5..615f0a855222 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2352,7 +2352,20 @@ rebuild_st: if (!*s) { /* reclaim and warn, but no oom */ gfp = mapping_gfp_mask(mapping); - gfp |= __GFP_NORETRY; + + /* Our bo are always dirty and so we require + * kswapd to reclaim our pages (direct reclaim + * does not effectively begin pageout of our + * buffers on its own). However, direct reclaim + * only waits for kswapd when under allocation + * congestion. So as a result __GFP_RECLAIM is + * unreliable and fails to actually reclaim our + * dirty pages -- unless you try over and over + * again with !__GFP_NORETRY. However, we still + * want to fail this allocation rather than + * trigger the out-of-memory killer and for + * this we want the future __GFP_MAYFAIL. + */ } } while (1); From 17b206c27366f3cee816eaf86fafc6a11f628ecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:13 +0300 Subject: [PATCH 260/341] drm/i915: Fix deadlock witha the pipe A quirk during resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass down the correct acquire context to the pipe A quirk load detect hack during display resume. Avoids deadlocking the entire thing. 
Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-2-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst (cherry picked from commit aecd36b8a16b2302b33f49ba3fa24c955f1e32f7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 96b0b01677e2..c27bc95d763c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc, static void skylake_pfit_enable(struct intel_crtc *crtc); static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); -static void intel_modeset_setup_hw_state(struct drm_device *dev); +static void intel_modeset_setup_hw_state(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); struct intel_limit { @@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev, struct drm_crtc *crtc; int i, ret; - intel_modeset_setup_hw_state(dev); + intel_modeset_setup_hw_state(dev, ctx); i915_redisable_vga(to_i915(dev)); if (!state) @@ -15030,7 +15031,7 @@ int intel_modeset_init(struct drm_device *dev) intel_setup_outputs(dev_priv); drm_modeset_lock_all(dev); - intel_modeset_setup_hw_state(dev); + intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx); drm_modeset_unlock_all(dev); for_each_intel_crtc(dev, crtc) { @@ -15067,13 +15068,13 @@ int intel_modeset_init(struct drm_device *dev) return 0; } -static void intel_enable_pipe_a(struct drm_device *dev) +static void intel_enable_pipe_a(struct drm_device *dev, + struct 
drm_modeset_acquire_ctx *ctx) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_connector *crt = NULL; struct intel_load_detect_pipe load_detect_temp; - struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx; int ret; /* We can't just switch on the pipe A, we need to set things up with a @@ -15145,7 +15146,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv, (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); } -static void intel_sanitize_crtc(struct intel_crtc *crtc) +static void intel_sanitize_crtc(struct intel_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -15201,7 +15203,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * resume. Force-enable the pipe to fix this, the update_dpms * call below we restore the pipe to the right state, but leave * the required bits on. */ - intel_enable_pipe_a(dev); + intel_enable_pipe_a(dev, ctx); } /* Adjust the state of the output pipe according to whether we @@ -15505,7 +15507,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) * and sanitizes it to the current state */ static void -intel_modeset_setup_hw_state(struct drm_device *dev) +intel_modeset_setup_hw_state(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; @@ -15525,7 +15528,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev) for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - intel_sanitize_crtc(crtc); + intel_sanitize_crtc(crtc, ctx); intel_dump_pipe_config(crtc, crtc->config, "[setup_hw_state]"); } From b7f5dd36e0c5cb9ca1070a5e0f22f666bcff07ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 1 Jun 2017 17:36:14 +0300 Subject: [PATCH 261/341] drm/i915: Plumb the correct acquire ctx into intel_crtc_disable_noatomic() 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If intel_crtc_disable_noatomic() were to ever get called during resume we'd end up deadlocking since resume has its own acquire_ctx but intel_crtc_disable_noatomic() still tries to use the mode_config.acquire_ctx. Pass down the correct acquire ctx from the top. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170601143619.27840-3-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst (cherry picked from commit da1d0e265535634bba80d44510b864c620549bee) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c27bc95d763c..9106ea32b048 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5826,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_update_watermarks(intel_crtc); } -static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) +static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) { struct intel_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); @@ -5856,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) return; } - state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; + state->acquire_ctx = ctx; /* Everything's already locked, -EDEADLK can't happen.
*/ crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); @@ -15193,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, plane = crtc->plane; crtc->base.primary->state->visible = true; crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base); + intel_crtc_disable_noatomic(&crtc->base, ctx); crtc->plane = plane; } @@ -15209,7 +15210,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ if (crtc->active && !intel_crtc_has_encoders(crtc)) - intel_crtc_disable_noatomic(&crtc->base); + intel_crtc_disable_noatomic(&crtc->base, ctx); if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) { /* From dec6b33163d24e2c19ba521c89fffbaab53ae986 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 19 Jun 2017 19:46:00 +0530 Subject: [PATCH 262/341] cxgb4: notify uP to route ctrlq compl to rdma rspq During the module initialisation there is a possible race (basically race between uld and lld) where neither the uld nor lld notifies the uP about where to route the ctrl queue completions. LLD skips notifying uP as the rdma queues were not created by then (will leave it to ULD to notify the uP). As the ULD comes up, it also skips notifying the uP as the flag FULL_INIT_DONE is not set yet (ULD assumes that the interface is not up yet). Consequently, this race between uld and lld leaves uP unnotified about where to send the ctrl queue completions to, leading to iwarp RI_RES WR failure. Here is the race: CPU 0 CPU1 - allocates nic rx queues - t4_sge_alloc_ctrl_txq() (if rdma rsp queues exist, tell uP to route ctrl queue compl to rdma rspq) - acquires the mutex_lock - allocates rdma response queues - if FULL_INIT_DONE set, tell uP to route ctrl queue compl to rdma rspq - relinquishes mutex_lock - acquires the mutex_lock - enable_rx() - set FULL_INIT_DONE - relinquishes mutex_lock This patch fixes the above issue.
Fixes: e7519f9926f1('cxgb4: avoid enabling napi twice to the same queue') Signed-off-by: Raju Rangoju Acked-by: Steve Wise CC: Stable # 4.9+ Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index ea1bfcf1870a..53309f659951 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2171,9 +2171,10 @@ static int cxgb_up(struct adapter *adap) { int err; + mutex_lock(&uld_mutex); err = setup_sge_queues(adap); if (err) - goto out; + goto rel_lock; err = setup_rss(adap); if (err) goto freeq; @@ -2197,7 +2198,6 @@ static int cxgb_up(struct adapter *adap) goto irq_err; } - mutex_lock(&uld_mutex); enable_rx(adap); t4_sge_start(adap); t4_intr_enable(adap); @@ -2210,13 +2210,15 @@ static int cxgb_up(struct adapter *adap) #endif /* Initialize hash mac addr list*/ INIT_LIST_HEAD(&adap->mac_hlist); - out: return err; + irq_err: dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err); freeq: t4_free_sge_resources(adap); - goto out; + rel_lock: + mutex_unlock(&uld_mutex); + return err; } static void cxgb_down(struct adapter *adapter) From 4a9bfafc64f44ef83de4e00ca1b57352af6cd8c2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 11 Jun 2017 16:08:21 +0900 Subject: [PATCH 263/341] ALSA: firewire-lib: Fix stall of process context at packet error At Linux v3.5, packet processing can be done in process context of ALSA PCM application as well as software IRQ context for OHCI 1394. Below is an example of the callgraph (some calls are omitted). ioctl(2) with e.g. 
HWSYNC (sound/core/pcm_native.c) ->snd_pcm_common_ioctl1() ->snd_pcm_hwsync() ->snd_pcm_stream_lock_irq (sound/core/pcm_lib.c) ->snd_pcm_update_hw_ptr() ->snd_pcm_update_hw_ptr0() ->struct snd_pcm_ops.pointer() (sound/firewire/*) = Each handler on drivers in ALSA firewire stack (sound/firewire/amdtp-stream.c) ->amdtp_stream_pcm_pointer() (drivers/firewire/core-iso.c) ->fw_iso_context_flush_completions() ->struct fw_card_driver.flush_iso_completion() (drivers/firewire/ohci.c) = flush_iso_completions() ->struct fw_iso_context.callback.sc (sound/firewire/amdtp-stream.c) = in_stream_callback() or out_stream_callback() ->... ->snd_pcm_stream_unlock_irq When a packet queueing error occurs or invalid packets are detected in 'in_stream_callback()' or 'out_stream_callback()', 'snd_pcm_stop_xrun()' is called on local CPU with disabled IRQ. (sound/firewire/amdtp-stream.c) in_stream_callback() or out_stream_callback() ->amdtp_stream_pcm_abort() ->snd_pcm_stop_xrun() ->snd_pcm_stream_lock_irqsave() ->snd_pcm_stop() ->snd_pcm_stream_unlock_irqrestore() The process is stalled on the CPU due to an attempt to acquire the lock recursively. [ 562.630853] INFO: rcu_sched detected stalls on CPUs/tasks: [ 562.630861] 2-...: (1 GPs behind) idle=37d/140000000000000/0 softirq=38323/38323 fqs=7140 [ 562.630862] (detected by 3, t=15002 jiffies, g=21036, c=21035, q=5933) [ 562.630866] Task dump for CPU 2: [ 562.630867] alsa-source-OXF R running task 0 6619 1 0x00000008 [ 562.630870] Call Trace: [ 562.630876] ? vt_console_print+0x79/0x3e0 [ 562.630880] ? msg_print_text+0x9d/0x100 [ 562.630883] ? up+0x32/0x50 [ 562.630885] ? irq_work_queue+0x8d/0xa0 [ 562.630886] ? console_unlock+0x2b6/0x4b0 [ 562.630888] ? vprintk_emit+0x312/0x4a0 [ 562.630892] ? dev_vprintk_emit+0xbf/0x230 [ 562.630895] ? do_sys_poll+0x37a/0x550 [ 562.630897] ? dev_printk_emit+0x4e/0x70 [ 562.630900] ? __dev_printk+0x3c/0x80 [ 562.630903] ? _raw_spin_lock+0x20/0x30 [ 562.630909] ? snd_pcm_stream_lock+0x31/0x50 [snd_pcm] [ 562.630914] ?
_snd_pcm_stream_lock_irqsave+0x2e/0x40 [snd_pcm] [ 562.630918] ? snd_pcm_stop_xrun+0x16/0x70 [snd_pcm] [ 562.630922] ? in_stream_callback+0x3e6/0x450 [snd_firewire_lib] [ 562.630925] ? handle_ir_packet_per_buffer+0x8e/0x1a0 [firewire_ohci] [ 562.630928] ? ohci_flush_iso_completions+0xa3/0x130 [firewire_ohci] [ 562.630932] ? fw_iso_context_flush_completions+0x15/0x20 [firewire_core] [ 562.630935] ? amdtp_stream_pcm_pointer+0x2d/0x40 [snd_firewire_lib] [ 562.630938] ? pcm_capture_pointer+0x19/0x20 [snd_oxfw] [ 562.630943] ? snd_pcm_update_hw_ptr0+0x47/0x3d0 [snd_pcm] [ 562.630945] ? poll_select_copy_remaining+0x150/0x150 [ 562.630947] ? poll_select_copy_remaining+0x150/0x150 [ 562.630952] ? snd_pcm_update_hw_ptr+0x10/0x20 [snd_pcm] [ 562.630956] ? snd_pcm_hwsync+0x45/0xb0 [snd_pcm] [ 562.630960] ? snd_pcm_common_ioctl1+0x1ff/0xc90 [snd_pcm] [ 562.630962] ? futex_wake+0x90/0x170 [ 562.630966] ? snd_pcm_capture_ioctl1+0x136/0x260 [snd_pcm] [ 562.630970] ? snd_pcm_capture_ioctl+0x27/0x40 [snd_pcm] [ 562.630972] ? do_vfs_ioctl+0xa3/0x610 [ 562.630974] ? vfs_read+0x11b/0x130 [ 562.630976] ? SyS_ioctl+0x79/0x90 [ 562.630978] ? entry_SYSCALL_64_fastpath+0x1e/0xad This commit fixes the above bug. This assumes two cases: 1. Any error is detected in software IRQ context of OHCI 1394. In this case, PCM substream should be aborted in packet handler. On the other hand, it should not be done in any process context. To distinguish these two contexts, use 'in_interrupt()' macro. 2. Any error is detected in process context of ALSA PCM application. In this case, PCM substream should not be aborted in packet handler because PCM substream lock is acquired. The task to abort PCM substream should be done in ALSA PCM core. For this purpose, SNDRV_PCM_POS_XRUN is returned at 'struct snd_pcm_ops.pointer()'.
Suggested-by: Clemens Ladisch Fixes: e9148dddc3c7("ALSA: firewire-lib: flush completed packets when reading PCM position") Cc: # 4.9+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream.c | 8 ++++++-- sound/firewire/amdtp-stream.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 9e6f54f8c45d..1e26854b3425 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp, cycle = increment_cycle_count(cycle, 1); if (s->handle_packet(s, 0, cycle, i) < 0) { s->packet_index = -1; - amdtp_stream_pcm_abort(s); + if (in_interrupt()) + amdtp_stream_pcm_abort(s); + WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN); return; } } @@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp, /* Queueing error or detecting invalid payload. */ if (i < packets) { s->packet_index = -1; - amdtp_stream_pcm_abort(s); + if (in_interrupt()) + amdtp_stream_pcm_abort(s); + WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN); return; } diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index 7e8831722821..ea1a91e99875 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -135,7 +135,7 @@ struct amdtp_stream { /* For a PCM substream processing. */ struct snd_pcm_substream *pcm; struct tasklet_struct period_tasklet; - unsigned int pcm_buffer_pointer; + snd_pcm_uframes_t pcm_buffer_pointer; unsigned int pcm_period_pointer; /* To wait for first packet. */ From 9745e362add89432d2c951272a99b0a5fe4348a9 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 16 Jun 2017 15:00:02 +0800 Subject: [PATCH 264/341] net: 8021q: Fix one possible panic caused by BUG_ON in free_netdev The register_vlan_device would invoke free_netdev directly, when register_vlan_dev failed. 
It would trigger the BUG_ON in free_netdev if the dev was already registered. In this case, the netdev would be freed in netdev_run_todo later. So add one condition check now. Only when dev is not registered, then free it directly. The following is the part coredump when netdev_upper_dev_link failed in register_vlan_dev. I removed the lines which are too long. [ 411.237457] ------------[ cut here ]------------ [ 411.237458] kernel BUG at net/core/dev.c:7998! [ 411.237484] invalid opcode: 0000 [#1] SMP [ 411.237705] [last unloaded: 8021q] [ 411.237718] CPU: 1 PID: 12845 Comm: vconfig Tainted: G E 4.12.0-rc5+ #6 [ 411.237737] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 411.237764] task: ffff9cbeb6685580 task.stack: ffffa7d2807d8000 [ 411.237782] RIP: 0010:free_netdev+0x116/0x120 [ 411.237794] RSP: 0018:ffffa7d2807dbdb0 EFLAGS: 00010297 [ 411.237808] RAX: 0000000000000002 RBX: ffff9cbeb6ba8fd8 RCX: 0000000000001878 [ 411.237826] RDX: 0000000000000001 RSI: 0000000000000282 RDI: 0000000000000000 [ 411.237844] RBP: ffffa7d2807dbdc8 R08: 0002986100029841 R09: 0002982100029801 [ 411.237861] R10: 0004000100029980 R11: 0004000100029980 R12: ffff9cbeb6ba9000 [ 411.238761] R13: ffff9cbeb6ba9060 R14: ffff9cbe60f1a000 R15: ffff9cbeb6ba9000 [ 411.239518] FS: 00007fb690d81700(0000) GS:ffff9cbebb640000(0000) knlGS:0000000000000000 [ 411.239949] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 411.240454] CR2: 00007f7115624000 CR3: 0000000077cdf000 CR4: 00000000003406e0 [ 411.240936] Call Trace: [ 411.241462] vlan_ioctl_handler+0x3f1/0x400 [8021q] [ 411.241910] sock_ioctl+0x18b/0x2c0 [ 411.242394] do_vfs_ioctl+0xa1/0x5d0 [ 411.242853] ? 
sock_alloc_file+0xa6/0x130 [ 411.243465] SyS_ioctl+0x79/0x90 [ 411.243900] entry_SYSCALL_64_fastpath+0x1e/0xa9 [ 411.244425] RIP: 0033:0x7fb69089a357 [ 411.244863] RSP: 002b:00007ffcd04e0fc8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 411.245445] RAX: ffffffffffffffda RBX: 00007ffcd04e2884 RCX: 00007fb69089a357 [ 411.245903] RDX: 00007ffcd04e0fd0 RSI: 0000000000008983 RDI: 0000000000000003 [ 411.246527] RBP: 00007ffcd04e0fd0 R08: 0000000000000000 R09: 1999999999999999 [ 411.246976] R10: 000000000000053f R11: 0000000000000202 R12: 0000000000000004 [ 411.247414] R13: 00007ffcd04e1128 R14: 00007ffcd04e2888 R15: 0000000000000001 [ 411.249129] RIP: free_netdev+0x116/0x120 RSP: ffffa7d2807dbdb0 Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 467069b73ce1..9649579b5b9f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -277,7 +277,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - free_netdev(new_dev); + if (new_dev->reg_state == NETREG_UNINITIALIZED) + free_netdev(new_dev); return err; } From 7fe5b914313ff67d71cb2b5aa4b850e0884e75dd Mon Sep 17 00:00:00 2001 From: Lin Yun Sheng Date: Fri, 16 Jun 2017 17:24:51 +0800 Subject: [PATCH 265/341] net/hns:bugfix of ethtool -t phy self_test This patch fixes the phy loopback self_test failed issue. when Marvell Phy Module is loaded, it will powerdown fiber when doing phy loopback self test, which cause phy loopback self_test fail. Signed-off-by: Lin Yun Sheng Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index b8fab149690f..e95795b3c841 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -288,9 +288,15 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en) /* Force 1000M Link, Default is 0x0200 */ phy_write(phy_dev, 7, 0x20C); - phy_write(phy_dev, HNS_PHY_PAGE_REG, 0); - /* Enable PHY loop-back */ + /* Powerup Fiber */ + phy_write(phy_dev, HNS_PHY_PAGE_REG, 1); + val = phy_read(phy_dev, COPPER_CONTROL_REG); + val &= ~PHY_POWER_DOWN; + phy_write(phy_dev, COPPER_CONTROL_REG, val); + + /* Enable Phy Loopback */ + phy_write(phy_dev, HNS_PHY_PAGE_REG, 0); val = phy_read(phy_dev, COPPER_CONTROL_REG); val |= PHY_LOOP_BACK; val &= ~PHY_POWER_DOWN; @@ -299,6 +305,12 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en) phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA); phy_write(phy_dev, 1, 0x400); phy_write(phy_dev, 7, 0x200); + + phy_write(phy_dev, HNS_PHY_PAGE_REG, 1); + val = phy_read(phy_dev, COPPER_CONTROL_REG); + val |= PHY_POWER_DOWN; + phy_write(phy_dev, COPPER_CONTROL_REG, val); + phy_write(phy_dev, HNS_PHY_PAGE_REG, 0); phy_write(phy_dev, 9, 0xF00); From 94fc795454f461134cdffb88bef4eb9f788c0b5d Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 4 May 2017 11:36:52 -0500 Subject: [PATCH 266/341] ntb: Correct modinfo usage statement for ntb_perf The order parameters are powers of 2; adjust the usage information to use correct mathematical representations. 
Signed-off-by: Gary R Hook Fixes: 8a7b6a778a85 ("ntb: ntb perf tool") Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 434e1d474f33..5cab2831ce99 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -90,11 +90,11 @@ MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); static unsigned int seg_order = 19; /* 512K */ module_param(seg_order, uint, 0644); -MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing"); +MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing"); static unsigned int run_order = 32; /* 4G */ module_param(run_order, uint, 0644); -MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer"); +MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer"); static bool use_dma; /* default to 0 */ module_param(use_dma, bool, 0644); From 07b0b22b3e58824f70b9188d085d400069ca3240 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 5 Jun 2017 10:13:24 -0600 Subject: [PATCH 267/341] NTB: ntb_test: fix bug printing ntb_perf results The code mistakenly prints the local perf results for the remote test so the script reports identical results for both directions. Fix this by ensuring we print the remote result. 
Signed-off-by: Logan Gunthorpe Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem") Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index a676d3eefefb..13f5198ba0ee 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -305,7 +305,7 @@ function perf_test() echo "Running remote perf test $WITH DMA" write_file "" $REMOTE_PERF/run echo -n " " - read_file $LOCAL_PERF/run + read_file $REMOTE_PERF/run echo " Passed" _modprobe -r ntb_perf From cb827ee6ccc3e480f0d9c0e8e53eef55be5b0414 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 5 Jun 2017 14:00:52 -0600 Subject: [PATCH 268/341] ntb_transport: fix qp count bug In cases where there are more mw's than spads/2-2, the mw count gets reduced to match the limitation. ntb_transport also tries to ensure that there are fewer qps than mws but uses the full mw count instead of the reduced one. When this happens, the math in 'ntb_transport_setup_qp_mw' will get confused and result in a kernel paging request bug. This patch fixes the bug by reducing qp_count to the reduced mw count instead of the full mw count. 
Signed-off-by: Logan Gunthorpe Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 02ca45fdd892..0a778d2cab94 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1128,8 +1128,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) qp_count = ilog2(qp_bitmap); if (max_num_clients && max_num_clients < qp_count) qp_count = max_num_clients; - else if (mw_count < qp_count) - qp_count = mw_count; + else if (nt->mw_count < qp_count) + qp_count = nt->mw_count; qp_bitmap &= BIT_ULL(qp_count) - 1; From 8e8496e0e9564b66165f5219a4e8ed20b0d3fc6b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 5 Jun 2017 14:00:53 -0600 Subject: [PATCH 269/341] ntb_transport: fix bug calculating num_qps_mw A divide by zero error occurs if qp_count is less than mw_count because num_qps_mw is calculated to be zero. The calculation appears to be incorrect. The requirement is for num_qps_mw to be set to qp_count / mw_count with any remainder divided among the earlier mws. For example, if mw_count is 5 and qp_count is 12 then mws 0 and 1 will have 3 qps per window and mws 2 through 4 will have 2 qps per window. Thus, when mw_num < qp_count % mw_count, num_qps_mw is 1 higher than when mw_num >= qp_count % mw_count.
Signed-off-by: Logan Gunthorpe Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 0a778d2cab94..5b6b00ea6ed9 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -623,7 +623,7 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, if (!mw->virt_addr) return -ENOMEM; - if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + if (mw_num < qp_count % mw_count) num_qps_mw = qp_count / mw_count + 1; else num_qps_mw = qp_count / mw_count; @@ -1000,7 +1000,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, qp->event_handler = NULL; ntb_qp_link_down_reset(qp); - if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + if (mw_num < qp_count % mw_count) num_qps_mw = qp_count / mw_count + 1; else num_qps_mw = qp_count / mw_count; From 5eb449e15d2396785a8eb15baf42cea33db9ae13 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 8 Jun 2017 12:46:45 -0700 Subject: [PATCH 270/341] ntb: ntb_hw_intel: Skylake doorbells should be 32bits, not 64bits Fixing doorbell register length to 32bits per spec. On Skylake NTB, the doorbell registers are 32bit write only registers. The source for the doorbell is a 64bit register that shows the interrupt bits. 
Signed-off-by: Dave Jiang Fixes: 783dfa6cc41b ("ntb: Adding Skylake Xeon NTB support") Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index c00238491673..7b3b6fd63d7d 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -2878,7 +2878,7 @@ static const struct intel_ntb_reg skx_reg = { .link_is_up = xeon_link_is_up, .db_ioread = skx_db_ioread, .db_iowrite = skx_db_iowrite, - .db_size = sizeof(u64), + .db_size = sizeof(u32), .ntb_ctl = SKX_NTBCNTL_OFFSET, .mw_bar = {2, 4}, }; From 88931ec3dc11e7dbceb3b0df455693873b508fbe Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Fri, 9 Jun 2017 18:06:36 -0400 Subject: [PATCH 271/341] ntb: no sleep in ntb_async_tx_submit Do not sleep in ntb_async_tx_submit, which could deadlock. This reverts commit "8c874cc140d667f84ae4642bb5b5e0d6396d2ca4" Fixes: 8c874cc140d6 ("NTB: Address out of DMA descriptor issue with NTB") Reported-by: Jia-Ju Bai Signed-off-by: Allen Hubbe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 50 ++++++------------------------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 5b6b00ea6ed9..10e5bf460139 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -177,14 +177,12 @@ struct ntb_transport_qp { u64 rx_err_ver; u64 rx_memcpy; u64 rx_async; - u64 dma_rx_prep_err; u64 tx_bytes; u64 tx_pkts; u64 tx_ring_full; u64 tx_err_no_buf; u64 tx_memcpy; u64 tx_async; - u64 dma_tx_prep_err; }; struct ntb_transport_mw { @@ -254,8 +252,6 @@ enum { #define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 -#define DMA_RETRIES 20 -#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) static void 
ntb_transport_rxc_db(unsigned long data); static const struct ntb_ctx_ops ntb_transport_ops; @@ -516,12 +512,6 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, - "DMA tx prep err - \t%llu\n", - qp->dma_tx_prep_err); - out_offset += snprintf(buf + out_offset, out_count - out_offset, - "DMA rx prep err - \t%llu\n", - qp->dma_rx_prep_err); out_offset += snprintf(buf + out_offset, out_count - out_offset, "\n"); @@ -768,8 +758,6 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) qp->tx_err_no_buf = 0; qp->tx_memcpy = 0; qp->tx_async = 0; - qp->dma_tx_prep_err = 0; - qp->dma_rx_prep_err = 0; } static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) @@ -1317,7 +1305,6 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset) struct dmaengine_unmap_data *unmap; dma_cookie_t cookie; void *buf = entry->buf; - int retries = 0; len = entry->len; device = chan->device; @@ -1346,22 +1333,11 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset) unmap->from_cnt = 1; - for (retries = 0; retries < DMA_RETRIES; retries++) { - txd = device->device_prep_dma_memcpy(chan, - unmap->addr[1], - unmap->addr[0], len, - DMA_PREP_INTERRUPT); - if (txd) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(DMA_OUT_RESOURCE_TO); - } - - if (!txd) { - qp->dma_rx_prep_err++; + txd = device->device_prep_dma_memcpy(chan, unmap->addr[1], + unmap->addr[0], len, + DMA_PREP_INTERRUPT); + if (!txd) goto err_get_unmap; - } txd->callback_result = ntb_rx_copy_callback; txd->callback_param = entry; @@ -1606,7 +1582,6 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp, struct dmaengine_unmap_data *unmap; dma_addr_t dest; dma_cookie_t cookie; - int retries = 0; device = chan->device; dest = 
qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index; @@ -1628,21 +1603,10 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp, unmap->to_cnt = 1; - for (retries = 0; retries < DMA_RETRIES; retries++) { - txd = device->device_prep_dma_memcpy(chan, dest, - unmap->addr[0], len, - DMA_PREP_INTERRUPT); - if (txd) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(DMA_OUT_RESOURCE_TO); - } - - if (!txd) { - qp->dma_tx_prep_err++; + txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len, + DMA_PREP_INTERRUPT); + if (!txd) goto err_get_unmap; - } txd->callback_result = ntb_tx_copy_callback; txd->callback_param = entry; From 86fdb3448cc1ffe0e9f55380f1410f1d12c35f95 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 17 Jun 2017 16:10:27 +0800 Subject: [PATCH 272/341] sctp: ensure ep is not destroyed before doing the dump Now before dumping a sock in sctp_diag, it only holds the sock while the ep may be already destroyed. It can cause a use-after-free panic when accessing ep->asocs. This patch is to set sctp_sk(sk)->ep NULL in sctp_endpoint_destroy, and check if this ep is already destroyed before dumping this ep. Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/sctp/endpointola.c | 1 + net/sctp/sctp_diag.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8c589230794f..3dcd0ecf3d99 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -275,6 +275,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); + sctp_sk(sk)->ep = NULL; sock_put(sk); } diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 048954eee984..9a647214a91e 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -278,7 +278,6 @@ out: static int sctp_sock_dump(struct sock *sk, void *p) { - struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_comm_param *commp = p; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; @@ -287,7 +286,9 @@ static int sctp_sock_dump(struct sock *sk, void *p) int err = 0; lock_sock(sk); - list_for_each_entry(assoc, &ep->asocs, asocs) { + if (!sctp_sk(sk)->ep) + goto release; + list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; From a8ae0a773d38b4b1d4566b0edcb6bb63f4a9d22f Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Mon, 19 Jun 2017 11:08:28 -0700 Subject: [PATCH 273/341] drm/i915: Don't enable backlight at setup time. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maarten and Ville noticed that we are enabling backlight via DP aux very early in the modeset_init path via the intel_dp_aux_setup_backlight() function, since commit e7156c833903 ("drm/i915: Add Backlight Control using DPCD for eDP connectors (v9)"). Looks like all we need to do during _setup_backlight() is read the current brightness state instead of modifying it. v2: Rewrote commit message. 
Cc: Ville Syrjala Cc: Maarten Lankhorst Cc: Jani Nikula Cc: Yetunde Adebisi Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Maarten Lankhorst Acked-by: Jani Nikula Tested-by: Puthikorn Voravootivat Fixes: e7156c833903 ("drm/i915: Add Backlight Control using DPCD for eDP connectors (v9)") Link: http://patchwork.freedesktop.org/patch/msgid/1497384239-2965-1-git-send-email-dhinakaran.pandiyan@intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit f6262bda462e81e959b80a96dac799bd9df27f73) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1497895708-19422-1-git-send-email-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 6532e226db29..40ba3134545e 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); struct intel_panel *panel = &connector->panel; - intel_dp_aux_enable_backlight(connector); - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) panel->backlight.max = 0xFFFF; else From 6e88491cf2a3b17199c78bd53348b39dc6a88275 Mon Sep 17 00:00:00 2001 From: Junshan Fang Date: Thu, 15 Jun 2017 14:02:20 +0800 Subject: [PATCH 274/341] drm/amdgpu: add Polaris12 DID Signed-off-by: Junshan Fang Reviewed-by: Roger.He Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f2d705e6a75a..ab6b0d0febab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -449,6 +449,7 @@ static 
const struct pci_device_id pciidlist[] = { {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, /* Vega 10 */ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, From 4a630fadbb29d9efaedb525f1a8f7449ad107641 Mon Sep 17 00:00:00 2001 From: Kasin Li Date: Mon, 19 Jun 2017 15:36:53 -0600 Subject: [PATCH 275/341] drm/msm: Fix potential buffer overflow issue In function submit_create, if nr_cmds or nr_bos is assigned a negative value, the allocated buffer may be smaller than intended. Using this buffer will lead to a buffer overflow. Signed-off-by: Kasin Li Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 179cfc60b6ca..6bfca7470141 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -31,11 +31,14 @@ #define BO_PINNED 0x2000 static struct msm_gem_submit *submit_create(struct drm_device *dev, - struct msm_gpu *gpu, int nr_bos, int nr_cmds) + struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds) { struct msm_gem_submit *submit; - int sz = sizeof(*submit) + (nr_bos * sizeof(submit->bos[0])) + - (nr_cmds * sizeof(*submit->cmd)); + uint64_t sz = sizeof(*submit) + (nr_bos * sizeof(submit->bos[0])) + + (nr_cmds * sizeof(submit->cmd[0])); + + if (sz > SIZE_MAX) + return NULL; submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); if (!submit) From 4a072c71f49b0a0e495ea13423bdb850da73c58c Mon Sep 17 00:00:00 2001 From: "Jason A.
Donenfeld" Date: Thu, 15 Jun 2017 00:45:26 +0200 Subject: [PATCH 276/341] random: silence compiler warnings and fix race Odd versions of gcc for the sh4 architecture will actually warn about flags being used while uninitialized, so we set them to zero. Non crazy gccs will optimize that out again, so it doesn't make a difference. Next, over aggressive gccs could inline the expression that defines use_lock, which could then introduce a race resulting in a lock imbalance. By using READ_ONCE, we prevent that fate. Finally, we make that assignment const, so that gcc can still optimize a nice amount. Finally, we fix a potential deadlock between primary_crng.lock and batched_entropy_reset_lock, where they could be called in opposite order. Moving the call to invalidate_batched_entropy to outside the lock rectifies this issue. Fixes: b169c13de473a85b3c859bb36216a4cb5f00a54a Signed-off-by: Jason A. Donenfeld Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- drivers/char/random.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index e870f329db88..01a260f67437 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -803,13 +803,13 @@ static int crng_fast_load(const char *cp, size_t len) p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; } + spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); crng_init = 1; wake_up_interruptible(&crng_init_wait); pr_notice("random: fast init done\n"); } - spin_unlock_irqrestore(&primary_crng.lock, flags); return 1; } @@ -841,6 +841,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } memzero_explicit(&buf, sizeof(buf)); crng->init_time = jiffies; + spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng == &primary_crng && crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; @@ -848,7 +849,6 @@ static void 
crng_reseed(struct crng_state *crng, struct entropy_store *r) wake_up_interruptible(&crng_init_wait); pr_notice("random: crng init done\n"); } - spin_unlock_irqrestore(&primary_crng.lock, flags); } static inline void crng_wait_ready(void) @@ -2041,8 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); u64 get_random_u64(void) { u64 ret; - bool use_lock = crng_init < 2; - unsigned long flags; + bool use_lock = READ_ONCE(crng_init) < 2; + unsigned long flags = 0; struct batched_entropy *batch; #if BITS_PER_LONG == 64 @@ -2073,8 +2073,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32); u32 get_random_u32(void) { u32 ret; - bool use_lock = crng_init < 2; - unsigned long flags; + bool use_lock = READ_ONCE(crng_init) < 2; + unsigned long flags = 0; struct batched_entropy *batch; if (arch_get_random_int(&ret)) From 6ebf81536d3be327c4f5f59bae3b841d62322343 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Thu, 15 Jun 2017 00:10:39 -0700 Subject: [PATCH 277/341] scsi: qedi: Remove WARN_ON for untracked cleanup. Signed-off-by: Manish Rangankar Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_fw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 8bc7ee1a8ca8..507512cc478b 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, QEDI_ERR(&qedi->dbg_ctx, "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n", protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task); - WARN_ON(1); } } From 02d94e04747c5df55410c7b19f3cf72a1a11899b Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Thu, 15 Jun 2017 00:10:40 -0700 Subject: [PATCH 278/341] scsi: qedi: Remove WARN_ON from clear task context. Signed-off-by: Manish Rangankar Reviewed-by: Lee Duncan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedi/qedi_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 09a294634bc7..879d3b7462f9 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1499,11 +1499,9 @@ err_idx: void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx) { - if (!test_and_clear_bit(idx, qedi->task_idx_map)) { + if (!test_and_clear_bit(idx, qedi->task_idx_map)) QEDI_ERR(&qedi->dbg_ctx, "FW task context, already cleared, tid=0x%x\n", idx); - WARN_ON(1); - } } void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt, From 817ae460c784f32cd45e60b2b1b21378c3c6a847 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Mon, 19 Jun 2017 19:48:52 -0700 Subject: [PATCH 279/341] Input: i8042 - add Fujitsu Lifebook AH544 to notimeout list Without this quirk, the touchpad is not responsive on this product, with the following message repeated in the logs: psmouse serio1: bad data from KBC - timeout Add it to the notimeout list alongside other similar Fujitsu laptops. 
Signed-off-by: Daniel Drake Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 09720d950686..f932a83b4990 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"), }, }, + { + /* Fujitsu UH554 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"), + }, + }, { } }; From c7ecb9068e6772c43941ce609f08bc53f36e1dce Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jun 2017 07:37:14 +0200 Subject: [PATCH 280/341] ALSA: hda - Apply quirks to Broxton-T, too Broxton-T was a forgotten child and we didn't apply the quirks for Skylake+ properly. Meanwhile, a quirk for reducing the DMA latency seems specific to the early Broxton model, so we leave as is. 
Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e3c696c46a21..01eb1dc7b5b3 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -370,11 +370,12 @@ enum { #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) +#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98) #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198) #define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) -#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ - IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \ - IS_GLK(pci) || IS_CFL(pci) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \ + IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \ + IS_KBL_H(pci) || IS_GLK(pci) || IS_CFL(pci)) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", From 53145c2e354b5a5ed031cec7472b4f16bab060c7 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Thu, 15 Jun 2017 13:35:50 +0100 Subject: [PATCH 281/341] Revert "HID: magicmouse: Set multi-touch keybits for Magic Mouse" Setting these bits causes libinput to fail to initialize the device; setting BTN_TOUCH and BTN_TOOL_FINGER causes it to treat the mouse as a touchpad, and it then refuses to continue when it discovers ABS_X is not set. This breaks all known Wayland compositors, as well as Xorg when the libinput driver is being used. This reverts commit f4b65b9563216b3e01a5cc844c3ba68901d9b195. 
Signed-off-by: Daniel Stone Cc: Che-Liang Chiou Cc: Thierry Escande Cc: Jiri Kosina Cc: Benjamin Tissoires Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 1d6c997b3001..20b40ad26325 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -349,7 +349,6 @@ static int magicmouse_raw_event(struct hid_device *hdev, if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { magicmouse_emit_buttons(msc, clicks & 3); - input_mt_report_pointer_emulation(input, true); input_report_rel(input, REL_X, x); input_report_rel(input, REL_Y, y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ @@ -389,16 +388,16 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd __clear_bit(BTN_RIGHT, input->keybit); __clear_bit(BTN_MIDDLE, input->keybit); __set_bit(BTN_MOUSE, input->keybit); + __set_bit(BTN_TOOL_FINGER, input->keybit); + __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); + __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); + __set_bit(BTN_TOOL_QUADTAP, input->keybit); + __set_bit(BTN_TOOL_QUINTTAP, input->keybit); + __set_bit(BTN_TOUCH, input->keybit); + __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); } - __set_bit(BTN_TOOL_FINGER, input->keybit); - __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); - __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); - __set_bit(BTN_TOOL_QUADTAP, input->keybit); - __set_bit(BTN_TOOL_QUINTTAP, input->keybit); - __set_bit(BTN_TOUCH, input->keybit); - __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(EV_ABS, input->evbit); From ceea5e3771ed2378668455fa21861bead7504df5 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 8 Jun 2017 16:44:20 -0700 Subject: [PATCH 282/341] time: Fix clock->read(clock) race around clocksource changes In tests, which exercise switching of
clocksources, a NULL pointer dereference can be observed on ARM64 platforms in the clocksource read() function: u64 clocksource_mmio_readl_down(struct clocksource *c) { return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask; } This is called from the core timekeeping code via: cycle_now = tkr->read(tkr->clock); tkr->read is the cached tkr->clock->read() function pointer. When the clocksource is changed then tkr->clock and tkr->read are updated sequentially. The code above results in a sequential load operation of tkr->read and tkr->clock as well. If the store to tkr->clock hits between the loads of tkr->read and tkr->clock, then the old read() function is called with the new clock pointer. As a consequence the read() function dereferences a different data structure and the resulting 'reg' pointer can point anywhere including NULL. This problem was introduced when the timekeeping code was switched over to use struct tk_read_base. Before that, it was theoretically possible as well when the compiler decided to reload clock in the code sequence: now = tk->clock->read(tk->clock); Add a helper function which avoids the issue by reading tk_read_base->clock once into a local variable clk and then issue the read function via clk->read(clk). This guarantees that the read() function always gets the proper clocksource pointer handed in. Since there is now no use for the tkr.read pointer, this patch also removes it, and to address stopping the fast timekeeper during suspend/resume, it introduces a dummy clocksource to use rather than just a dummy read function.
Signed-off-by: John Stultz Acked-by: Ingo Molnar Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: stable Cc: Miroslav Lichvar Cc: Daniel Mentz Link: http://lkml.kernel.org/r/1496965462-20003-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/timekeeper_internal.h | 1 - kernel/time/timekeeping.c | 52 ++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 110f4532188c..e9834ada4d0c 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -29,7 +29,6 @@ */ struct tk_read_base { struct clocksource *clock; - u64 (*read)(struct clocksource *cs); u64 mask; u64 cycle_last; u32 mult; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 9652bc57fd09..eff94cb8e89e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) tk->offs_boot = ktime_add(tk->offs_boot, delta); } +/* + * tk_clock_read - atomic clocksource read() helper + * + * This helper is necessary to use in the read paths because, while the + * seqlock ensures we don't return a bad value while structures are updated, + * it doesn't protect from potential crashes. There is the possibility that + * the tkr's clocksource may change between the read reference, and the + * clock reference passed to the read function. This can cause crashes if + * the wrong clocksource is passed to the wrong read function. + * This isn't necessary to use when holding the timekeeper_lock or doing + * a read of the fast-timekeeper tkrs (which is protected by its own locking + * and update logic). 
+ */ +static inline u64 tk_clock_read(struct tk_read_base *tkr) +{ + struct clocksource *clock = READ_ONCE(tkr->clock); + + return clock->read(clock); +} + #ifdef CONFIG_DEBUG_TIMEKEEPING #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ @@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) */ do { seq = read_seqcount_begin(&tk_core.seq); - now = tkr->read(tkr->clock); + now = tk_clock_read(tkr); last = tkr->cycle_last; mask = tkr->mask; max = tkr->clock->max_cycles; @@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) u64 cycle_now, delta; /* read clocksource */ - cycle_now = tkr->read(tkr->clock); + cycle_now = tk_clock_read(tkr); /* calculate the delta since the last update_wall_time */ delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); @@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) ++tk->cs_was_changed_seq; old_clock = tk->tkr_mono.clock; tk->tkr_mono.clock = clock; - tk->tkr_mono.read = clock->read; tk->tkr_mono.mask = clock->mask; - tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); + tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono); tk->tkr_raw.clock = clock; - tk->tkr_raw.read = clock->read; tk->tkr_raw.mask = clock->mask; tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; @@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) now += timekeeping_delta_to_ns(tkr, clocksource_delta( - tkr->read(tkr->clock), + tk_clock_read(tkr), tkr->cycle_last, tkr->mask)); } while (read_seqcount_retry(&tkf->seq, seq)); @@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs) return cycles_at_suspend; } +static struct clocksource dummy_clock = { + .read = dummy_clock_read, +}; + /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. 
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk) struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); - cycles_at_suspend = tkr->read(tkr->clock); - tkr_dummy.read = dummy_clock_read; + cycles_at_suspend = tk_clock_read(tkr); + tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); tkr = &tk->tkr_raw; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); - tkr_dummy.read = dummy_clock_read; + tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } @@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr_mono.clock; u64 cycle_now, delta; u64 nsec; - cycle_now = tk->tkr_mono.read(clock); + cycle_now = tk_clock_read(&tk->tkr_mono); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; @@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) do { seq = read_seqcount_begin(&tk_core.seq); - - now = tk->tkr_mono.read(tk->tkr_mono.clock); + now = tk_clock_read(&tk->tkr_mono); systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; base_real = ktime_add(tk->tkr_mono.base, @@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn) * Check whether the system counter value provided by the * device driver is on the current timekeeping interval. */ - now = tk->tkr_mono.read(tk->tkr_mono.clock); + now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; if (!cycle_between(interval_start, cycles, now)) { clock_was_set_seq = tk->clock_was_set_seq; @@ -1629,7 +1649,7 @@ void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. 
The rtc part is handled separately in rtc core code. */ - cycle_now = tk->tkr_mono.read(clock); + cycle_now = tk_clock_read(&tk->tkr_mono); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && cycle_now > tk->tkr_mono.cycle_last) { u64 nsec, cyc_delta; @@ -2030,7 +2050,7 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), + offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask); #endif From 3d88d56c5873f6eebe23e05c3da701960146b801 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 8 Jun 2017 16:44:21 -0700 Subject: [PATCH 283/341] time: Fix CLOCK_MONOTONIC_RAW sub-nanosecond accounting Due to how the MONOTONIC_RAW accumulation logic was handled, there is the potential for a 1ns discontinuity when we do accumulations. This small discontinuity has for the most part gone un-noticed, but since ARM64 enabled CLOCK_MONOTONIC_RAW in their vDSO clock_gettime implementation, we've seen failures with the inconsistency-check test in kselftest. This patch addresses the issue by using the same sub-ns accumulation handling that CLOCK_MONOTONIC uses, which avoids the issue for in-kernel users. Since the ARM64 vDSO implementation has its own clock_gettime calculation logic, this patch reduces the frequency of errors, but failures are still seen. The ARM64 vDSO will need to be updated to include the sub-nanosecond xtime_nsec values in its calculation for this issue to be completely fixed. Signed-off-by: John Stultz Tested-by: Daniel Mentz Cc: Prarit Bhargava Cc: Kevin Brodsky Cc: Richard Cochran Cc: Stephen Boyd Cc: Will Deacon Cc: "stable #4 . 
8+" Cc: Miroslav Lichvar Link: http://lkml.kernel.org/r/1496965462-20003-3-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/timekeeper_internal.h | 4 ++-- kernel/time/timekeeping.c | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e9834ada4d0c..f7043ccca81c 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -57,7 +57,7 @@ struct tk_read_base { * interval. * @xtime_remainder: Shifted nano seconds left over when rounding * @cycle_interval - * @raw_interval: Raw nano seconds accumulated per NTP interval. + * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. * @ntp_error: Difference between accumulated time and NTP time in ntp * shifted nano seconds. * @ntp_error_shift: Shift conversion between clock shifted nano seconds and @@ -99,7 +99,7 @@ struct timekeeper { u64 cycle_interval; u64 xtime_interval; s64 xtime_remainder; - u32 raw_interval; + u64 raw_interval; /* The ntp_tick_length() value currently being used. 
* This cached copy ensures we consistently apply the tick * length for an entire tick, as ntp_tick_length may change diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index eff94cb8e89e..b602c48cb841 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -280,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) /* Go back from cycles -> shifted ns */ tk->xtime_interval = interval * clock->mult; tk->xtime_remainder = ntpinterval - tk->xtime_interval; - tk->raw_interval = (interval * clock->mult) >> clock->shift; + tk->raw_interval = interval * clock->mult; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { @@ -1996,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, u32 shift, unsigned int *clock_set) { u64 interval = tk->cycle_interval << shift; - u64 raw_nsecs; + u64 snsec_per_sec; /* If the offset is smaller than a shifted interval, do nothing */ if (offset < interval) @@ -2011,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ - raw_nsecs = (u64)tk->raw_interval << shift; - raw_nsecs += tk->raw_time.tv_nsec; - if (raw_nsecs >= NSEC_PER_SEC) { - u64 raw_secs = raw_nsecs; - raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); - tk->raw_time.tv_sec += raw_secs; + tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift; + tk->tkr_raw.xtime_nsec += tk->raw_interval << shift; + snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift; + while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) { + tk->tkr_raw.xtime_nsec -= snsec_per_sec; + tk->raw_time.tv_sec++; } - tk->raw_time.tv_nsec = raw_nsecs; + tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift; + tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift; /* Accumulate error between NTP and clock interval */ tk->ntp_error += tk->ntp_tick << shift; From 
dbb236c1ceb697a559e0694ac4c9e7b9131d0b16 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 8 Jun 2017 16:44:22 -0700 Subject: [PATCH 284/341] arm64/vdso: Fix nsec handling for CLOCK_MONOTONIC_RAW Recently vDSO support for CLOCK_MONOTONIC_RAW was added in 49eea433b326 ("arm64: Add support for CLOCK_MONOTONIC_RAW in clock_gettime() vDSO"). Noticing that the core timekeeping code never set tkr_raw.xtime_nsec, the vDSO implementation didn't bother exposing it via the data page and instead took the unshifted tk->raw_time.tv_nsec value which was then immediately shifted left in the vDSO code. Unfortunately, by accelerating the MONOTONIC_RAW clockid, it uncovered potential 1ns time inconsistencies caused by the timekeeping core not handling sub-ns resolution. Now that the core code has been fixed and is actually setting tkr_raw.xtime_nsec, we need to take that into account in the vDSO by adding it to the shifted raw_time value, in order to fix the user-visible inconsistency. Rather than do that at each use (and expand the data page in the process), instead perform the shift/addition operation when populating the data page and remove the shift from the vDSO code entirely. [jstultz: minor whitespace tweak, tried to improve commit message to make it more clear this fixes a regression] Reported-by: John Stultz Signed-off-by: Will Deacon Signed-off-by: John Stultz Tested-by: Daniel Mentz Acked-by: Kevin Brodsky Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: "stable #4 . 
8+" Cc: Miroslav Lichvar Link: http://lkml.kernel.org/r/1496965462-20003-4-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- arch/arm64/kernel/vdso.c | 5 +++-- arch/arm64/kernel/vdso/gettimeofday.S | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..d0cb007fa482 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk) /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; vdso_data->raw_time_sec = tk->raw_time.tv_sec; - vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; + vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec << + tk->tkr_raw.shift) + + tk->tkr_raw.xtime_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - /* tkr_raw.xtime_nsec == 0 */ vdso_data->cs_mono_mult = tk->tkr_mono.mult; vdso_data->cs_raw_mult = tk->tkr_raw.mult; /* tkr_mono.shift == tkr_raw.shift */ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index e00b4671bd7c..76320e920965 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -256,7 +256,6 @@ monotonic_raw: seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ - lsl x14, x14, x12 get_nsec_per_sec res=x9 lsl x9, x9, x12 From 842c08846420baa619fe3cb8c9af538efdb89428 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 14 Jun 2017 10:54:52 +0200 Subject: [PATCH 285/341] livepatch: Fix stacking of patches with respect to RCU rcu_read_(un)lock(), list_*_rcu(), and synchronize_rcu() are used for a secure access and manipulation of the list of patches that modify the same function. In particular, it is the variable func_stack that is accessible from the ftrace handler via struct ftrace_ops and klp_ops. 
Of course, it synchronizes also some states of the patch on the top of the stack, e.g. func->transition in klp_ftrace_handler. At the same time, this mechanism guards also the manipulation of task->patch_state. It is modified according to the state of the transition and the state of the process. Now, all this works well as long as RCU works well. Sadly livepatching might get into some corner cases when this is not true. For example, RCU is not watching when rcu_read_lock() is taken in idle threads. It is because they might sleep and prevent reaching the grace period for too long. There are ways how to make RCU watching even in idle threads, see rcu_irq_enter(). But there is a small location inside RCU infrastructure when even this does not work. This small problematic location can be detected either before calling rcu_irq_enter() by rcu_irq_enter_disabled() or later by rcu_is_watching(). Sadly, there is no safe way how to handle it. Once we detect that RCU was not watching, we might see inconsistent state of the function stack and the related variables in klp_ftrace_handler(). Then we could do a wrong decision, use an incompatible implementation of the function and break the consistency of the system. We could warn but we could not avoid the damage. Fortunately, ftrace has similar problems and they seem to be solved well there. It uses a heavy weight implementation of some RCU operations. In particular, it replaces: + rcu_read_lock() with preempt_disable_notrace() + rcu_read_unlock() with preempt_enable_notrace() + synchronize_rcu() with schedule_on_each_cpu(sync_work) My understanding is that this is RCU implementation from a stone age. It meets the core RCU requirements but it is rather ineffective. Especially, it does not allow to batch or speed up the synchronize calls. On the other hand, it is very trivial. It allows to safely trace and/or livepatch even the RCU core infrastructure. 
And the effectiveness is not a big issue because using ftrace or livepatches on productive systems is a rare operation. The safety is much more important than a negligible extra load. Note that the alternative implementation follows the RCU principles. Therefore, we could and actually must use list_*_rcu() variants when manipulating the func_stack. These functions allow to access the pointers in the right order and with the right barriers. But they do not use any other information that would be set only by rcu_read_lock(). Also note that there are actually two problems solved in ftrace: First, it cares about the consistency of RCU read sections. It is being solved the way as described and used in this patch. Second, ftrace needs to make sure that nobody is inside the dynamic trampoline when it is being freed. For this, it also calls synchronize_rcu_tasks() in preemptive kernel in ftrace_shutdown(). Livepatch has a similar problem but it is solved by ftrace for free. klp_ftrace_handler() is a good guy and never sleeps. In addition, it is registered with FTRACE_OPS_FL_DYNAMIC. It causes that unregister_ftrace_function() calls: * schedule_on_each_cpu(ftrace_sync) - always * synchronize_rcu_tasks() - in preemptive kernel The effect is that nobody is either inside the dynamic trampoline or inside the ftrace handler after unregister_ftrace_function() returns. 
[jkosina@suse.cz: reformat changelog, fix comment] Signed-off-by: Petr Mladek Acked-by: Josh Poimboeuf Acked-by: Miroslav Benes Signed-off-by: Jiri Kosina --- kernel/livepatch/patch.c | 8 ++++++-- kernel/livepatch/transition.c | 36 ++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index f8269036bf0b..52c4e907c14b 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -59,7 +59,11 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); - rcu_read_lock(); + /* + * A variant of synchronize_sched() is used to allow patching functions + * where RCU is not watching, see klp_synchronize_transition(). + */ + preempt_disable_notrace(); func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, stack_node); @@ -115,7 +119,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, klp_arch_set_pc(regs, (unsigned long)func->new_func); unlock: - rcu_read_unlock(); + preempt_enable_notrace(); } /* diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index adc0cc64aa4b..b004a1fb6032 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -48,6 +48,28 @@ static void klp_transition_work_fn(struct work_struct *work) } static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn); +/* + * This function is just a stub to implement a hard force + * of synchronize_sched(). This requires synchronizing + * tasks even in userspace and idle. + */ +static void klp_sync(struct work_struct *work) +{ +} + +/* + * We allow to patch also functions where RCU is not watching, + * e.g. before user_exit(). We can not rely on the RCU infrastructure + * to do the synchronization. Instead hard force the sched synchronization. + * + * This approach allows to use RCU functions for manipulating func_stack + * safely. 
+ */ +static void klp_synchronize_transition(void) +{ + schedule_on_each_cpu(klp_sync); +} + /* * The transition to the target patch state is complete. Clean up the data * structures. @@ -73,7 +95,7 @@ static void klp_complete_transition(void) * func->transition gets cleared, the handler may choose a * removed function. */ - synchronize_rcu(); + klp_synchronize_transition(); } if (klp_transition_patch->immediate) @@ -92,7 +114,7 @@ static void klp_complete_transition(void) /* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */ if (klp_target_state == KLP_PATCHED) - synchronize_rcu(); + klp_synchronize_transition(); read_lock(&tasklist_lock); for_each_process_thread(g, task) { @@ -136,7 +158,11 @@ void klp_cancel_transition(void) */ void klp_update_patch_state(struct task_struct *task) { - rcu_read_lock(); + /* + * A variant of synchronize_sched() is used to allow patching functions + * where RCU is not watching, see klp_synchronize_transition(). + */ + preempt_disable_notrace(); /* * This test_and_clear_tsk_thread_flag() call also serves as a read @@ -153,7 +179,7 @@ void klp_update_patch_state(struct task_struct *task) if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING)) task->patch_state = READ_ONCE(klp_target_state); - rcu_read_unlock(); + preempt_enable_notrace(); } /* @@ -539,7 +565,7 @@ void klp_reverse_transition(void) clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING); /* Let any remaining calls to klp_update_patch_state() complete */ - synchronize_rcu(); + klp_synchronize_transition(); klp_start_transition(); } From 6c7515c61ffa0985c57abd8892c7928b52b9a306 Mon Sep 17 00:00:00 2001 From: Ralph Sennhauser Date: Thu, 1 Jun 2017 22:08:20 +0200 Subject: [PATCH 286/341] gpio: mvebu: change compatible string for PWM support As it turns out more than just Armada 370 and XP support using GPIO lines as PWM lines. For example the Armada 38x family has the same hardware support. 
As such "marvell,armada-370-xp-gpio" for the compatible string is a misnomer. Change the compatible string to "marvell,armada-370-gpio" before the driver makes it out of the -rc stage. This also follows the practice of using only the first device family supported as part of the name. Also update the documentation and comments in the code accordingly. Fixes: 757642f9a584 ("gpio: mvebu: Add limited PWM support") Signed-off-by: Ralph Sennhauser Acked-by: Gregory CLEMENT Acked-by: Rob Herring Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/gpio/gpio-mvebu.txt | 6 +++--- drivers/gpio/gpio-mvebu.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt index 42c3bb2d53e8..01e331a5f3e7 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt @@ -41,9 +41,9 @@ Required properties: Optional properties: In order to use the GPIO lines in PWM mode, some additional optional -properties are required. Only Armada 370 and XP support these properties. +properties are required. -- compatible: Must contain "marvell,armada-370-xp-gpio" +- compatible: Must contain "marvell,armada-370-gpio" - reg: an additional register set is needed, for the GPIO Blink Counter on/off registers. 
@@ -71,7 +71,7 @@ Example: }; gpio1: gpio@18140 { - compatible = "marvell,armada-370-xp-gpio"; + compatible = "marvell,armada-370-gpio"; reg = <0x18140 0x40>, <0x181c8 0x08>; reg-names = "gpio", "pwm"; ngpios = <17>; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 5104b6398139..c83ea68be792 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev, u32 set; if (!of_device_is_compatible(mvchip->chip.of_node, - "marvell,armada-370-xp-gpio")) + "marvell,armada-370-gpio")) return 0; if (IS_ERR(mvchip->clk)) @@ -852,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = { .data = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP, }, { - .compatible = "marvell,armada-370-xp-gpio", + .compatible = "marvell,armada-370-gpio", .data = (void *) MVEBU_GPIO_SOC_VARIANT_ORION, }, { @@ -1128,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip); } - /* Armada 370/XP has simple PWM support for GPIO lines */ + /* Some MVEBU SoCs have simple PWM support for GPIO lines */ if (IS_ENABLED(CONFIG_PWM)) return mvebu_pwm_probe(pdev, mvchip, id); From e27a9eca5d4a392b96ce5d5238c8d637bcb0a52c Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Tue, 20 Jun 2017 10:57:51 +0100 Subject: [PATCH 287/341] KVM: MIPS: Fix maybe-uninitialized build failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes a "maybe-uninitialized" build failure in arch/mips/kvm/tlb.c when KVM, DYNAMIC_DEBUG and JUMP_LABEL are all enabled. 
The failure is: In file included from ./include/linux/printk.h:329:0, from ./include/linux/kernel.h:13, from ./include/asm-generic/bug.h:15, from ./arch/mips/include/asm/bug.h:41, from ./include/linux/bug.h:4, from ./include/linux/thread_info.h:11, from ./include/asm-generic/current.h:4, from ./arch/mips/include/generated/asm/current.h:1, from ./include/linux/sched.h:11, from arch/mips/kvm/tlb.c:13: arch/mips/kvm/tlb.c: In function ‘kvm_mips_host_tlb_inv’: ./include/linux/dynamic_debug.h:126:3: error: ‘idx_kernel’ may be used uninitialized in this function [-Werror=maybe-uninitialized] __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ ^~~~~~~~~~~~~~~~~~ arch/mips/kvm/tlb.c:169:16: note: ‘idx_kernel’ was declared here int idx_user, idx_kernel; ^~~~~~~~~~ There is a similar error relating to "idx_user". Both errors were observed with GCC 6. As far as I can tell, it is impossible for either idx_user or idx_kernel to be uninitialized when they are later read in the calls to kvm_debug, but to satisfy the compiler, add zero initializers to both variables. Signed-off-by: James Cowgill Fixes: 57e3869cfaae ("KVM: MIPS/TLB: Generalise host TLB invalidate to kernel ASID") Cc: # 4.11+ Acked-by: James Hogan Signed-off-by: Radim Krčmář --- arch/mips/kvm/tlb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 7c6336dd2638..7cd92166a0b9 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi) int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, bool user, bool kernel) { - int idx_user, idx_kernel; + /* + * Initialize idx_user and idx_kernel to workaround bogus + * maybe-initialized warning when using GCC 6. 
+ */ + int idx_user = 0, idx_kernel = 0; unsigned long flags, old_entryhi; local_irq_save(flags); From 9e69672e90ccff10dab6f0c9545226a886e5973c Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 14 Jun 2017 17:13:14 +0200 Subject: [PATCH 288/341] dt-bindings: mfd: Update STM32 timers clock names Clock name has been updated during driver/DT binding review: https://lkml.org/lkml/2016/12/13/718 Update DT binding doc to reflect this. Fixes: 8f9359c6c6a0 (dt-bindings: mfd: Add bindings for STM32 Timers driver) Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/stm32-timers.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt index bbd083f5600a..1db6e0057a63 100644 --- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt +++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt @@ -31,7 +31,7 @@ Example: compatible = "st,stm32-timers"; reg = <0x40010000 0x400>; clocks = <&rcc 0 160>; - clock-names = "clk_int"; + clock-names = "int"; pwm { compatible = "st,stm32-pwm"; From 05b4017b37f1fce4b7185f138126dd8decdb381f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Jun 2017 10:55:11 -0400 Subject: [PATCH 289/341] drm/amdgpu/atom: fix ps allocation size for EnableDispPowerGating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were using the wrong structure which lead to an overflow on some boards. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=101387 Acked-by: Chunming Zhou Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/atombios_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 8c9bc75a9c2d..8a0818b23ea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); - ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; + ENABLE_DISP_POWER_GATING_PS_ALLOCATION args; memset(&args, 0, sizeof(args)); @@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) { int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); - ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; + ENABLE_DISP_POWER_GATING_PS_ALLOCATION args; memset(&args, 0, sizeof(args)); From 52b482b0f4fd6d5267faf29fe91398e203f3c230 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Jun 2017 11:12:28 -0400 Subject: [PATCH 290/341] drm/amdgpu: adjust default display clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase the default display clock on newer asics to accommodate some high res modes with really high refresh rates. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=93826 Acked-by: Chunming Zhou Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 1cf78f4dd339..1e8e1123ddf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev) DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n", adev->clock.default_dispclk / 100); adev->clock.default_dispclk = 60000; + } else if (adev->clock.default_dispclk <= 60000) { + DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n", + adev->clock.default_dispclk / 100); + adev->clock.default_dispclk = 62500; } adev->clock.dp_extclk = le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq); From 4eb59793cca00b0e629b6d55b5abb5acb82c5868 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Jun 2017 12:52:47 -0400 Subject: [PATCH 291/341] drm/radeon: add a PX quirk for another K53TK variant Disable PX on these systems. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=101491 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 6ecf42783d4b..0a6444d72000 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = { * https://bugzilla.kernel.org/show_bug.cgi?id=51381 */ { PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX }, + /* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU + * https://bugs.freedesktop.org/show_bug.cgi?id=101491 + */ + { PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX }, /* macbook pro 8.2 */ { PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP }, { 0, 0, 0, 0, 0 }, From acfd6ee4fa7ebeee75511825fe02be3f7ac1d668 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Jun 2017 15:59:58 -0400 Subject: [PATCH 292/341] drm/radeon: add a quirk for Toshiba Satellite L20-183 Fixes resume from suspend. bug: https://bugzilla.kernel.org/show_bug.cgi?id=196121 Reported-by: Przemek Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_combios.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 432480ff9d22..3178ba0c537c 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev) rdev->pdev->subsystem_vendor == 0x103c && rdev->pdev->subsystem_device == 0x280a) return; + /* quirk for rs4xx Toshiba Sattellite L20-183 latop to make it resume + * - it hangs on resume inside the dynclk 1 table. 
+ */ + if (rdev->family == CHIP_RS400 && + rdev->pdev->subsystem_vendor == 0x1179 && + rdev->pdev->subsystem_device == 0xff31) + return; /* DYN CLK 1 */ table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE); From bdaf32c3ced3d111b692f0af585f880f82d686c5 Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Fri, 16 Jun 2017 15:44:47 +0300 Subject: [PATCH 293/341] fib_rules: Resolve goto rules target on delete We should avoid marking goto rules unresolved when their target is actually reachable after rule deletion. Consider the following sample scenario: # ip -4 ru sh 0: from all lookup local 32000: from all goto 32100 32100: from all lookup main 32100: from all lookup default 32766: from all lookup main 32767: from all lookup default # ip -4 ru del pref 32100 table main # ip -4 ru sh 0: from all lookup local 32000: from all goto 32100 [unresolved] 32100: from all lookup default 32766: from all lookup main 32767: from all lookup default After removal of the first rule with preference 32100 we mark all goto rules as unreachable, even when a rule with the same preference as the removed one is still present. Check if the next rule with the same preference is available and make all rules with goto action point to it. Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller --- net/core/fib_rules.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index f21c4d3aeae0..3bba291c6c32 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -568,7 +568,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; - struct fib_rule *rule, *tmp; + struct fib_rule *rule, *r; struct nlattr *tb[FRA_MAX+1]; struct fib_kuid_range range; int err = -EINVAL; @@ -668,16 +668,23 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, /* * Check if this rule is a target to any of them. 
If so, + * adjust to the next one with the same preference or * disable them. As this operation is eventually very - * expensive, it is only performed if goto rules have - * actually been added. + * expensive, it is only performed if goto rules, except + * current if it is goto rule, have actually been added. */ if (ops->nr_goto_rules > 0) { - list_for_each_entry(tmp, &ops->rules_list, list) { - if (rtnl_dereference(tmp->ctarget) == rule) { - RCU_INIT_POINTER(tmp->ctarget, NULL); + struct fib_rule *n; + + n = list_next_entry(rule, list); + if (&n->list == &ops->rules_list || n->pref != rule->pref) + n = NULL; + list_for_each_entry(r, &ops->rules_list, list) { + if (rtnl_dereference(r->ctarget) != rule) + continue; + rcu_assign_pointer(r->ctarget, n); + if (!n) ops->unresolved_rules++; - } } } From fe420d87bbc234015b4195dd239b7d3052b140ea Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Fri, 16 Jun 2017 19:24:00 +0200 Subject: [PATCH 294/341] net/core: remove explicit do_softirq() from busy_poll_stop() Since commit 217f69743681 ("net: busy-poll: allow preemption in sk_busy_loop()") there is an explicit do_softirq() invocation after local_bh_enable() has been invoked. I don't understand why we need this because local_bh_enable() will invoke do_softirq() once the softirq counter reached zero and we have softirq-related work pending. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6d60149287a1..7243421c9783 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5206,8 +5206,6 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) if (rc == BUSY_POLL_BUDGET) __napi_schedule(napi); local_bh_enable(); - if (local_softirq_pending()) - do_softirq(); } void napi_busy_loop(unsigned int napi_id, From 5567e989198b5a8d78f9b5868e48fc9f4726bdd5 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 19 Jun 2017 18:04:16 +0300 Subject: [PATCH 295/341] fsl/fman: propagate dma_ops Make sure dma_ops are set, to be later used by the Ethernet driver. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/mac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 0b31f8502ada..6e67d22fd0d5 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -623,6 +623,8 @@ static struct platform_device *dpaa_eth_add_device(int fman_id, goto no_mem; } + set_dma_ops(&pdev->dev, get_dma_ops(priv->dev)); + ret = platform_device_add_data(pdev, &data, sizeof(data)); if (ret) goto err; From fb52728a9294d97de808795b8e3f60fb8de50c00 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 19 Jun 2017 18:04:17 +0300 Subject: [PATCH 296/341] dpaa_eth: reuse the dma_ops provided by the FMan MAC device Remove the use of arch_setup_dma_ops() that was not exported and was breaking loadable module compilation. Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 9a520e4f0df9..290ad0563320 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2647,7 +2647,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */ /* device used for DMA mapping */ - arch_setup_dma_ops(dev, 0, 0, NULL, false); + set_dma_ops(dev, get_dma_ops(&pdev->dev)); err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40)); if (err) { dev_err(dev, "dma_coerce_mask_and_coherent() failed\n"); From 7e113321eccba2b52c0e9d11129d370c9511e4db Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 19 Jun 2017 18:05:41 +0200 Subject: [PATCH 297/341] dt-bindings: net: sms911x: Add missing optional VDD regulators The lan911x family of devices require supplying from 3.3 V power supplies (connected to VDD_IO, VDD_A and VREG_3.3 pins). The existing driver however obtains only VDD_IO and VDD_A regulators in an optional way so document this in bindings. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/smsc911x.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt index 16c3a9501f5d..acfafc8e143c 100644 --- a/Documentation/devicetree/bindings/net/smsc911x.txt +++ b/Documentation/devicetree/bindings/net/smsc911x.txt @@ -27,6 +27,7 @@ Optional properties: of the device. On many systems this is wired high so the device goes out of reset at power-on, but if it is under program control, this optional GPIO can wake up in response to it. 
+- vdd33a-supply, vddvario-supply : 3.3V analog and IO logic power supplies Examples: From 07f615574f8ac499875b21c1142f26308234a92c Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Tue, 20 Jun 2017 13:29:25 +0300 Subject: [PATCH 298/341] ipv6: Do not leak throw route references While commit 73ba57bfae4a ("ipv6: fix backtracking for throw routes") does a good job on error propagation to the fib_rules_lookup() in fib rules core framework that also corrects throw routes handling, it does not solve the route reference leakage problem that happens when we return -EAGAIN to the fib_rules_lookup() and leave routing table entry referenced in arg->result. If rule with matched throw route isn't last matched in the list we overwrite arg->result losing reference on throw route stored previously forever. We also partially revert commit ab997ad40839 ("ipv6: fix the incorrect return value of throw route") since we never return routing table entry with dst.error == -EAGAIN when CONFIG_IPV6_MULTIPLE_TABLES is on. Also there is no point to check for RTF_REJECT flag since it is always set on a throw route. Fixes: 73ba57bfae4a ("ipv6: fix backtracking for throw routes") Signed-off-by: Serhey Popovych Signed-off-by: David S.
Miller --- net/ipv6/fib6_rules.c | 22 ++++++---------------- net/ipv6/ip6_fib.c | 3 +-- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index eea23b57c6a5..ec849d88a662 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -32,7 +32,6 @@ struct fib6_rule { struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - struct rt6_info *rt; struct fib_lookup_arg arg = { .lookup_ptr = lookup, .flags = FIB_LOOKUP_NOREF, @@ -44,21 +43,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); - rt = arg.result; + if (arg.result) + return arg.result; - if (!rt) { - dst_hold(&net->ipv6.ip6_null_entry->dst); - return &net->ipv6.ip6_null_entry->dst; - } - - if (rt->rt6i_flags & RTF_REJECT && - rt->dst.error == -EAGAIN) { - ip6_rt_put(rt); - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); - } - - return &rt->dst; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -121,7 +110,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, flp6->saddr = saddr; } err = rt->dst.error; - goto out; + if (err != -EAGAIN) + goto out; } again: ip6_rt_put(rt); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d4bf2c68a545..e6b78ba0e636 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -289,8 +289,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, struct rt6_info *rt; rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); - if (rt->rt6i_flags & RTF_REJECT && - rt->dst.error == -EAGAIN) { + if (rt->dst.error == -EAGAIN) { ip6_rt_put(rt); rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); From db833d40ad3263b2ee3b59a1ba168bb3cfed8137 Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Tue, 20 Jun 2017 
14:35:23 +0300 Subject: [PATCH 299/341] rtnetlink: add IFLA_GROUP to ifla_policy Network interface groups support was added a while ago, however there is no IFLA_GROUP attribute description in policy and netlink message size calculations until now. Add IFLA_GROUP attribute to the policy. Fixes: cbda10fa97d7 ("net_device: add support for network device groups") Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5e61456f6bc7..467a2f4510a7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -931,6 +931,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(4) /* IFLA_LINK_NETNSID */ + + nla_total_size(4) /* IFLA_GROUP */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -1468,6 +1469,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, [IFLA_XDP] = { .type = NLA_NESTED }, + [IFLA_GROUP] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { From 57f0c9cf58ff7fe479137ab847a886d0eed3ad1d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 20 Jun 2017 13:08:51 +0100 Subject: [PATCH 300/341] sfc: remove duplicate up_write on VF filter_sem Somehow two copies of the line 'up_write(&vf->efx->filter_sem);' got into efx_ef10_sriov_set_vf_vlan(). This would put the mutex in a bad state and cause all subsequent down attempts to hang. Fixes: 671b53eec2ed ("sfc: Ensure down_write(&filter_sem) and up_write() are matched before calling efx_net_open()") Signed-off-by: Edward Cree Signed-off-by: David S.
Miller --- drivers/net/ethernet/sfc/ef10_sriov.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index b7e4345c990d..019cef1d3cf7 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -661,8 +661,6 @@ restore_filters: up_write(&vf->efx->filter_sem); mutex_unlock(&vf->efx->mac_lock); - up_write(&vf->efx->filter_sem); - rc2 = efx_net_open(vf->efx->net_dev); if (rc2) goto reset_nic; From 05cf0d1bf4ed722aefff92775244dbe9e1bb4679 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 20 Jun 2017 14:32:41 +0200 Subject: [PATCH 301/341] net: stmmac: free an skb first when there are no longer any descriptors using it When having the skb pointer in the first descriptor, stmmac_tx_clean can get called at a moment where the IP has only cleared the own bit of the first descriptor, thus freeing the skb, even though there can be several descriptors whose buffers point into the same skb. By simply moving the skb pointer from the first descriptor to the last descriptor, a skb will get freed only when the IP has cleared the own bit of all the descriptors that are using that skb. Signed-off-by: Niklas Cassel Signed-off-by: David S. 
Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d16d11bfc046..6e4cbc6ce0ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2831,7 +2831,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[first_entry].buf = des; tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb); - tx_q->tx_skbuff[first_entry] = skb; first->des0 = cpu_to_le32(des); @@ -2865,6 +2864,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true; + /* Only the last descriptor gets to point to the skb. */ + tx_q->tx_skbuff[tx_q->cur_tx] = skb; + + /* We've used all descriptors we need for this skb, however, + * advance cur_tx so that it references a fresh descriptor. + * ndo_start_xmit will fill this descriptor the next time it's + * called and stmmac_tx_clean may clean up to this descriptor. + */ tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) { @@ -2998,8 +3005,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) first = desc; - tx_q->tx_skbuff[first_entry] = skb; - enh_desc = priv->plat->enh_desc; /* To program the descriptors according to the size of the frame */ if (enh_desc) @@ -3047,8 +3052,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) skb->len); } - entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); + /* Only the last descriptor gets to point to the skb. */ + tx_q->tx_skbuff[entry] = skb; + /* We've used all descriptors we need for this skb, however, + * advance cur_tx so that it references a fresh descriptor. 
+ * ndo_start_xmit will fill this descriptor the next time it's + * called and stmmac_tx_clean may clean up to this descriptor. + */ + entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); tx_q->cur_tx = entry; if (netif_msg_pktdata(priv)) { From 9f93d87cba63e3d18629261243b1f633519eabb5 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Fri, 9 Jun 2017 09:04:05 +0200 Subject: [PATCH 302/341] irqchip/mips-gic: Mark count and compare accessors notrace gic_read_count(), gic_write_compare() and gic_write_cpu_compare() are often used in a sequence to update the compare register with a count value increased by a small offset. With small delta values used to update the compare register, the time to update function trace for these operations may be longer than the update timeout leading to update failure. Signed-off-by: Marcin Nowakowski Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: linux-mips@linux-mips.org Cc: Jason Cooper Link: http://lkml.kernel.org/r/1496991845-27031-1-git-send-email-marcin.nowakowski@imgtec.com --- drivers/irqchip/irq-mips-gic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index eb7fbe159963..929f8558bf1c 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe) } #ifdef CONFIG_CLKSRC_MIPS_GIC -u64 gic_read_count(void) +u64 notrace gic_read_count(void) { unsigned int hi, hi2, lo; @@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void) return bits; } -void gic_write_compare(u64 cnt) +void notrace gic_write_compare(u64 cnt) { if (mips_cm_is64) { gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt); @@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt) } } -void gic_write_cpu_compare(u64 cnt, int cpu) +void notrace gic_write_cpu_compare(u64 cnt, int cpu) { unsigned long flags; From b4846fc3c8559649277e3e4e6b5cec5348a8d208 Mon Sep 17 00:00:00 2001 
From: WANG Cong Date: Tue, 20 Jun 2017 10:46:27 -0700 Subject: [PATCH 303/341] igmp: add a missing spin_lock_init() Andrey reported a lockdep warning on non-initialized spinlock: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 4099 Comm: a.out Not tainted 4.12.0-rc6+ #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x292/0x395 lib/dump_stack.c:52 register_lock_class+0x717/0x1aa0 kernel/locking/lockdep.c:755 ? 0xffffffffa0000000 __lock_acquire+0x269/0x3690 kernel/locking/lockdep.c:3255 lock_acquire+0x22d/0x560 kernel/locking/lockdep.c:3855 __raw_spin_lock_bh ./include/linux/spinlock_api_smp.h:135 _raw_spin_lock_bh+0x36/0x50 kernel/locking/spinlock.c:175 spin_lock_bh ./include/linux/spinlock.h:304 ip_mc_clear_src+0x27/0x1e0 net/ipv4/igmp.c:2076 igmpv3_clear_delrec+0xee/0x4f0 net/ipv4/igmp.c:1194 ip_mc_destroy_dev+0x4e/0x190 net/ipv4/igmp.c:1736 We miss a spin_lock_init() in igmpv3_add_delrec(), probably because previously we never use it on this code path. Since we already unlink it from the global mc_tomb list, it is probably safe not to acquire this spinlock here. It does not harm to have it although, to avoid conditional locking. Fixes: c38b7d327aaf ("igmp: acquire pmc lock for ip_mc_clear_src()") Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/ipv4/igmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8f6b5bbcbf69..ec9a396fa466 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1112,6 +1112,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); if (!pmc) return; + spin_lock_init(&pmc->lock); spin_lock_bh(&im->lock); pmc->interface = im->interface; in_dev_hold(in_dev); From 296923e121693015d1140c42ce14f478d5b24376 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 21 Jun 2017 08:56:54 +1000 Subject: [PATCH 304/341] drm/i915: remove rate_to_index, messed up merge. This was from a merge I did incorrectly. Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_dp.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 31998d09a662..64fa774c855b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1548,17 +1548,6 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) DRM_DEBUG_KMS("common rates: %s\n", str); } -static int rate_to_index(int find, const int *rates) -{ - int i = 0; - - for (i = 0; i < DP_MAX_SUPPORTED_RATES; ++i) - if (find == rates[i]) - break; - - return i; -} - int intel_dp_max_link_rate(struct intel_dp *intel_dp) { From 8a7b0d8e8d9962ec3b2ae64dd4e86d68a6fb9220 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 May 2017 08:30:40 +0300 Subject: [PATCH 305/341] CIFS: Set ->should_dirty in cifs_user_readv() The current code causes a static checker warning because ITER_IOVEC is zero so the condition is never true. 
Fixes: 6685c5e2d1ac ("CIFS: Add asynchronous read support through kernel AIO") Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0fd081bd2a2f..fcef70602b27 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (to->type & ITER_IOVEC) + if (to->type == ITER_IOVEC) ctx->should_dirty = true; rc = setup_aio_ctx_iter(ctx, to, READ); From ecf3411a121e7a653e309ff50a820ffa87c537f8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 May 2017 19:24:15 +0100 Subject: [PATCH 306/341] CIFS: check if pages is null rather than bv for a failed allocation pages is being allocated however a null check on bv is being used to see if the allocation failed. Fix this by checking if pages is null. Detected by CoverityScan, CID#1432974 ("Logically dead code") Fixes: ccf7f4088af2dd ("CIFS: Add asynchronous context to support kernel AIO") Signed-off-by: Colin Ian King Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index b08531977daa..3b147dc6af63 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) if (!pages) { pages = vmalloc(max_pages * sizeof(struct page *)); - if (!bv) { + if (!pages) { kvfree(bv); return -ENOMEM; } From dcd87838c06f05ab7650b249ebf0d5b57ae63e1e Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 6 Jun 2017 16:58:58 -0700 Subject: [PATCH 307/341] CIFS: Improve readdir verbosity Downgrade the loglevel for SMB2 to prevent filling the log with messages if e.g. readdir was interrupted. Also make SMB2 and SMB1 codepaths do the same logging during readdir. 
Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable --- fs/cifs/smb1ops.c | 9 +++++++-- fs/cifs/smb2ops.c | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 27bc360c7ffd..a723df3e0197 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid, __u16 search_flags, struct cifs_search_info *srch_inf) { - return CIFSFindFirst(xid, tcon, path, cifs_sb, - &fid->netfid, search_flags, srch_inf, true); + int rc; + + rc = CIFSFindFirst(xid, tcon, path, cifs_sb, + &fid->netfid, search_flags, srch_inf, true); + if (rc) + cifs_dbg(FYI, "find first failed=%d\n", rc); + return rc; } static int diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c58691834eb2..59726013375b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); kfree(utf16_path); if (rc) { - cifs_dbg(VFS, "open dir failed\n"); + cifs_dbg(FYI, "open dir failed rc=%d\n", rc); return rc; } @@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); if (rc) { - cifs_dbg(VFS, "query directory failed\n"); + cifs_dbg(FYI, "query directory failed rc=%d\n", rc); SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } return rc; From e125f5284f81bbb765a504494622b45c02faf978 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 7 Jun 2017 00:33:45 +0100 Subject: [PATCH 308/341] cifs: remove redundant return in cifs_creation_time_get There is a redundant return in function cifs_creation_time_get that appears to be old vestigial code that can be removed. So remove it.
Detected by CoverityScan, CID#1361924 ("Structurally dead code") Signed-off-by: Colin Ian King Signed-off-by: Steve French --- fs/cifs/xattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 3cb5c9e2d4e7..de50e749ff05 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode, pcreatetime = (__u64 *)value; *pcreatetime = CIFS_I(inode)->createtime; return sizeof(__u64); - - return rc; } From 517a6e43c4872c89794af5b377fa085e47345952 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 11 Jun 2017 09:12:47 +0200 Subject: [PATCH 309/341] CIFS: Fix some return values in case of error in 'crypt_message' 'rc' is known to be 0 at this point. So if 'init_sg' or 'kzalloc' fails, we should return -ENOMEM instead. Also remove a useless 'rc' in a debug message as it is meaningless here. Fixes: 026e93dc0a3ee ("CIFS: Encrypt SMB3 requests before sending") Signed-off-by: Christophe JAILLET Reviewed-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2ops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 59726013375b..7e48561abd29 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) sg = init_sg(rqst, sign); if (!sg) { - cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc); + cifs_dbg(VFS, "%s: Failed to init sg", __func__); + rc = -ENOMEM; goto free_req; } @@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) iv = kzalloc(iv_len, GFP_KERNEL); if (!iv) { cifs_dbg(VFS, "%s: Failed to alloc IV", __func__); + rc = -ENOMEM; goto free_sg; } iv[0] = 3; From e94ac3510b6a0f696f2c442c4fc4051c8101ef12 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 20 Jun 2017 22:28:37 +0200 Subject: 
[PATCH 310/341] drm: Fix GETCONNECTOR regression In commit 91eefc05f0ac71902906b2058360e61bd25137fe Author: Daniel Vetter Date: Wed Dec 14 00:08:10 2016 +0100 drm: Tighten locking in drm_mode_getconnector I reordered the logic a bit in that IOCTL, but that broke userspace since it'll get the new mode list, but not the new property values. Fix that again. v2: Fix up the error path handling when copy_to_user for the modes fails (Dhinakaran). Fixes: 91eefc05f0ac ("drm: Tighten locking in drm_mode_getconnector") Cc: Sean Paul Cc: Daniel Vetter Cc: Jani Nikula Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Reported-by: "H.J. Lu" Tested-by: "H.J. Lu" Cc: # v4.11+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100576 Cc: "H.J. Lu" Cc: "Pandiyan, Dhinakaran" Reviewed-by: Sean Paul Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170620202837.1701-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_connector.c | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 9f847615ac74..48ca2457df8c 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if (!connector) return -ENOENT; - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - encoder = drm_connector_get_encoder(connector); - if (encoder) - out_resp->encoder_id = encoder->base.id; - else - out_resp->encoder_id = 0; - - ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic, - (uint32_t __user *)(unsigned long)(out_resp->props_ptr), - (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr), - &out_resp->count_props); - drm_modeset_unlock(&dev->mode_config.connection_mutex); - if (ret) - goto out_unref; - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) if (connector->encoder_ids[i] !=
0) encoders_count++; @@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if (put_user(connector->encoder_ids[i], encoder_ptr + copied)) { ret = -EFAULT; - goto out_unref; + goto out; } copied++; } @@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if (copy_to_user(mode_ptr + copied, &u_mode, sizeof(u_mode))) { ret = -EFAULT; + mutex_unlock(&dev->mode_config.mutex); + goto out; } copied++; } } out_resp->count_modes = mode_count; -out: mutex_unlock(&dev->mode_config.mutex); -out_unref: + + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + encoder = drm_connector_get_encoder(connector); + if (encoder) + out_resp->encoder_id = encoder->base.id; + else + out_resp->encoder_id = 0; + + /* Only grab properties after probing, to make sure EDID and other + * properties reflect the latest status. */ + ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic, + (uint32_t __user *)(unsigned long)(out_resp->props_ptr), + (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr), + &out_resp->count_props); + drm_modeset_unlock(&dev->mode_config.connection_mutex); + +out: drm_connector_put(connector); return ret; From de5cc8155cd250a31da67dea49aff7637ce98887 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Tue, 6 Jun 2017 15:05:21 +0100 Subject: [PATCH 311/341] drm/arm: hdlcd: Set the CRTC's port before binding the encoder. The component-based encoder(s) used by HDLCD expect the CRTC port to be set before binding in order to find the right endpoint. Without this patch, the TDA19988 encoder driver prints a warning "Falling back to first CRTC". 
Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 345c8357b273..d3da87fbd85a 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -297,6 +297,9 @@ static int hdlcd_drm_bind(struct device *dev) if (ret) goto err_free; + /* Set the CRTC's port so that the encoder component can find it */ + hdlcd->crtc.port = of_graph_get_port_by_id(dev->of_node, 0); + ret = component_bind_all(dev, drm); if (ret) { DRM_ERROR("Failed to bind all components\n"); @@ -340,11 +343,14 @@ err_register: } err_fbdev: drm_kms_helper_poll_fini(drm); + drm_vblank_cleanup(drm); err_vblank: pm_runtime_disable(drm->dev); err_pm_active: component_unbind_all(dev, drm); err_unload: + of_node_put(hdlcd->crtc.port); + hdlcd->crtc.port = NULL; drm_irq_uninstall(drm); of_reserved_mem_device_release(drm->dev); err_free: @@ -367,6 +373,9 @@ static void hdlcd_drm_unbind(struct device *dev) } drm_kms_helper_poll_fini(drm); component_unbind_all(dev, drm); + of_node_put(hdlcd->crtc.port); + hdlcd->crtc.port = NULL; + drm_vblank_cleanup(drm); pm_runtime_get_sync(drm->dev); drm_irq_uninstall(drm); pm_runtime_put_sync(drm->dev); From 49a58f26af7b5ee28ea8788fcd2bb7b590c711c5 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Tue, 13 Jun 2017 12:18:03 +0100 Subject: [PATCH 312/341] drm/arm: hdlcd: Use CMA helper for plane buffer address calculation CMA has gained a recent helper function for calculating the start of the plane buffer's physical address. Use that instead of the hand rolled version. 
Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_crtc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 1a3359c0f6cd..0128ebd318f5 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -261,21 +261,14 @@ static void hdlcd_plane_atomic_update(struct drm_plane *plane, { struct drm_framebuffer *fb = plane->state->fb; struct hdlcd_drm_private *hdlcd; - struct drm_gem_cma_object *gem; u32 src_x, src_y, dest_h; dma_addr_t scanout_start; if (!fb) return; - src_x = plane->state->src.x1 >> 16; - src_y = plane->state->src.y1 >> 16; dest_h = drm_rect_height(&plane->state->dst); - gem = drm_fb_cma_get_gem_obj(fb, 0); - - scanout_start = gem->paddr + fb->offsets[0] + - src_y * fb->pitches[0] + - src_x * fb->format->cpp[0]; + scanout_start = drm_fb_cma_get_gem_addr(fb, plane->state, 0); hdlcd = plane->dev->dev_private; hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, fb->pitches[0]); From fee4964f0a6cd2ce6368d2c69e9ec8e6f44fb0ec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Jun 2017 22:30:38 +0200 Subject: [PATCH 313/341] drm/arm: hdlcd: remove unused variables The last rework left behind two unused variables: drm/arm/hdlcd_crtc.c: In function 'hdlcd_plane_atomic_update': drm/arm/hdlcd_crtc.c:264:13: warning: unused variable 'src_y' [-Wunused-variable] drm/arm/hdlcd_crtc.c:264:6: warning: unused variable 'src_x' [-Wunused-variable] This removes them. 
Fixes: b2ae06ae9834 ("drm/arm: hdlcd: Use CMA helper for plane buffer address calculation") Signed-off-by: Arnd Bergmann Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 0128ebd318f5..d67b6f15e8b8 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -261,7 +261,7 @@ static void hdlcd_plane_atomic_update(struct drm_plane *plane, { struct drm_framebuffer *fb = plane->state->fb; struct hdlcd_drm_private *hdlcd; - u32 src_x, src_y, dest_h; + u32 dest_h; dma_addr_t scanout_start; if (!fb) From 8a1898db51a3390241cd5fae267dc8aaa9db0f8b Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 20 Jun 2017 12:26:39 +0200 Subject: [PATCH 314/341] perf/aux: Correct return code of rb_alloc_aux() if !has_aux(ev) If the event for which an AUX area is about to be allocated, does not support setting up an AUX area, rb_alloc_aux() return -ENOTSUPP. This error condition is being returned unfiltered to the user space, and, for example, the perf tools fails with: failed to mmap with 524 (INTERNAL ERROR: strerror_r(524, 0x3fff497a1c8, 512)=22) This error can be easily seen with "perf record -m 128,256 -e cpu-clock". The 524 error code maps to -ENOTSUPP (in rb_alloc_aux()). The -ENOTSUPP error code shall be only used within the kernel. So the correct error code would then be -EOPNOTSUPP. With this commit, the perf tool then reports: failed to mmap with 95 (Operation not supported) which is more clear. 
Signed-off-by: Hendrik Brueckner Acked-by: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Pu Hou Cc: Thomas Gleixner Cc: Thomas-Mich Richter Cc: acme@kernel.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/1497954399-6355-1-git-send-email-brueckner@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 2831480c63a2..ee97196bb151 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, int ret = -ENOMEM, max_order = 0; if (!has_aux(event)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) { /* From 7def52b78a5fda14864aab9b6fd14f09a4d4ff72 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 19 Jun 2017 10:55:47 -0400 Subject: [PATCH 315/341] dm integrity: fix to not disable/enable interrupts from interrupt context Use spin_lock_irqsave and spin_unlock_irqrestore rather than spin_{lock,unlock}_irq in submit_flush_bio(). 
Otherwise lockdep issues the following warning: DEBUG_LOCKS_WARN_ON(current->hardirq_context) WARNING: CPU: 1 PID: 0 at kernel/locking/lockdep.c:2748 trace_hardirqs_on_caller+0x107/0x180 Reported-by: Ondrej Kozina Tested-by: Ondrej Kozina Signed-off-by: Mike Snitzer Acked-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 4ab10cf718c9..93b181088168 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1105,10 +1105,13 @@ static void schedule_autocommit(struct dm_integrity_c *ic) static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio) { struct bio *bio; - spin_lock_irq(&ic->endio_wait.lock); + unsigned long flags; + + spin_lock_irqsave(&ic->endio_wait.lock, flags); bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); bio_list_add(&ic->flush_bio_list, bio); - spin_unlock_irq(&ic->endio_wait.lock); + spin_unlock_irqrestore(&ic->endio_wait.lock, flags); + queue_work(ic->commit_wq, &ic->commit_work); } From feb7695fe9fb83084aa29de0094774f4c9d4c9fc Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 20 Jun 2017 19:14:30 -0400 Subject: [PATCH 316/341] dm io: fix duplicate bio completion due to missing ref count If only a subset of the devices associated with multiple regions support a given special operation (eg. DISCARD) then the dec_count() that is used to set error for the region must increment the io->count. Otherwise, when the dec_count() is called it can cause the dm-io caller's bio to be completed multiple times. As was reported against the dm-mirror target that had mirror legs with a mix of discard capabilities. 
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=196077 Reported-by: Zhang Yi Signed-off-by: Mike Snitzer --- drivers/md/dm-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 3702e502466d..8d5ca30f6551 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -317,8 +317,8 @@ static void do_region(int op, int op_flags, unsigned region, else if (op == REQ_OP_WRITE_SAME) special_cmd_max_sectors = q->limits.max_write_same_sectors; if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES || - op == REQ_OP_WRITE_SAME) && - special_cmd_max_sectors == 0) { + op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) { + atomic_inc(&io->count); dec_count(io, region, -EOPNOTSUPP); return; } From 8e8320c9315c47a6a090188720ccff32a6a6ba18 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 Jun 2017 17:56:13 -0600 Subject: [PATCH 317/341] blk-mq: fix performance regression with shared tags If we have shared tags enabled, then every IO completion will trigger a full loop of every queue belonging to a tag set, and every hardware queue for each of those queues, even if nothing needs to be done. This causes a massive performance regression if you have a lot of shared devices. Instead of doing this huge full scan on every IO, add an atomic counter to the main queue that tracks how many hardware queues have been marked as needing a restart. With that, we can avoid looking for restartable queues, if we don't have to. Max reports that this restores performance. Before this patch, 4K IOPS was limited to 22-23K IOPS. With the patch, we are running at 950-970K IOPS. 
Fixes: 6d8c6c0f97ad ("blk-mq: Restart a single queue if tag sets are shared") Reported-by: Max Gurtovoy Tested-by: Max Gurtovoy Reviewed-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 58 +++++++++++++++++++++++++++++++++--------- block/blk-mq-sched.h | 9 ------- block/blk-mq.c | 16 +++++++++--- include/linux/blkdev.h | 2 ++ 4 files changed, 61 insertions(+), 24 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1f5b692526ae..0ded5e846335 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, __blk_mq_sched_assign_ioc(q, rq, bio, ioc); } +/* + * Mark a hardware queue as needing a restart. For shared queues, maintain + * a count of how many hardware queues are marked for restart. + */ +static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) +{ + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + return; + + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + struct request_queue *q = hctx->queue; + + if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_inc(&q->shared_hctx_restart); + } else + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); +} + +static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +{ + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + return false; + + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + struct request_queue *q = hctx->queue; + + if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_dec(&q->shared_hctx_restart); + } else + clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + + if (blk_mq_hctx_has_pending(hctx)) { + blk_mq_run_hw_queue(hctx, true); + return true; + } + + return false; +} + struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, @@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return true; } -static bool 
blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) -{ - if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { - clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) { - blk_mq_run_hw_queue(hctx, true); - return true; - } - } - return false; -} - /** * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list * @pos: loop cursor. @@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) unsigned int i, j; if (set->flags & BLK_MQ_F_TAG_SHARED) { + /* + * If this is 0, then we know that no hardware queues + * have RESTART marked. We're done. + */ + if (!atomic_read(&queue->shared_hctx_restart)) + return; + rcu_read_lock(); list_for_each_entry_rcu_rr(q, queue, &set->tag_list, tag_set_list) { diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index edafb5383b7b..5007edece51a 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) return false; } -/* - * Mark a hardware queue as needing a restart. - */ -static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) -{ - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); -} - static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); diff --git a/block/blk-mq.c b/block/blk-mq.c index bb66c96850b1..958cedaff8b8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q, } } +/* + * Caller needs to ensure that we're either frozen/quiesced, or that + * the queue isn't live yet. 
+ */ static void queue_set_hctx_shared(struct request_queue *q, bool shared) { struct blk_mq_hw_ctx *hctx; int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) + if (shared) { + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_inc(&q->shared_hctx_restart); hctx->flags |= BLK_MQ_F_TAG_SHARED; - else + } else { + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_dec(&q->shared_hctx_restart); hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + } } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) +static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, + bool shared) { struct request_queue *q; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b74a3edcb3da..1ddd36bd2173 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -391,6 +391,8 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + atomic_t shared_hctx_restart; + struct blk_queue_stats *stats; struct rq_wb *rq_wb; From f4cb767d76cf7ee72f97dd76f6cfa6c76a5edc89 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 20 Jun 2017 02:10:44 -0700 Subject: [PATCH 318/341] mm: fix new crash in unmapped_area_topdown() Trinity gets kernel BUG at mm/mmap.c:1963! in about 3 minutes of mmap testing. That's the VM_BUG_ON(gap_end < gap_start) at the end of unmapped_area_topdown(). Linus points out how MAP_FIXED (which does not have to respect our stack guard gap intentions) could result in gap_end below gap_start there. Fix that, and the similar case in its alternative, unmapped_area(). 
Cc: stable@vger.kernel.org Fixes: 1be7107fbe18 ("mm: larger stack guard gap, between vmas") Reported-by: Dave Jones Debugged-by: Linus Torvalds Signed-off-by: Hugh Dickins Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- mm/mmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 8e07976d5e47..290b77d9a01e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1817,7 +1817,8 @@ check_current: /* Check if current node has a suitable gap */ if (gap_start > high_limit) return -ENOMEM; - if (gap_end >= low_limit && gap_end - gap_start >= length) + if (gap_end >= low_limit && + gap_end > gap_start && gap_end - gap_start >= length) goto found; /* Visit right subtree if it looks promising */ @@ -1920,7 +1921,8 @@ check_current: gap_end = vm_start_gap(vma); if (gap_end < low_limit) return -ENOMEM; - if (gap_start <= high_limit && gap_end - gap_start >= length) + if (gap_start <= high_limit && + gap_end > gap_start && gap_end - gap_start >= length) goto found; /* Visit left subtree if it looks promising */ From bd726c90b6b8ce87602208701b208a208e6d5600 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 19 Jun 2017 17:34:05 +0200 Subject: [PATCH 319/341] Allow stack to grow up to address space limit Fix expand_upwards() on architectures with an upward-growing stack (parisc, metag and partly IA-64) to allow the stack to reliably grow exactly up to the address space limit given by TASK_SIZE. Signed-off-by: Helge Deller Acked-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/mmap.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 290b77d9a01e..a5e3dcd75e79 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2230,16 +2230,19 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; - /* Guard against wrapping around to address 0. */ + /* Guard against exceeding limits of the address space. 
*/ address &= PAGE_MASK; - address += PAGE_SIZE; - if (!address) + if (address >= TASK_SIZE) return -ENOMEM; + address += PAGE_SIZE; /* Enforce stack_guard_gap */ gap_addr = address + stack_guard_gap; - if (gap_addr < address) - return -ENOMEM; + + /* Guard against overflow */ + if (gap_addr < address || gap_addr > TASK_SIZE) + gap_addr = TASK_SIZE; + next = vma->vm_next; if (next && next->vm_start < gap_addr) { if (!(next->vm_flags & VM_GROWSUP)) From e4330d8bf669139a983255d1801733b64c2ae841 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 19 Jun 2017 15:53:01 +0300 Subject: [PATCH 320/341] ACPI / scan: Fix enumeration for special SPI and I2C devices Commit f406270bf73d ("ACPI / scan: Set the visited flag for all enumerated devices") caused two groups of special SPI or I2C devices to no longer enumerate. SPI and I2C devices are expected to be enumerated by the SPI and I2C subsystems, but the change caused acpi_bus_attach() to mark those devices with acpi_device_set_enumerated(). The first group of devices is matched using the Device Tree compatible property with the special _HID "PRP0001". Those devices have a matching scan handler, so acpi_scan_attach_handler() returns 1 and acpi_bus_attach() marks them with acpi_device_set_enumerated(). The second group of devices, without a valid _HID such as "LNXVIDEO", has device->pnp.type.platform_id set to zero, and the change again marks them with acpi_device_set_enumerated(). Fix this by flagging the SPI and I2C devices during struct acpi_device object initialization time and letting the code in acpi_bus_attach() go through the device_attach() and acpi_default_enumeration() path for all SPI and I2C devices. Fixes: f406270bf73d (ACPI / scan: Set the visited flag for all enumerated devices) Signed-off-by: Jarkko Nikula Acked-by: Mika Westerberg Cc: 4.11+ # 4.11+ Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/scan.c | 67 +++++++++++++++++++++++------------------ include/acpi/acpi_bus.h | 3 +- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 3a10d7573477..d53162997f32 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev) adev->flags.coherent_dma = cca; } +static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data) +{ + bool *is_spi_i2c_slave_p = data; + + if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) + return 1; + + /* + * devices that are connected to UART still need to be enumerated to + * platform bus + */ + if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART) + *is_spi_i2c_slave_p = true; + + /* no need to do more checking */ + return -1; +} + +static bool acpi_is_spi_i2c_slave(struct acpi_device *device) +{ + struct list_head resource_list; + bool is_spi_i2c_slave = false; + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave, + &is_spi_i2c_slave); + acpi_dev_free_resource_list(&resource_list); + + return is_spi_i2c_slave; +} + void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, int type, unsigned long long sta) { @@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, acpi_bus_get_flags(device); device->flags.match_driver = false; device->flags.initialized = true; + device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device); acpi_device_clear_enumerated(device); device_initialize(&device->dev); dev_set_uevent_suppress(&device->dev, true); @@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used, return AE_OK; } -static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data) -{ - bool *is_spi_i2c_slave_p = data; - - if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) - return 1; - - 
/* - * devices that are connected to UART still need to be enumerated to - * platform bus - */ - if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART) - *is_spi_i2c_slave_p = true; - - /* no need to do more checking */ - return -1; -} - static void acpi_default_enumeration(struct acpi_device *device) { - struct list_head resource_list; - bool is_spi_i2c_slave = false; - /* * Do not enumerate SPI/I2C slaves as they will be enumerated by their * respective parents. */ - INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave, - &is_spi_i2c_slave); - acpi_dev_free_resource_list(&resource_list); - if (!is_spi_i2c_slave) { + if (!device->flags.spi_i2c_slave) { acpi_create_platform_device(device, NULL); acpi_device_set_enumerated(device); } else { @@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device) return; device->flags.match_driver = true; - if (ret > 0) { + if (ret > 0 && !device->flags.spi_i2c_slave) { acpi_device_set_enumerated(device); goto ok; } @@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device) if (ret < 0) return; - if (device->pnp.type.platform_id) - acpi_default_enumeration(device); - else + if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave) acpi_device_set_enumerated(device); + else + acpi_default_enumeration(device); ok: list_for_each_entry(child, &device->children, node) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 197f3fffc9a7..408c7820e200 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -210,7 +210,8 @@ struct acpi_device_flags { u32 of_compatible_ok:1; u32 coherent_dma:1; u32 cca_seen:1; - u32 reserved:20; + u32 spi_i2c_slave:1; + u32 reserved:19; }; /* File System */ From 2f263d145140ea4b9f5762b15886ae26195a764a Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Thu, 15 Jun 2017 10:36:22 +0200 Subject: [PATCH 321/341] kbuild: fix header installation under fakechroot 
environment Since commit fcc8487d477a ("uapi: export all headers under uapi directories") fakechroot make bindeb-pkg fails, mismatching files for directories: touch: cannot touch 'usr/include/video/uvesafb.h/.install': Not a directory This is due to a bug in fakechroot: when using the function $(wildcard $(srcdir)/*/.) in a makefile, under a fakechroot environment, not only directories but also files are returned. To circumvent that, we are using the functions: $(sort $(dir $(wildcard $(srcdir)/*/))) Fixes: fcc8487d477a ("uapi: export all headers under uapi directories") Signed-off-by: Richard Genoud Signed-off-by: Masahiro Yamada --- scripts/Makefile.headersinst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index ce753a408c56..c583a1e1bd3c 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -14,7 +14,15 @@ __headers: include scripts/Kbuild.include srcdir := $(srctree)/$(obj) -subdirs := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.)) + +# When make is run under a fakechroot environment, the function +# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular +# files. So, we are using a combination of sort/dir/wildcard which works +# with fakechroot. +subdirs := $(patsubst $(srcdir)/%/,%,\ + $(filter-out $(srcdir)/,\ + $(sort $(dir $(wildcard $(srcdir)/*/))))) + # caller may set destination dir (when installing to asm/) _dst := $(if $(dst),$(dst),$(obj)) From eb5e248d502bec191bd99f04cae8b49992b3abde Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 21 Jun 2017 20:27:35 -0700 Subject: [PATCH 322/341] xfs: don't allow bmap on rt files bmap returns a dumb LBA address but not the block device that goes with that LBA. Swapfiles don't care about this and will blindly assume that the data volume is the correct blockdev, which is totally bogus for files on the rt subvolume.
This results in the swap code doing IOs to arbitrary locations on the data device(!) if the passed in mapping is a realtime file, so just turn off bmap for rt files. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_aops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 09af0f7cd55e..3b91faacc1ba 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1316,9 +1316,12 @@ xfs_vm_bmap( * The swap code (ab-)uses ->bmap to get a block mapping and then * bypasseѕ the file system for actual I/O. We really can't allow * that on reflinks inodes, so we have to skip out here. And yes, - * 0 is the magic code for a bmap error.. + * 0 is the magic code for a bmap error. + * + * Since we don't pass back blockdev info, we can't return bmap + * information for rt files either. */ - if (xfs_is_reflink_inode(ip)) + if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip)) return 0; filemap_write_and_wait(mapping); From 6c782a5ea56a799658e213a78dc1455264938afa Mon Sep 17 00:00:00 2001 From: Michail Georgios Etairidis Date: Tue, 20 Jun 2017 10:20:42 +0200 Subject: [PATCH 323/341] i2c: imx: Use correct function to write to register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i2c-imx driver incorrectly uses readb()/writeb() to read and write to the appropriate registers when performing a repeated start. The appropriate imx_i2c_read_reg()/imx_i2c_write_reg() functions should be used instead. Performing a repeated start results in a kernel panic. The platform is imx. 
Signed-off-by: Michail G Etairidis Fixes: ce1a78840ff7 ("i2c: imx: add DMA support for freescale i2c driver") Fixes: 054b62d9f25c ("i2c: imx: fix the i2c bus hang issue when do repeat restart") Acked-by: Fugang Duan Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 95ed17183e73..54a47b40546f 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx, * the first read operation, otherwise the first read cost * one extra clock cycle. */ - temp = readb(i2c_imx->base + IMX_I2C_I2CR); + temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp |= I2CR_MTX; - writeb(temp, i2c_imx->base + IMX_I2C_I2CR); + imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); } msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); @@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo * the first read operation, otherwise the first read cost * one extra clock cycle. */ - temp = readb(i2c_imx->base + IMX_I2C_I2CR); + temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp |= I2CR_MTX; - writeb(temp, i2c_imx->base + IMX_I2C_I2CR); + imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); } } else if (i == (msgs->len - 2)) { dev_dbg(&i2c_imx->adapter.dev, From fb3a5055cd7098f8d1dd0cd38d7172211113255f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 19 Jun 2017 07:26:09 -0700 Subject: [PATCH 324/341] perf/x86/intel: Add 1G DTLB load/store miss support for SKL Current DTLB load/store miss events (0x608/0x649) only counts 4K,2M and 4M page size. Need to extend the events to support any page size (4K/2M/4M/1G). 
The complete DTLB load/store miss events are: DTLB_LOAD_MISSES.WALK_COMPLETED 0xe08 DTLB_STORE_MISSES.WALK_COMPLETED 0xe49 Signed-off-by: Kan Liang Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Link: http://lkml.kernel.org/r/20170619142609.11058-1-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a6d91d4e37a1..110ce8238466 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ - [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ - [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ }, [ C(OP_PREFETCH) ] = { [ C(RESULT_ACCESS) ] = 0x0, From addb63c18a0d52a9ce2611d039f981f7b6148d2b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 19 Jun 2017 08:02:28 +0200 Subject: [PATCH 325/341] KVM: s390: gaccess: fix real-space designation asce handling for gmap shadows For real-space designation asces the asce origin part is only a token. The asce token origin must not be used to generate an effective address for storage references. This however is erroneously done within kvm_s390_shadow_tables(). Furthermore within the same function the wrong parts of virtual addresses are used to generate a corresponding real address (e.g. the region second index is used as region first index). Both of the above can result in incorrect address translations. 
Only for real space designations with a token origin of zero and addresses below one megabyte the translation was correct. Furthermore replace a "!asce.r" statement with a "!*fake" statement to make it more obvious that a specific condition has nothing to do with the architecture, but with the fake handling of real space designations. Fixes: 3218f7094b6b ("s390/mm: support real-space for gmap shadows") Cc: David Hildenbrand Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Reviewed-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9da243d94cc3..3b297fa3aa67 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, ptr = asce.origin * 4096; if (asce.r) { *fake = 1; + ptr = 0; asce.dt = ASCE_TYPE_REGION1; } switch (asce.dt) { case ASCE_TYPE_REGION1: - if (vaddr.rfx01 > asce.tl && !asce.r) + if (vaddr.rfx01 > asce.tl && !*fake) return PGM_REGION_FIRST_TRANS; break; case ASCE_TYPE_REGION2: @@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, union region1_table_entry rfte; if (*fake) { - /* offset in 16EB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); + ptr += (unsigned long) vaddr.rfx << 53; rfte.val = ptr; goto shadow_r2t; } @@ -1036,8 +1036,7 @@ shadow_r2t: union region2_table_entry rste; if (*fake) { - /* offset in 8PB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); + ptr += (unsigned long) vaddr.rsx << 42; rste.val = ptr; goto shadow_r3t; } @@ -1064,8 +1063,7 @@ shadow_r3t: union region3_table_entry rtte; if (*fake) { - /* offset in 4TB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 31UL); + ptr += (unsigned long) vaddr.rtx << 31; rtte.val = ptr; goto shadow_sgt; } @@ -1101,8 
+1099,7 @@ shadow_sgt: union segment_table_entry ste; if (*fake) { - /* offset in 2G guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 20UL); + ptr += (unsigned long) vaddr.sx << 20; ste.val = ptr; goto shadow_pgt; } From bbd5ff50afffcf4a01d05367524736c57607a478 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 20 Jun 2017 18:37:28 +1000 Subject: [PATCH 326/341] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD NPU2 requires an extra explicit flush to an active GPU PID when sending address translation shoot downs (ATSDs) to reliably flush the GPU TLB. This patch adds just such a flush at the end of each sequence of ATSDs. We can safely use PID 0 which is always reserved and active on the GPU. PID 0 is only used for init_mm which will never be a user mm on the GPU. To enforce this we add a check in pnv_npu2_init_context() just in case someone tries to use PID 0 on the GPU. Signed-off-by: Alistair Popple [mpe: Use true/false for bool literals] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++-------- 1 file changed, 65 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index e6f444b46207..b5d960d6db3d 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, return mmio_atsd_reg; } -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) +static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) { unsigned long launch; @@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + /* Invalidating the entire process doesn't use a va */ return mmio_launch_invalidate(npu, launch, 0); } static int 
mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid) + unsigned long pid, bool flush) { unsigned long launch; @@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va, /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + return mmio_launch_invalidate(npu, launch, va); } #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) +struct mmio_atsd_reg { + struct npu *npu; + int reg; +}; + +static void mmio_invalidate_wait( + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) +{ + struct npu *npu; + int i, reg; + + /* Wait for all invalidations to complete */ + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* Wait for completion */ + npu = mmio_atsd_reg[i].npu; + reg = mmio_atsd_reg[i].reg; + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) + cpu_relax(); + + put_mmio_atsd_reg(npu, reg); + + /* + * The GPU requires two flush ATSDs to ensure all entries have + * been flushed. We use PID 0 as it will never be used for a + * process on the GPU. + */ + if (flush) + mmio_invalidate_pid(npu, 0, true); + } +} + /* * Invalidate either a single address or an entire PID depending on * the value of va. 
*/ static void mmio_invalidate(struct npu_context *npu_context, int va, - unsigned long address) + unsigned long address, bool flush) { - int i, j, reg; + int i, j; struct npu *npu; struct pnv_phb *nphb; struct pci_dev *npdev; - struct { - struct npu *npu; - int reg; - } mmio_atsd_reg[NV_MAX_NPUS]; + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; /* @@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, if (va) mmio_atsd_reg[i].reg = - mmio_invalidate_va(npu, address, pid); + mmio_invalidate_va(npu, address, pid, + flush); else mmio_atsd_reg[i].reg = - mmio_invalidate_pid(npu, pid); + mmio_invalidate_pid(npu, pid, flush); /* * The NPU hardware forwards the shootdown to all GPUs @@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, */ flush_tlb_mm(npu_context->mm); - /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* Wait for completion */ - npu = mmio_atsd_reg[i].npu; - reg = mmio_atsd_reg[i].reg; - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) - cpu_relax(); - put_mmio_atsd_reg(npu, reg); - } + mmio_invalidate_wait(mmio_atsd_reg, flush); + if (flush) + /* Wait for the flush to complete */ + mmio_invalidate_wait(mmio_atsd_reg, false); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, * There should be no more translation requests for this PID, but we * need to ensure any entries for it are removed from the TLB. 
*/ - mmio_invalidate(npu_context, 0, 0); + mmio_invalidate(npu_context, 0, 0, true); } static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, @@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, @@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, @@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - for (address = start; address <= end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address); + for (address = start; address < end; address += PAGE_SIZE) + mmio_invalidate(npu_context, 1, address, false); + + /* Do the flush only on the final addess == end */ + mmio_invalidate(npu_context, 1, address, true); } static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { @@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* No nvlink associated with this GPU device */ return ERR_PTR(-ENODEV); - if (!mm) { - /* kernel thread contexts are not supported */ + if (!mm || mm->context.id == 0) { + /* + * Kernel thread contexts are not supported and context id 0 is + * reserved on the GPU. 
+ */ return ERR_PTR(-EINVAL); } From c8401dda2f0a00cd25c0af6a95ed50e478d25de4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 7 Jun 2017 15:13:14 +0200 Subject: [PATCH 327/341] KVM: x86: fix singlestepping over syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TF is handled a bit differently for syscall and sysret, compared to the other instructions: TF is checked after the instruction completes, so that the OS can disable #DB at a syscall by adding TF to FMASK. When the sysret is executed the #DB is taken "as if" the syscall insn just completed. KVM emulates syscall so that it can trap 32-bit syscall on Intel processors. Fix the behavior, otherwise you could get #DB on a user stack which is not nice. This does not affect Linux guests, as they use an IST or task gate for #DB. This fixes CVE-2017-7518. Cc: stable@vger.kernel.org Reported-by: Andy Lutomirski Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 1 + arch/x86/kvm/x86.c | 62 +++++++++++++++--------------- 3 files changed, 34 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 055962615779..722d0e568863 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -296,6 +296,7 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ + bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; struct x86_exception exception; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0816ab2e8adc..80890dee66ce 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; return 
X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 87d3cb901935..0e846f0cb83b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; + ctxt->eip = kvm_rip_read(vcpu); ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : @@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) { struct kvm_run *kvm_run = vcpu->run; - /* - * rflags is the old, "raw" value of the flags. The new value has - * not been saved yet. - * - * This is correct even for TF set by the guest, because "the - * processor will not generate this exception after the instruction - * that sets the TF flag". - */ - if (unlikely(rflags & X86_EFLAGS_TF)) { - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | - DR6_RTM; - kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - /* - * "Certain debug exceptions may clear bit 0-3. The - * remaining contents of the DR6 register are never - * cleared by the processor". 
- */ - vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - } + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + *r = EMULATE_USER_EXIT; + } else { + /* + * "Certain debug exceptions may clear bit 0-3. The + * remaining contents of the DR6 register are never + * cleared by the processor". + */ + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= DR6_BS | DR6_RTM; + kvm_queue_exception(vcpu, DB_VECTOR); } } @@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) int r = EMULATE_DONE; kvm_x86_ops->skip_emulated_instruction(vcpu); - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + + /* + * rflags is the old, "raw" value of the flags. The new value has + * not been saved yet. + * + * This is correct even for TF set by the guest, because "the + * processor will not generate this exception after the instruction + * that sets the TF flag". 
+ */ + if (unlikely(rflags & X86_EFLAGS_TF)) + kvm_vcpu_do_singlestep(vcpu, &r); return r == EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -5726,8 +5727,9 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE) - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + if (r == EMULATE_DONE && + (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) __kvm_set_rflags(vcpu, ctxt->eflags); From 7598f8bc1383ffd77686cb4e92e749bef3c75937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 21 Jun 2017 18:41:34 +0200 Subject: [PATCH 328/341] perf probe: Fix probe definition for inlined functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 613f050d68a8 ("perf probe: Fix to probe on gcc generated functions in modules"), the offset from symbol is, incorrectly, added to the trace point address. This leads to incorrect probe trace points for inlined functions and when using relative line number on symbols. 
Prior this patch: $ perf probe -m nf_nat -D in_range p:probe/in_range nf_nat:in_range.isra.9+0 $ perf probe -m i40e -D i40e_clean_rx_irq p:probe/i40e_clean_rx_irq i40e:i40e_napi_poll+2212 $ perf probe -m i40e -D i40e_clean_rx_irq:16 p:probe/i40e_clean_rx_irq i40e:i40e_lan_xmit_frame+626 After: $ perf probe -m nf_nat -D in_range p:probe/in_range nf_nat:in_range.isra.9+0 $ perf probe -m i40e -D i40e_clean_rx_irq p:probe/i40e_clean_rx_irq i40e:i40e_napi_poll+1106 $ perf probe -m i40e -D i40e_clean_rx_irq:16 p:probe/i40e_clean_rx_irq i40e:i40e_napi_poll+2665 Committer testing: Using 'pfunct', a tool found in the 'dwarves' package [1], one can ask what are the functions that while not being explicitely marked as inline, were inlined by the compiler: # pfunct --cc_inlined /lib/modules/4.12.0-rc4+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko | head __ew32 e1000_regdump e1000e_dump_ps_pages e1000_desc_unused e1000e_systim_to_hwtstamp e1000e_rx_hwtstamp e1000e_update_rdt_wa e1000e_update_tdt_wa e1000_put_txbuf e1000_consume_page Then ask 'perf probe' to produce the kprobe_tracer probe definitions for two of them: # perf probe -m e1000e -D e1000e_rx_hwtstamp p:probe/e1000e_rx_hwtstamp e1000e:e1000_receive_skb+74 # perf probe -m e1000e -D e1000_consume_page p:probe/e1000_consume_page e1000e:e1000_clean_jumbo_rx_irq+876 p:probe/e1000_consume_page_1 e1000e:e1000_clean_jumbo_rx_irq+1506 p:probe/e1000_consume_page_2 e1000e:e1000_clean_rx_irq_ps+1074 Now lets concentrate on the 'e1000_consume_page' one, that was inlined twice in e1000_clean_jumbo_rx_irq(), lets see what readelf says about the DWARF tags for that function: $ readelf -wi /lib/modules/4.12.0-rc4+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko <1><13e27b>: Abbrev Number: 121 (DW_TAG_subprogram) <13e27c> DW_AT_name : (indirect string, offset: 0xa8945): e1000_clean_jumbo_rx_irq <13e287> DW_AT_low_pc : 0x17a30 <3><13e6ef>: Abbrev Number: 119 (DW_TAG_inlined_subroutine) <13e6f0> DW_AT_abstract_origin: <0x13ed2c> 
<13e6f4> DW_AT_low_pc : 0x17be6 <1><13ed2c>: Abbrev Number: 142 (DW_TAG_subprogram) <13ed2e> DW_AT_name : (indirect string, offset: 0xa54c3): e1000_consume_page So, the first time in e1000_clean_jumbo_rx_irq() where e1000_consume_page() is inlined is at PC 0x17be6, which subtracted from e1000_clean_jumbo_rx_irq()'s address, gives us the offset we should use in the probe definition: 0x17be6 - 0x17a30 = 438 but above we have 876, which is twice as much. Lets see the second inline expansion of e1000_consume_page() in e1000_clean_jumbo_rx_irq(): <3><13e86e>: Abbrev Number: 119 (DW_TAG_inlined_subroutine) <13e86f> DW_AT_abstract_origin: <0x13ed2c> <13e873> DW_AT_low_pc : 0x17d21 0x17d21 - 0x17a30 = 753 So we where adding it at twice the offset from the containing function as we should. And then after this patch: # perf probe -m e1000e -D e1000e_rx_hwtstamp p:probe/e1000e_rx_hwtstamp e1000e:e1000_receive_skb+37 # perf probe -m e1000e -D e1000_consume_page p:probe/e1000_consume_page e1000e:e1000_clean_jumbo_rx_irq+438 p:probe/e1000_consume_page_1 e1000e:e1000_clean_jumbo_rx_irq+753 p:probe/e1000_consume_page_2 e1000e:e1000_clean_jumbo_rx_irq+1353 # Which matches the two first expansions and shows that because we were doubling the offset it would spill over the next function: readelf -sw /lib/modules/4.12.0-rc4+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko 673: 0000000000017a30 1626 FUNC LOCAL DEFAULT 2 e1000_clean_jumbo_rx_irq 674: 0000000000018090 2013 FUNC LOCAL DEFAULT 2 e1000_clean_rx_irq_ps This is the 3rd inline expansion of e1000_consume_page() in e1000_clean_jumbo_rx_irq(): <3><13ec77>: Abbrev Number: 119 (DW_TAG_inlined_subroutine) <13ec78> DW_AT_abstract_origin: <0x13ed2c> <13ec7c> DW_AT_low_pc : 0x17f79 0x17f79 - 0x17a30 = 1353 So: 0x17a30 + 2 * 1353 = 0x184c2 And: 0x184c2 - 0x18090 = 1074 Which explains the bogus third expansion for e1000_consume_page() to end up at: p:probe/e1000_consume_page_2 e1000e:e1000_clean_rx_irq_ps+1074 All fixed now :-) [1] 
https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ Signed-off-by: Björn Töpel Tested-by: Arnaldo Carvalho de Melo Acked-by: Magnus Karlsson Acked-by: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 613f050d68a8 ("perf probe: Fix to probe on gcc generated functions in modules") Link: http://lkml.kernel.org/r/20170621164134.5701-1-bjorn.topel@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 84e7e698411e..a2670e9d652d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp, struct map *map, unsigned long offs) { struct symbol *sym; - u64 addr = tp->address + tp->offset - offs; + u64 addr = tp->address - offs; sym = map__find_symbol(map, addr); if (!sym) From ad8181060788c80c0ad75b583f24c22fa962a7a6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 22 May 2017 18:44:57 -0700 Subject: [PATCH 329/341] kconfig: fix sparse warnings in nconfig Fix sparse warnings in scripts/kconfig/nconf* ('make nconfig'): ../scripts/kconfig/nconf.c:1071:32: warning: Using plain integer as NULL pointer ../scripts/kconfig/nconf.c:1238:30: warning: Using plain integer as NULL pointer ../scripts/kconfig/nconf.c:511:51: warning: Using plain integer as NULL pointer ../scripts/kconfig/nconf.c:1460:6: warning: symbol 'setup_windows' was not declared. Should it be static? ../scripts/kconfig/nconf.c:274:12: warning: symbol 'current_instructions' was not declared. Should it be static? ../scripts/kconfig/nconf.c:308:22: warning: symbol 'function_keys' was not declared. Should it be static? 
../scripts/kconfig/nconf.gui.c:132:17: warning: non-ANSI function declaration of function 'set_colors' ../scripts/kconfig/nconf.gui.c:195:24: warning: Using plain integer as NULL pointer nconf.gui.o before/after files are the same. nconf.o before/after files are the same until the 'static' function declarations are added. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/kconfig/nconf.c | 12 ++++++------ scripts/kconfig/nconf.gui.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index a9bc5334a478..003114779815 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS]; static int items_num; static int global_exit; /* the currently selected button */ -const char *current_instructions = menu_instructions; +static const char *current_instructions = menu_instructions; static char *dialog_input_result; static int dialog_input_result_len; @@ -305,7 +305,7 @@ struct function_keys { }; static const int function_keys_num = 9; -struct function_keys function_keys[] = { +static struct function_keys function_keys[] = { { .key_str = "F1", .func = "Help", @@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag) index = (index + items_num) % items_num; while (true) { char *str = k_menu_items[index].str; - if (strcasestr(str, match_str) != 0) + if (strcasestr(str, match_str) != NULL) return index; if (flag == FIND_NEXT_MATCH_UP || flag == MATCH_TINKER_PATTERN_UP) @@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans) static void conf(struct menu *menu) { - struct menu *submenu = 0; + struct menu *submenu = NULL; const char *prompt = menu_get_prompt(menu); struct symbol *sym; int res; @@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu) static void conf_choice(struct menu *menu) { const char *prompt = _(menu_get_prompt(menu)); - struct menu 
*child = 0; + struct menu *child = NULL; struct symbol *active; int selected_index = 0; int last_top_row = 0; @@ -1456,7 +1456,7 @@ static void conf_save(void) } } -void setup_windows(void) +static void setup_windows(void) { int lines, columns; diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 4b2f44c20caf..a64b1c31253e 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -129,7 +129,7 @@ static void no_colors_theme(void) mkattrn(FUNCTION_TEXT, A_REVERSE); } -void set_colors() +void set_colors(void) { start_color(); use_default_colors(); @@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no) int lines = 0; if (!text) - return 0; + return NULL; for (i = 0; text[i] != '\0' && lines < line_no; i++) if (text[i] == '\n') From 34f19ff1b5a0d11e46df479623d6936460105c9f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jun 2017 15:58:29 +1000 Subject: [PATCH 330/341] powerpc/64: Initialise thread_info for emergency stacks Emergency stacks have their thread_info mostly uninitialised, which in particular means garbage preempt_count values. Emergency stack code runs with interrupts disabled entirely, and is used very rarely, so this has been unnoticed so far. It was found by a proposed new powerpc watchdog that takes a soft-NMI directly from the masked_interrupt handler and using the emergency stack. That crashed at BUG_ON(in_nmi()) in nmi_enter(). preempt_count()s were found to be garbage. To fix this, zero the entire THREAD_SIZE allocation, and initialize the thread_info. Cc: stable@vger.kernel.org Reported-by: Abdul Haleem Signed-off-by: Nicholas Piggin [mpe: Move it all into setup_64.c, use a function not a macro. 
Fix crashes on Cell by setting preempt_count to 0 not HARDIRQ_OFFSET] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a8c1f99e9607..4640f6d64f8b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -615,6 +615,24 @@ void __init exc_lvl_early_init(void) } #endif +/* + * Emergency stacks are used for a range of things, from asynchronous + * NMIs (system reset, machine check) to synchronous, process context. + * We set preempt_count to zero, even though that isn't necessarily correct. To + * get the right value we'd need to copy it from the previous thread_info, but + * doing that might fault causing more problems. + * TODO: what to do with accounting? + */ +static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu) +{ + ti->task = NULL; + ti->cpu = cpu; + ti->preempt_count = 0; + ti->local_flags = 0; + ti->flags = 0; + klp_init_thread_info(ti); +} + /* * Stack space used when we detect a bad kernel stack pointer, and * early in SMP boots before relocation is enabled. Exclusive emergency @@ -633,24 +651,31 @@ void __init emergency_stack_init(void) * Since we use these as temporary stacks during secondary CPU * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. + * + * The IRQ stacks allocated elsewhere in this file are zeroed and + * initialized in kernel/irq.c. These are initialized here in order + * to have emergency stacks available as early as possible. 
*/ limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { struct thread_info *ti; ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } From 9768935264c4f0e4afd788a185d8e8d89c28e41d Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Fri, 23 Jun 2017 00:04:51 -0700 Subject: [PATCH 331/341] Input: synaptics-rmi4 - only read the F54 query registers which are used The F54 driver is currently only using the first 6 bytes of F54 so there is no need to read all 27 bytes. Some Dell systems (Dell XP13 9333 and similar) have an issue with the touchpad or I2C bus when reading reports larger than 16 bytes. Reads larger than 16 bytes are reported in two HID reports. Something about the back to back reports seems to cause the next read to report incorrect data. This results in F30 failing to load and the click button failing to work.
Previous issues with the I2C controller or touchpad were addressed in: commit 5b65c2a02966 ("HID: rmi: check sanity of the incoming report") Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=195949 Signed-off-by: Andrew Duggan Reviewed-by: Benjamin Tissoires Reviewed-by: Nick Dyer Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f54.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index dea63e2db3e6..f5206e2c767e 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -31,9 +31,6 @@ #define F54_GET_REPORT 1 #define F54_FORCE_CAL 2 -/* Fixed sizes of reports */ -#define F54_QUERY_LEN 27 - /* F54 capabilities */ #define F54_CAP_BASELINE (1 << 2) #define F54_CAP_IMAGE8 (1 << 3) @@ -95,7 +92,6 @@ struct rmi_f54_reports { struct f54_data { struct rmi_function *fn; - u8 qry[F54_QUERY_LEN]; u8 num_rx_electrodes; u8 num_tx_electrodes; u8 capabilities; @@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn) { int error; struct f54_data *f54; + u8 buf[6]; f54 = dev_get_drvdata(&fn->dev); error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, - &f54->qry, sizeof(f54->qry)); + buf, sizeof(buf)); if (error) { dev_err(&fn->dev, "%s: Failed to query F54 properties\n", __func__); return error; } - f54->num_rx_electrodes = f54->qry[0]; - f54->num_tx_electrodes = f54->qry[1]; - f54->capabilities = f54->qry[2]; - f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8); - f54->family = f54->qry[5]; + f54->num_rx_electrodes = buf[0]; + f54->num_tx_electrodes = buf[1]; + f54->capabilities = buf[2]; + f54->clock_rate = buf[3] | (buf[4] << 8); + f54->family = buf[5]; rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n", f54->num_rx_electrodes); From c891d9f6bf2a78c9c657656872a60807820db4c8 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 23 Jun 2017 15:08:38 -0700 Subject: [PATCH 332/341] mm, thp: remove cond_resched from 
__collapse_huge_page_copy This is a partial revert of commit 338a16ba1549 ("mm, thp: copying user pages must schedule on collapse") which added a cond_resched() to __collapse_huge_page_copy(). On x86 with CONFIG_HIGHPTE, __collapse_huge_page_copy is called in atomic context and thus scheduling is not possible. This is only a possible config on arm and i386. Although need_resched has been shown to be set for over 100 jiffies while doing the iteration in __collapse_huge_page_copy, this is better than doing if (in_atomic()) cond_resched() to cover only non-CONFIG_HIGHPTE configs. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1706191341550.97821@chino.kir.corp.google.com Signed-off-by: David Rientjes Reported-by: Larry Finger Tested-by: Larry Finger Acked-by: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 945fd1ca49b5..df4ebdb2b10a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, spin_unlock(ptl); free_page_and_swap_cache(src_page); } - cond_resched(); } } From 029c54b09599573015a5c18dbe59cbdf42742237 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 23 Jun 2017 15:08:41 -0700 Subject: [PATCH 333/341] mm/vmalloc.c: huge-vmap: fail gracefully on unexpected huge vmap mappings Existing code that uses vmalloc_to_page() may assume that any address for which is_vmalloc_addr() returns true may be passed into vmalloc_to_page() to retrieve the associated struct page. This is not an unreasonable assumption to make, but on architectures that have CONFIG_HAVE_ARCH_HUGE_VMAP=y, it no longer holds, and we need to ensure that vmalloc_to_page() does not go off into the weeds trying to dereference huge PUDs or PMDs as table entries.
Given that vmalloc() and vmap() themselves never create huge mappings or deal with compound pages at all, there is no correct answer in this case, so return NULL instead, and issue a warning. When reading /proc/kcore on arm64, you will hit an oops as soon as you hit the huge mappings used for the various segments that make up the mapping of vmlinux. With this patch applied, you will no longer hit the oops, but the kcore contents will be incorrect (these regions will be zeroed out). We are fixing this for kcore specifically, so it avoids vread() for those regions. At least one other problematic user exists, i.e., /dev/kmem, but that is currently broken on arm64 for other reasons. Link: http://lkml.kernel.org/r/20170609082226.26152-1-ard.biesheuvel@linaro.org Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Reviewed-by: Laura Abbott Cc: Michal Hocko Cc: zhong jiang Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 34a1c3e46ed7..ecc97f74ab18 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) if (p4d_none(*p4d)) return NULL; pud = pud_offset(p4d, addr); - if (pud_none(*pud)) + + /* + * Don't dereference bad PUD or PMD (below) entries. This will also + * identify huge mappings, which we may encounter on architectures + * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be + * identified as vmalloc addresses by is_vmalloc_addr(), but are + * not [unambiguously] associated with a struct page, so there is + * no correct value to return for them.
+ */ + WARN_ON_ONCE(pud_bad(*pud)); + if (pud_none(*pud) || pud_bad(*pud)) return NULL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + WARN_ON_ONCE(pmd_bad(*pmd)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) return NULL; ptep = pte_offset_map(pmd, addr); From 9fa4eb8e490a28de40964b1b0e583d8db4c7e57c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Jun 2017 15:08:43 -0700 Subject: [PATCH 334/341] autofs: sanity check status reported with AUTOFS_DEV_IOCTL_FAIL If a positive status is passed with the AUTOFS_DEV_IOCTL_FAIL ioctl, autofs4_d_automount() will return ERR_PTR(status) with that status to follow_automount(), which will then dereference an invalid pointer. So treat a positive status the same as zero, and map to ENOENT. See comment in systemd src/core/automount.c::automount_send_ready(). Link: http://lkml.kernel.org/r/871sqwczx5.fsf@notabene.neil.brown.name Signed-off-by: NeilBrown Cc: Ian Kent Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/dev-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 734cbf8d9676..dd9f1bebb5a3 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp, int status; token = (autofs_wqt_t) param->fail.token; - status = param->fail.status ? param->fail.status : -ENOENT; + status = param->fail.status < 0 ? param->fail.status : -ENOENT; return autofs4_wait_release(sbi, token, status); } From 1eb643d02b21412e603b42cdd96010a2ac31c05f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 23 Jun 2017 15:08:46 -0700 Subject: [PATCH 335/341] fs/dax.c: fix inefficiency in dax_writeback_mapping_range() dax_writeback_mapping_range() fails to update iteration index when searching radix tree for entries needing cache flushing. Thus each pagevec worth of entries is searched starting from the start which is inefficient and prone to livelocks. Update index properly. 
Link: http://lkml.kernel.org/r/20170619124531.21491-1-jack@suse.cz Fixes: 9973c98ecfda3 ("dax: add support for fsync/sync") Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dax.c b/fs/dax.c index 2a6889b3585f..9187f3b07f3e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (ret < 0) goto out; } + start_index = indices[pvec.nr - 1] + 1; } out: put_dax(dax_dev); From a91e0f680bcd9e10c253ae8b62462a38bd48f09f Mon Sep 17 00:00:00 2001 From: Ilya Matveychikov Date: Fri, 23 Jun 2017 15:08:49 -0700 Subject: [PATCH 336/341] lib/cmdline.c: fix get_options() overflow while parsing ranges When using get_options() it's possible to specify a range of numbers, like 1-100500. The problem is that it doesn't track array size while calling internally to get_range() which iterates over the range and fills the memory with numbers. Link: http://lkml.kernel.org/r/2613C75C-B04D-4BFF-82A6-12F97BA0F620@gmail.com Signed-off-by: Ilya V. Matveychikov Cc: Jonathan Corbet Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/cmdline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/cmdline.c b/lib/cmdline.c index 3c6432df7e63..4c0888c4a68d 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -23,14 +23,14 @@ * the values[M, M+1, ..., N] into the ints array in get_options. 
*/ -static int get_range(char **str, int *pint) +static int get_range(char **str, int *pint, int n) { int x, inc_counter, upper_range; (*str)++; upper_range = simple_strtol((*str), NULL, 0); inc_counter = upper_range - *pint; - for (x = *pint; x < upper_range; x++) + for (x = *pint; n && x < upper_range; x++, n--) *pint++ = x; return inc_counter; } @@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints) break; if (res == 3) { int range_nums; - range_nums = get_range((char **)&str, ints + i); + range_nums = get_range((char **)&str, ints + i, nints - i); if (range_nums < 0) break; /* From 3b7b314053d021601940c50b07f5f1423ae67e21 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 23 Jun 2017 15:08:52 -0700 Subject: [PATCH 337/341] slub: make sysfs file removal asynchronous Commit bf5eb3de3847 ("slub: separate out sysfs_slab_release() from sysfs_slab_remove()") made slub sysfs file removals synchronous to kmem_cache shutdown. Unfortunately, this created a possible ABBA deadlock between slab_mutex and sysfs draining mechanism triggering the following lockdep warning. ====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-test+ #48 Not tainted ------------------------------------------------------- rmmod/1211 is trying to acquire lock: (s_active#120){++++.+}, at: [] kernfs_remove+0x23/0x40 but task is already holding lock: (slab_mutex){+.+.+.}, at: [] kmem_cache_destroy+0x41/0x2d0 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (slab_mutex){+.+.+.}: lock_acquire+0xf6/0x1f0 __mutex_lock+0x75/0x950 mutex_lock_nested+0x1b/0x20 slab_attr_store+0x75/0xd0 sysfs_kf_write+0x45/0x60 kernfs_fop_write+0x13c/0x1c0 __vfs_write+0x28/0x120 vfs_write+0xc8/0x1e0 SyS_write+0x49/0xa0 entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #0 (s_active#120){++++.+}: __lock_acquire+0x10ed/0x1260 lock_acquire+0xf6/0x1f0 __kernfs_remove+0x254/0x320 kernfs_remove+0x23/0x40 sysfs_remove_dir+0x51/0x80 kobject_del+0x18/0x50 __kmem_cache_shutdown+0x3e6/0x460 kmem_cache_destroy+0x1fb/0x2d0 kvm_exit+0x2d/0x80 [kvm] vmx_exit+0x19/0xa1b [kvm_intel] SyS_delete_module+0x198/0x1f0 entry_SYSCALL_64_fastpath+0x1f/0xc2 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(slab_mutex); lock(s_active#120); lock(slab_mutex); lock(s_active#120); *** DEADLOCK *** 2 locks held by rmmod/1211: #0: (cpu_hotplug.dep_map){++++++}, at: [] get_online_cpus+0x37/0x80 #1: (slab_mutex){+.+.+.}, at: [] kmem_cache_destroy+0x41/0x2d0 stack backtrace: CPU: 3 PID: 1211 Comm: rmmod Not tainted 4.10.0-test+ #48 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 Call Trace: print_circular_bug+0x1be/0x210 __lock_acquire+0x10ed/0x1260 lock_acquire+0xf6/0x1f0 __kernfs_remove+0x254/0x320 kernfs_remove+0x23/0x40 sysfs_remove_dir+0x51/0x80 kobject_del+0x18/0x50 __kmem_cache_shutdown+0x3e6/0x460 kmem_cache_destroy+0x1fb/0x2d0 kvm_exit+0x2d/0x80 [kvm] vmx_exit+0x19/0xa1b [kvm_intel] SyS_delete_module+0x198/0x1f0 ? SyS_delete_module+0x5/0x1f0 entry_SYSCALL_64_fastpath+0x1f/0xc2 It'd be the cleanest to deal with the issue by removing sysfs files without holding slab_mutex before the rest of shutdown; however, given the current code structure, it is pretty difficult to do so. This patch punts sysfs file removal to a work item. Before commit bf5eb3de3847, the removal was punted to a RCU delayed work item which is executed after release. 
Now, we're punting to a different work item on shutdown which still maintains the goal removing the sysfs files earlier when destroying kmem_caches. Link: http://lkml.kernel.org/r/20170620204512.GI21326@htj.duckdns.org Fixes: bf5eb3de3847 ("slub: separate out sysfs_slab_release() from sysfs_slab_remove()") Signed-off-by: Tejun Heo Reported-by: Steven Rostedt (VMware) Tested-by: Steven Rostedt (VMware) Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 1 + mm/slub.c | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 07ef550c6627..93315d6b21a8 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -84,6 +84,7 @@ struct kmem_cache { int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ + struct work_struct kobj_remove_work; #endif #ifdef CONFIG_MEMCG struct memcg_cache_params memcg_params; diff --git a/mm/slub.c b/mm/slub.c index 7449593fca72..8addc535bcdc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s) return name; } +static void sysfs_slab_remove_workfn(struct work_struct *work) +{ + struct kmem_cache *s = + container_of(work, struct kmem_cache, kobj_remove_work); + + if (!s->kobj.state_in_sysfs) + /* + * For a memcg cache, this may be called during + * deactivation and again on shutdown. Remove only once. + * A cache is never shut down before deactivation is + * complete, so no need to worry about synchronization. 
+ */ + return; + +#ifdef CONFIG_MEMCG + kset_unregister(s->memcg_kset); +#endif + kobject_uevent(&s->kobj, KOBJ_REMOVE); + kobject_del(&s->kobj); + kobject_put(&s->kobj); +} + static int sysfs_slab_add(struct kmem_cache *s) { int err; @@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s) struct kset *kset = cache_kset(s); int unmergeable = slab_unmergeable(s); + INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn); + if (!kset) { kobject_init(&s->kobj, &slab_ktype); return 0; @@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s) */ return; - if (!s->kobj.state_in_sysfs) - /* - * For a memcg cache, this may be called during - * deactivation and again on shutdown. Remove only once. - * A cache is never shut down before deactivation is - * complete, so no need to worry about synchronization. - */ - return; - -#ifdef CONFIG_MEMCG - kset_unregister(s->memcg_kset); -#endif - kobject_uevent(&s->kobj, KOBJ_REMOVE); - kobject_del(&s->kobj); + kobject_get(&s->kobj); + schedule_work(&s->kobj_remove_work); } void sysfs_slab_release(struct kmem_cache *s) From 8818efaaacb78c60a9d90c5705b6c99b75d7d442 Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Fri, 23 Jun 2017 15:08:55 -0700 Subject: [PATCH 338/341] ocfs2: fix deadlock caused by recursive locking in xattr Another deadlock path caused by recursive locking is reported. This kind of issue was introduced since commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()"). Two deadlock paths have been fixed by commit b891fa5024a9 ("ocfs2: fix deadlock issue when taking inode lock at vfs entry points"). Yes, we intend to fix this kind of case in incremental way, because it's hard to find out all possible paths at once. This one can be reproduced like this. On node1, cp a large file from home directory to ocfs2 mountpoint. While on node2, run setfacl/getfacl. Both nodes will hang up there. 
The backtraces: On node1: __ocfs2_cluster_lock.isra.39+0x357/0x740 [ocfs2] ocfs2_inode_lock_full_nested+0x17d/0x840 [ocfs2] ocfs2_write_begin+0x43/0x1a0 [ocfs2] generic_perform_write+0xa9/0x180 __generic_file_write_iter+0x1aa/0x1d0 ocfs2_file_write_iter+0x4f4/0xb40 [ocfs2] __vfs_write+0xc3/0x130 vfs_write+0xb1/0x1a0 SyS_write+0x46/0xa0 On node2: __ocfs2_cluster_lock.isra.39+0x357/0x740 [ocfs2] ocfs2_inode_lock_full_nested+0x17d/0x840 [ocfs2] ocfs2_xattr_set+0x12e/0xe80 [ocfs2] ocfs2_set_acl+0x22d/0x260 [ocfs2] ocfs2_iop_set_acl+0x65/0xb0 [ocfs2] set_posix_acl+0x75/0xb0 posix_acl_xattr_set+0x49/0xa0 __vfs_setxattr+0x69/0x80 __vfs_setxattr_noperm+0x72/0x1a0 vfs_setxattr+0xa7/0xb0 setxattr+0x12d/0x190 path_setxattr+0x9f/0xb0 SyS_setxattr+0x14/0x20 Fix this one by using ocfs2_inode_{lock|unlock}_tracker, which is exported by commit 439a36b8ef38 ("ocfs2/dlmglue: prepare tracking logic to avoid recursive cluster lock"). Link: http://lkml.kernel.org/r/20170622014746.5815-1-zren@suse.com Fixes: 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()") Signed-off-by: Eric Ren Reported-by: Thomas Voegtle Tested-by: Thomas Voegtle Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlmglue.c | 4 ++++ fs/ocfs2/xattr.c | 23 +++++++++++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3b7c937a36b5..4689940a953c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode, struct ocfs2_lock_res *lockres; lockres = &OCFS2_I(inode)->ip_inode_lockres; + /* had_lock means that the current process already takes the cluster + * lock previously. If had_lock is 1, we have nothing to do here, and + * it will get unlocked where we got the lock. 
+ */ if (!had_lock) { ocfs2_remove_holder(lockres, oh); ocfs2_inode_unlock(inode, ex); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3c5384d9b3a5..f70c3778d600 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode, void *buffer, size_t buffer_size) { - int ret; + int ret, had_lock; struct buffer_head *di_bh = NULL; + struct ocfs2_lock_holder oh; - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret < 0) { - mlog_errno(ret); - return ret; + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); + if (had_lock < 0) { + mlog_errno(had_lock); + return had_lock; } down_read(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, name, buffer, buffer_size); up_read(&OCFS2_I(inode)->ip_xattr_sem); - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); @@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode, { struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; - int ret, credits, ref_meta = 0, ref_credits = 0; + int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; struct ocfs2_refcount_tree *ref_tree = NULL; + struct ocfs2_lock_holder oh; struct ocfs2_xattr_info xi = { .xi_name_index = name_index, @@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode, return -ENOMEM; } - ret = ocfs2_inode_lock(inode, &di_bh, 1); - if (ret < 0) { + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); + if (had_lock < 0) { + ret = had_lock; mlog_errno(ret); goto cleanup_nolock; } @@ -3670,7 +3673,7 @@ cleanup: if (ret) mlog_errno(ret); } - ocfs2_inode_unlock(inode, 1); + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); cleanup_nolock: brelse(di_bh); brelse(xbs.xattr_bh); From 98da7d08850fb8bdeb395d6368ed15753304aa0c Mon Sep 17 00:00:00 2001 
From: Kees Cook Date: Fri, 23 Jun 2017 15:08:57 -0700 Subject: [PATCH 339/341] fs/exec.c: account for argv/envp pointers When limiting the argv/envp strings during exec to 1/4 of the stack limit, the storage of the pointers to the strings was not included. This means that an exec with huge numbers of tiny strings could eat 1/4 of the stack limit in strings and then additional space would be later used by the pointers to the strings. For example, on 32-bit with an 8MB stack rlimit, an exec with 1677721 single-byte strings would consume less than 2MB of stack, the max (8MB / 4) amount allowed, but the pointers to the strings would consume the remaining additional stack space (1677721 * 4 == 6710884). The result (1677721 + 6710884 == 8388605) would exhaust stack space entirely. Controlling this stack exhaustion could result in pathological behavior in setuid binaries (CVE-2017-1000365). [akpm@linux-foundation.org: additional commenting from Kees] Fixes: b6a2fea39318 ("mm: variable length argument support") Link: http://lkml.kernel.org/r/20170622001720.GA32173@beast Signed-off-by: Kees Cook Acked-by: Rik van Riel Acked-by: Michal Hocko Cc: Alexander Viro Cc: Qualys Security Advisory Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 72934df68471..904199086490 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (write) { unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; + unsigned long ptr_size; struct rlimit *rlim; + /* + * Since the stack will hold pointers to the strings, we + * must account for them as well. 
+ * + * The size calculation is the entire vma while each arg page is + * built, so each time we get here it's calculating how far it + * is currently (rather than each call being just the newly + * added size from the arg page). As a result, we need to + * always add the entire size of the pointers, so that on the + * last call to get_arg_page() we'll actually have the entire + * correct size. + */ + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + if (ptr_size > ULONG_MAX - size) + goto fail; + size += ptr_size; + acct_arg_size(bprm, size / PAGE_SIZE); /* @@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ rlim = current->signal->rlim; - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { - put_page(page); - return NULL; - } + if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) + goto fail; } return page; + +fail: + put_page(page); + return NULL; } static void put_arg_page(struct page *page) From 26fcd952d5c977a94ac64bb44ed409e37607b2c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 Jun 2017 10:50:38 +0200 Subject: [PATCH 340/341] x86/mshyperv: Remove excess #includes from mshyperv.h A recent commit included linux/slab.h in linux/irq.h. This breaks the build of vdso32 on a 64-bit kernel. The reason is that linux/irq.h gets included into the vdso code via linux/interrupt.h which is included from asm/mshyperv.h. That makes the 32-bit vdso compile fail, because slab.h includes the pgtable headers for 64-bit on a 64-bit build. Neither linux/clocksource.h nor linux/interrupt.h are needed in the mshyperv.h header file itself - it has a dependency on linux/atomic.h. Remove the includes and unbreak the build. Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: K. Y. 
Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource") Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1706231038460.2647@nanos Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mshyperv.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index fba100713924..d5acc27ed1cc 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -2,8 +2,7 @@ #define _ASM_X86_MSHYPER_H #include -#include -#include +#include #include /* From c0bc126f97fb929b3ae02c1c62322645d70eb408 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Jun 2017 18:30:05 -0700 Subject: [PATCH 341/341] Linux 4.12-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 57df7569c1bf..6d8a984ed9c9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION*