2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2025-01-19 02:54:00 +08:00
linux-next/drivers/gpu/drm/i915/i915_debugfs.c

5600 lines
149 KiB
C
Raw Normal View History

/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Keith Packard <keithp@keithp.com>
*
*/
#include <linux/seq_file.h>
#include <linux/circ_buf.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. 
This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/list_sort.h>
#include <asm/msr-index.h>
#include <drm/drmP.h>
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <drm/i915_drm.h>
#include "i915_drv.h"
/*
 * List selectors. i915_gem_object_list_info() reads one of these values from
 * the debugfs entry's info_ent->data and switches on it to pick the list to
 * dump.
 */
enum {
ACTIVE_LIST, /* selects ggtt->base.active_list */
INACTIVE_LIST, /* selects ggtt->base.inactive_list */
PINNED_LIST, /* no case in i915_gem_object_list_info(); hits its default (-EINVAL) */
};
/*
 * The drm_debugfs_init() routines run before dev->dev_private is allocated,
 * so we hook the dentry into the minor's debugfs list for release instead.
 *
 * Allocates a drm_info_node describing @ent (with @key stored as its
 * info_ent) and adds it to @minor->debugfs_list under @minor->debugfs_lock.
 *
 * Returns 0 on success. If the allocation fails, @ent is removed with
 * debugfs_remove() and -ENOMEM is returned.
 */
static int
drm_add_fake_info_node(struct drm_minor *minor,
		       struct dentry *ent,
		       const void *key)
{
	struct drm_info_node *fake;

	fake = kmalloc(sizeof(*fake), GFP_KERNEL);
	if (!fake) {
		debugfs_remove(ent);
		return -ENOMEM;
	}

	fake->minor = minor;
	fake->dent = ent;
	fake->info_ent = (void *) key;

	mutex_lock(&minor->debugfs_lock);
	list_add(&fake->list, &minor->debugfs_list);
	mutex_unlock(&minor->debugfs_lock);

	return 0;
}
static int i915_capabilities(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
const struct intel_device_info *info = INTEL_INFO(dev);
seq_printf(m, "gen: %d\n", info->gen);
seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev));
#define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x))
#define SEP_SEMICOLON ;
DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_SEMICOLON);
#undef PRINT_FLAG
#undef SEP_SEMICOLON
return 0;
}
/* Returns "p" when obj->pin_display is non-zero, otherwise a single space. */
static const char *get_pin_flag(struct drm_i915_gem_object *obj)
{
	return obj->pin_display ? "p" : " ";
}
/*
 * Returns a one-character tag for obj->tiling_mode: "X" for I915_TILING_X,
 * "Y" for I915_TILING_Y, and a single space for anything else.
 */
static const char *get_tiling_flag(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode == I915_TILING_X)
		return "X";
	if (obj->tiling_mode == I915_TILING_Y)
		return "Y";

	/* I915_TILING_NONE and any unrecognised mode. */
	return " ";
}
/* Returns "g" when i915_gem_obj_to_ggtt() yields non-NULL for @obj, else " ". */
static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
{
	if (i915_gem_obj_to_ggtt(obj))
		return "g";

	return " ";
}
/*
 * Sums node.size over every VMA on @obj's vma_list that is flagged is_ggtt
 * and whose drm_mm node is allocated. Returns 0 if there is none.
 */
static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	u64 total = 0;

	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (!vma->is_ggtt)
			continue;
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		total += vma->node.size;
	}

	return total;
}
static void
describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
{
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. 
Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 20:41:17 +08:00
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct intel_engine_cs *engine;
struct i915_vma *vma;
int pin_count = 0;
enum intel_engine_id id;
drm/i915: Add struct_mutex locking for debugs/i915_gem_framebuffer Since describe_obj() looks at state guarded by the struct_mutex, we need to be holding it. [ 580.201054] drv_suspend: starting subtest debugfs-reader [ 580.239652] ------------[ cut here ]------------ [ 580.239696] WARNING: CPU: 0 PID: 920 at include/linux/list_check.h:25 describe_obj+0x419/0x440() [ 580.239725] CPU: 0 PID: 920 Comm: cat Not tainted 4.5.0-rc6+ #835 [ 580.239745] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 580.239767] 0000000000000000 ffff88027554fcf8 ffffffff812c1135 0000000000000000 [ 580.239815] ffffffff8193dc42 ffff88027554fd30 ffffffff8107419d ffff880071727c00 [ 580.239858] ffff8802757d8000 ffffffff818f693c ffffffff818f693c ffff8802757b9048 [ 580.239896] Call Trace: [ 580.239917] [<ffffffff812c1135>] dump_stack+0x67/0x92 [ 580.239939] [<ffffffff8107419d>] warn_slowpath_common+0x7d/0xb0 [ 580.239959] [<ffffffff810742ba>] warn_slowpath_null+0x1a/0x20 [ 580.239981] [<ffffffff813ce579>] describe_obj+0x419/0x440 [ 580.240006] [<ffffffff813ced22>] i915_gem_framebuffer_info+0xa2/0x100 [ 580.240033] [<ffffffff811a9286>] seq_read+0xe6/0x3b0 [ 580.240059] [<ffffffff81182288>] __vfs_read+0x28/0xd0 [ 580.240085] [<ffffffff81173378>] ? SyS_fadvise64+0x228/0x2c0 [ 580.240112] [<ffffffff811823b2>] vfs_read+0x82/0x110 [ 580.240137] [<ffffffff811827d9>] SyS_read+0x49/0xa0 [ 580.240162] [<ffffffff815bac57>] entry_SYSCALL_64_fastpath+0x12/0x6b [ 580.240187] ---[ end trace 3e2cbf34576c9878 ]--- [ 580.281900] ------------[ cut here ]------------ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1459689261-7920-1-git-send-email-chris@chris-wilson.co.uk
2016-04-03 21:14:20 +08:00
lockdep_assert_held(&obj->base.dev->struct_mutex);
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. 
Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 20:41:17 +08:00
seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x [ ",
&obj->base,
obj->active ? "*" : " ",
get_pin_flag(obj),
get_tiling_flag(obj),
get_global_flag(obj),
obj->base.size / 1024,
obj->base.read_domains,
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. 
Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 20:41:17 +08:00
obj->base.write_domain);
for_each_engine_id(engine, dev_priv, id)
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. 
Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 20:41:17 +08:00
seq_printf(m, "%x ",
i915_gem_request_get_seqno(obj->last_read_req[id]));
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. 
Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 20:41:17 +08:00
seq_printf(m, "] %x %x%s%s%s",
i915_gem_request_get_seqno(obj->last_write_req),
i915_gem_request_get_seqno(obj->last_fenced_req),
i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
obj->dirty ? " dirty" : "",
obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (vma->pin_count > 0)
pin_count++;
}
seq_printf(m, " (pinned x %d)", pin_count);
if (obj->pin_display)
seq_printf(m, " (display)");
if (obj->fence_reg != I915_FENCE_REG_NONE)
seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
vma->is_ggtt ? "g" : "pp",
vma->node.start, vma->node.size);
if (vma->is_ggtt)
seq_printf(m, ", type: %u", vma->ggtt_view.type);
seq_puts(m, ")");
}
if (obj->stolen)
seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
if (obj->pin_display || obj->fault_mappable) {
char s[3], *t = s;
if (obj->pin_display)
*t++ = 'p';
if (obj->fault_mappable)
*t++ = 'f';
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. 
Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 20:41:17 +08:00
if (obj->last_write_req != NULL)
seq_printf(m, " (%s)",
i915_gem_request_get_engine(obj->last_write_req)->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
}
static void describe_ctx(struct seq_file *m, struct intel_context *ctx)
drm/i915: Do remaps for all contexts On both Ivybridge and Haswell, row remapping information is saved and restored with context. This means, we never actually properly supported the l3 remapping because our sysfs interface is asynchronous (and not tied to any context), and the known faulty HW would be reused by the next context to run. Not that due to the asynchronous nature of the sysfs entry, there is no point modifying the registers for the existing context. Instead we set a flag for all contexts to load the correct remapping information on the next run. Interested clients can use debugfs to determine whether or not the row has been remapped. One could propose at this point that we just do the remapping in the kernel. I guess since we have to maintain the sysfs interface anyway, I'm not sure how useful it is, and I do like keeping the policy in userspace; (it wasn't my original decision to make the interface the way it is, so I'm not attached). v2: Force a context switch when we have a remap on the next switch. (Ville) Don't let userspace use the interface with disabled contexts. v3: Don't force a context switch, just let it nop Improper context slice remap initialization, 1<<1 instead of 1<<i, but I rewrote it to avoid a second round of confusion. Error print moved to error path (All Ville) Added a comment on why the slice remap initialization happens. CC: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-19 10:03:18 +08:00
{
drm/i915: Emphasize that ctx->obj & ctx->is_initialized refer to the legacy rcs ctx We have already advanced that Logical Ring Contexts have their own kind of backing objects, but everything will be better explained in the Execlists series. For now, suffice it to say that the current backing object is only ever used with the render ring, so we're making this fact more explicit (which is a good reason on its own). As for the is_initialized flag, we only use to signify that the render state has been initialized (a.k.a. golden context, a.k.a. null context). It doesn't mean anything for the other engines, so make that distinction obvious. Done with the following Coccinelle patch (plus manual frobbing of the struct): @@ struct intel_context c; @@ - (c).obj + c.legacy_hw_ctx.rcs_state @@ struct intel_context *c; @@ - (c)->obj + c->legacy_hw_ctx.rcs_state @@ struct intel_context c; @@ - (c).is_initialized + c.legacy_hw_ctx.initialized @@ struct intel_context *c; @@ - (c)->is_initialized + c->legacy_hw_ctx.initialized This Execlists prep-work patch has been suggested by Chris Wilson and Daniel Vetter separately. Initially, it was two separate patches: drm/i915: Rename ctx->obj to ctx->rcs_state drm/i915: Make it obvious that ctx->id is merely a user handle Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: s/id/is_initialized/ to fix the subject and resolve a conflict in i915_gem_context_reset. Also introduce a new lctx local variable to avoid overtly long lines.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-03 23:27:59 +08:00
seq_putc(m, ctx->legacy_hw_ctx.initialized ? 'I' : 'i');
drm/i915: Do remaps for all contexts On both Ivybridge and Haswell, row remapping information is saved and restored with context. This means, we never actually properly supported the l3 remapping because our sysfs interface is asynchronous (and not tied to any context), and the known faulty HW would be reused by the next context to run. Not that due to the asynchronous nature of the sysfs entry, there is no point modifying the registers for the existing context. Instead we set a flag for all contexts to load the correct remapping information on the next run. Interested clients can use debugfs to determine whether or not the row has been remapped. One could propose at this point that we just do the remapping in the kernel. I guess since we have to maintain the sysfs interface anyway, I'm not sure how useful it is, and I do like keeping the policy in userspace; (it wasn't my original decision to make the interface the way it is, so I'm not attached). v2: Force a context switch when we have a remap on the next switch. (Ville) Don't let userspace use the interface with disabled contexts. v3: Don't force a context switch, just let it nop Improper context slice remap initialization, 1<<1 instead of 1<<i, but I rewrote it to avoid a second round of confusion. Error print moved to error path (All Ville) Added a comment on why the slice remap initialization happens. CC: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-19 10:03:18 +08:00
seq_putc(m, ctx->remap_slice ? 'R' : 'r');
seq_putc(m, ' ');
}
static int i915_gem_object_list_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
uintptr_t list = (uintptr_t) node->info_ent->data;
struct list_head *head;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
2013-08-01 08:00:14 +08:00
struct i915_vma *vma;
u64 total_obj_size, total_gtt_size;
int count, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
2013-08-01 08:00:14 +08:00
/* FIXME: the user of this interface might want more than just GGTT */
switch (list) {
case ACTIVE_LIST:
seq_puts(m, "Active:\n");
head = &ggtt->base.active_list;
break;
case INACTIVE_LIST:
seq_puts(m, "Inactive:\n");
head = &ggtt->base.inactive_list;
break;
default:
mutex_unlock(&dev->struct_mutex);
return -EINVAL;
}
total_obj_size = total_gtt_size = count = 0;
list_for_each_entry(vma, head, vm_link) {
2013-08-01 08:00:14 +08:00
seq_printf(m, " ");
describe_obj(m, vma->obj);
seq_printf(m, "\n");
total_obj_size += vma->obj->base.size;
total_gtt_size += vma->node.size;
count++;
}
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
count, total_obj_size, total_gtt_size);
return 0;
}
/*
 * list_sort() comparator: orders objects linked through obj_exec_link by
 * ascending stolen->start. Returns -1, 0 or 1. @priv is unused.
 */
static int obj_rank_by_stolen(void *priv,
			      struct list_head *A, struct list_head *B)
{
	struct drm_i915_gem_object *lhs =
		container_of(A, struct drm_i915_gem_object, obj_exec_link);
	struct drm_i915_gem_object *rhs =
		container_of(B, struct drm_i915_gem_object, obj_exec_link);

	if (lhs->stolen->start == rhs->stolen->start)
		return 0;

	return lhs->stolen->start < rhs->stolen->start ? -1 : 1;
}
/*
 * seq_file show routine listing every object that has a stolen node.
 *
 * Objects from mm.bound_list and mm.unbound_list are gathered on a temporary
 * list via obj_exec_link and sorted with obj_rank_by_stolen(). They are then
 * described one per line under a "Stolen:" heading and unlinked again, and a
 * totals line follows. GGTT size is only accumulated for bound objects.
 *
 * Returns 0, or the error from mutex_lock_interruptible().
 */
static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	u64 total_obj_size = 0, total_gtt_size = 0;
	LIST_HEAD(stolen);
	int count = 0;
	int ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		if (obj->stolen != NULL) {
			list_add(&obj->obj_exec_link, &stolen);

			total_obj_size += obj->base.size;
			total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
			count++;
		}
	}

	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
		if (obj->stolen != NULL) {
			list_add(&obj->obj_exec_link, &stolen);

			total_obj_size += obj->base.size;
			count++;
		}
	}

	list_sort(NULL, &stolen, obj_rank_by_stolen);

	seq_puts(m, "Stolen:\n");
	/* Drain the temporary list, leaving every obj_exec_link reinitialised. */
	while (!list_empty(&stolen)) {
		obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link);

		seq_puts(m, " ");
		describe_obj(m, obj);
		seq_putc(m, '\n');

		list_del_init(&obj->obj_exec_link);
	}

	mutex_unlock(&dev->struct_mutex);

	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
		   count, total_obj_size, total_gtt_size);
	return 0;
}
/*
 * count_objects - tally the objects on @list, linked through @member.
 *
 * Expands in the caller's scope and relies on the caller's locals: `obj` is
 * the iterator, `size`/`count` accumulate every object on the list, and
 * `mappable_size`/`mappable_count` accumulate only the objects that have
 * map_and_fenceable set. The macro only adds to the accumulators; it never
 * resets them.
 */
#define count_objects(list, member) do { \
list_for_each_entry(obj, list, member) { \
size += i915_gem_obj_total_ggtt_size(obj); \
++count; \
if (obj->map_and_fenceable) { \
mappable_size += i915_gem_obj_ggtt_size(obj); \
++mappable_count; \
} \
} \
} while (0)
/* Accumulator filled in by per_file_stats(), one call per object. */
struct file_stats {
	/* Client whose ppgtt bindings count towards active/inactive. */
	struct drm_i915_file_private *file_priv;
	/* Number of objects visited. */
	unsigned long count;
	/* Sum of the sizes of all visited objects. */
	u64 total;
	/* Size of objects on a global list that were not counted as bound. */
	u64 unbound;
	/* Size of objects with an allocated GGTT binding. */
	u64 global;
	/* Size of objects that have a name or a dma_buf. */
	u64 shared;
	/* Size of bound objects that are marked active. */
	u64 active;
	/* Size of bound objects that are not marked active. */
	u64 inactive;
};
static int per_file_stats(int id, void *ptr, void *data)
{
struct drm_i915_gem_object *obj = ptr;
struct file_stats *stats = data;
struct i915_vma *vma;
stats->count++;
stats->total += obj->base.size;
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
if (USES_FULL_PPGTT(obj->base.dev)) {
list_for_each_entry(vma, &obj->vma_list, obj_link) {
struct i915_hw_ppgtt *ppgtt;
if (!drm_mm_node_allocated(&vma->node))
continue;
if (vma->is_ggtt) {
stats->global += obj->base.size;
continue;
}
ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base);
if (ppgtt->file_priv != stats->file_priv)
continue;
if (obj->active) /* XXX per-vma statistic */
stats->active += obj->base.size;
else
stats->inactive += obj->base.size;
return 0;
}
} else {
if (i915_gem_obj_ggtt_bound(obj)) {
stats->global += obj->base.size;
if (obj->active)
stats->active += obj->base.size;
else
stats->inactive += obj->base.size;
return 0;
}
}
if (!list_empty(&obj->global_list))
stats->unbound += obj->base.size;
return 0;
}
/*
 * print_file_stats - emit one summary line for @stats, labelled @name, to
 * the seq_file @m.
 *
 * @stats must be a struct file_stats object (not a pointer), because its
 * members are accessed with '.'. Nothing is printed when stats.count is zero.
 */
#define print_file_stats(m, name, stats) do { \
if (stats.count) \
seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
name, \
stats.count, \
stats.total, \
stats.active, \
stats.inactive, \
stats.global, \
stats.shared, \
stats.unbound); \
} while (0)
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
static void print_batch_pool_stats(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj;
struct file_stats stats;
struct intel_engine_cs *engine;
int j;
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
memset(&stats, 0, sizeof(stats));
for_each_engine(engine, dev_priv) {
for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link)
per_file_stats(0, obj, &stats);
}
}
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
print_file_stats(m, "[k]batch pool", stats);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
}
2013-08-01 08:00:14 +08:00
/*
 * count_vmas - tally the VMAs on @list, linked through @member.
 *
 * Expands in the caller's scope and relies on the caller's locals: `vma` is
 * the iterator, `size`/`count` accumulate every VMA on the list (using the
 * GGTT size of the VMA's object), and `mappable_size`/`mappable_count`
 * accumulate only those whose object has map_and_fenceable set. The macro
 * only adds to the accumulators; it never resets them.
 */
#define count_vmas(list, member) do { \
	list_for_each_entry(vma, list, member) { \
		size += i915_gem_obj_total_ggtt_size(vma->obj); \
		++count; \
		if (vma->obj->map_and_fenceable) { \
			mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
			++mappable_count; \
		} \
	} \
} while (0)
static int i915_gem_object_info(struct seq_file *m, void* data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
u32 count, mappable_count, purgeable_count;
u64 size, mappable_size, purgeable_size;
struct drm_i915_gem_object *obj;
struct drm_file *file;
2013-08-01 08:00:14 +08:00
struct i915_vma *vma;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
seq_printf(m, "%u objects, %zu bytes\n",
dev_priv->mm.object_count,
dev_priv->mm.object_memory);
size = count = mappable_size = mappable_count = 0;
count_objects(&dev_priv->mm.bound_list, global_list);
seq_printf(m, "%u [%u] objects, %llu [%llu] bytes in gtt\n",
count, mappable_count, size, mappable_size);
size = count = mappable_size = mappable_count = 0;
count_vmas(&ggtt->base.active_list, vm_link);
seq_printf(m, " %u [%u] active objects, %llu [%llu] bytes\n",
count, mappable_count, size, mappable_size);
size = count = mappable_size = mappable_count = 0;
count_vmas(&ggtt->base.inactive_list, vm_link);
seq_printf(m, " %u [%u] inactive objects, %llu [%llu] bytes\n",
count, mappable_count, size, mappable_size);
size = count = purgeable_size = purgeable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 17:40:46 +08:00
size += obj->base.size, ++count;
if (obj->madv == I915_MADV_DONTNEED)
purgeable_size += obj->base.size, ++purgeable_count;
}
seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 17:40:46 +08:00
size = count = mappable_size = mappable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->fault_mappable) {
size += i915_gem_obj_ggtt_size(obj);
++count;
}
if (obj->pin_display) {
mappable_size += i915_gem_obj_ggtt_size(obj);
++mappable_count;
}
if (obj->madv == I915_MADV_DONTNEED) {
purgeable_size += obj->base.size;
++purgeable_count;
}
}
seq_printf(m, "%u purgeable objects, %llu bytes\n",
purgeable_count, purgeable_size);
seq_printf(m, "%u pinned mappable objects, %llu bytes\n",
mappable_count, mappable_size);
seq_printf(m, "%u fault mappable objects, %llu bytes\n",
count, size);
seq_printf(m, "%llu [%llu] gtt total\n",
ggtt->base.total, ggtt->mappable_end - ggtt->base.start);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
seq_putc(m, '\n');
print_batch_pool_stats(m, dev_priv);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
memset(&stats, 0, sizeof(stats));
stats.file_priv = file->driver_priv;
drm/i915: Hold the table lock whilst walking the file's idr and counting the objects in debugfs Fixes an issue whereby we may race with the table updates (before the core takes the struct_mutex) and so risk dereferencing a stale pointer in the iterator for /debugfs/.../i915_gem_objects. For example, [ 1524.757545] BUG: unable to handle kernel paging request at f53af748 [ 1524.757572] IP: [<c1406982>] per_file_stats+0x12/0x100 [ 1524.757599] *pdpt = 0000000001b13001 *pde = 00000000379fb067 *pte = 80000000353af060 [ 1524.757621] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 1524.757637] Modules linked in: ctr ccm arc4 ath9k ath9k_common ath9k_hw ath snd_hda_codec_conexant mac80211 snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec bnep snd_hwdep rfcomm snd_pcm gpio_ich dell_wmi sparse_keymap snd_seq_midi hid_multitouch uvcvideo snd_seq_midi_event dell_laptop snd_rawmidi dcdbas snd_seq videobuf2_vmalloc videobuf2_memops videobuf2_core usbhid videodev snd_seq_device coretemp snd_timer hid joydev kvm_intel cfg80211 ath3k kvm btusb bluetooth serio_raw snd microcode soundcore lpc_ich wmi mac_hid parport_pc ppdev lp parport psmouse ahci libahci [ 1524.757825] CPU: 3 PID: 1911 Comm: intel-gpu-overl Tainted: G W OE 3.15.0-rc3+ #96 [ 1524.757840] Hardware name: Dell Inc. 
Inspiron 1090/Inspiron 1090, BIOS A06 08/23/2011 [ 1524.757855] task: f52f36c0 ti: f4cbc000 task.ti: f4cbc000 [ 1524.757869] EIP: 0060:[<c1406982>] EFLAGS: 00210202 CPU: 3 [ 1524.757884] EIP is at per_file_stats+0x12/0x100 [ 1524.757896] EAX: 0000002d EBX: 00000000 ECX: f4cbdefc EDX: f53af700 [ 1524.757909] ESI: c1406970 EDI: f53af700 EBP: f4cbde6c ESP: f4cbde5c [ 1524.757922] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 1524.757934] CR0: 80050033 CR2: f53af748 CR3: 356af000 CR4: 000007f0 [ 1524.757945] Stack: [ 1524.757957] f4cbdefc 00000000 c1406970 f53af700 f4cbdea8 c12e5f15 f4cbdefc c1406970 [ 1524.757993] 0000ffff f4cbde90 0000002d f5dc5cd0 e4e80438 c1181d59 f4cbded8 f4d89900 [ 1524.758027] f5631b40 e5131074 c1903f37 f4cbdf28 c14068e6 f52648a0 c1927748 c1903f37 [ 1524.758062] Call Trace: [ 1524.758084] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758106] [<c12e5f15>] idr_for_each+0xa5/0x100 [ 1524.758126] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758148] [<c1181d59>] ? seq_vprintf+0x29/0x50 [ 1524.758168] [<c14068e6>] i915_gem_object_info+0x486/0x510 [ 1524.758189] [<c11823a6>] seq_read+0xd6/0x380 [ 1524.758208] [<c116d11d>] ? final_putname+0x1d/0x40 [ 1524.758227] [<c11822d0>] ? 
seq_hlist_next_percpu+0x90/0x90 [ 1524.758246] [<c1163e52>] vfs_read+0x82/0x150 [ 1524.758265] [<c11645d6>] SyS_read+0x46/0x90 [ 1524.758285] [<c16b8d8c>] sysenter_do_call+0x12/0x22 [ 1524.758298] Code: f5 8f 2a 00 83 c4 6c 31 c0 5b 5e 5f 5d c3 8d 74 26 00 8d bc 27 00 00 00 00 55 89 e5 57 56 53 83 ec 04 3e 8d 74 26 00 83 41 04 01 <8b> 42 48 01 41 08 8b 42 4c 89 d7 85 c0 75 07 8b 42 60 85 c0 74 [ 1524.758461] EIP: [<c1406982>] per_file_stats+0x12/0x100 SS:ESP 0068:f4cbde5c [ 1524.758485] CR2: 00000000f53af748 Reported-by: Sam Jansen <sam.jansen@starleaf.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Sam Jansen <sam.jansen@starleaf.com> Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Jani Nikula <jani.nikula@intel.com>
2014-06-17 16:56:24 +08:00
spin_lock(&file->table_lock);
idr_for_each(&file->object_idr, per_file_stats, &stats);
drm/i915: Hold the table lock whilst walking the file's idr and counting the objects in debugfs Fixes an issue whereby we may race with the table updates (before the core takes the struct_mutex) and so risk dereferencing a stale pointer in the iterator for /debugfs/.../i915_gem_objects. For example, [ 1524.757545] BUG: unable to handle kernel paging request at f53af748 [ 1524.757572] IP: [<c1406982>] per_file_stats+0x12/0x100 [ 1524.757599] *pdpt = 0000000001b13001 *pde = 00000000379fb067 *pte = 80000000353af060 [ 1524.757621] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 1524.757637] Modules linked in: ctr ccm arc4 ath9k ath9k_common ath9k_hw ath snd_hda_codec_conexant mac80211 snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec bnep snd_hwdep rfcomm snd_pcm gpio_ich dell_wmi sparse_keymap snd_seq_midi hid_multitouch uvcvideo snd_seq_midi_event dell_laptop snd_rawmidi dcdbas snd_seq videobuf2_vmalloc videobuf2_memops videobuf2_core usbhid videodev snd_seq_device coretemp snd_timer hid joydev kvm_intel cfg80211 ath3k kvm btusb bluetooth serio_raw snd microcode soundcore lpc_ich wmi mac_hid parport_pc ppdev lp parport psmouse ahci libahci [ 1524.757825] CPU: 3 PID: 1911 Comm: intel-gpu-overl Tainted: G W OE 3.15.0-rc3+ #96 [ 1524.757840] Hardware name: Dell Inc. 
Inspiron 1090/Inspiron 1090, BIOS A06 08/23/2011 [ 1524.757855] task: f52f36c0 ti: f4cbc000 task.ti: f4cbc000 [ 1524.757869] EIP: 0060:[<c1406982>] EFLAGS: 00210202 CPU: 3 [ 1524.757884] EIP is at per_file_stats+0x12/0x100 [ 1524.757896] EAX: 0000002d EBX: 00000000 ECX: f4cbdefc EDX: f53af700 [ 1524.757909] ESI: c1406970 EDI: f53af700 EBP: f4cbde6c ESP: f4cbde5c [ 1524.757922] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 1524.757934] CR0: 80050033 CR2: f53af748 CR3: 356af000 CR4: 000007f0 [ 1524.757945] Stack: [ 1524.757957] f4cbdefc 00000000 c1406970 f53af700 f4cbdea8 c12e5f15 f4cbdefc c1406970 [ 1524.757993] 0000ffff f4cbde90 0000002d f5dc5cd0 e4e80438 c1181d59 f4cbded8 f4d89900 [ 1524.758027] f5631b40 e5131074 c1903f37 f4cbdf28 c14068e6 f52648a0 c1927748 c1903f37 [ 1524.758062] Call Trace: [ 1524.758084] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758106] [<c12e5f15>] idr_for_each+0xa5/0x100 [ 1524.758126] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758148] [<c1181d59>] ? seq_vprintf+0x29/0x50 [ 1524.758168] [<c14068e6>] i915_gem_object_info+0x486/0x510 [ 1524.758189] [<c11823a6>] seq_read+0xd6/0x380 [ 1524.758208] [<c116d11d>] ? final_putname+0x1d/0x40 [ 1524.758227] [<c11822d0>] ? 
seq_hlist_next_percpu+0x90/0x90 [ 1524.758246] [<c1163e52>] vfs_read+0x82/0x150 [ 1524.758265] [<c11645d6>] SyS_read+0x46/0x90 [ 1524.758285] [<c16b8d8c>] sysenter_do_call+0x12/0x22 [ 1524.758298] Code: f5 8f 2a 00 83 c4 6c 31 c0 5b 5e 5f 5d c3 8d 74 26 00 8d bc 27 00 00 00 00 55 89 e5 57 56 53 83 ec 04 3e 8d 74 26 00 83 41 04 01 <8b> 42 48 01 41 08 8b 42 4c 89 d7 85 c0 75 07 8b 42 60 85 c0 74 [ 1524.758461] EIP: [<c1406982>] per_file_stats+0x12/0x100 SS:ESP 0068:f4cbde5c [ 1524.758485] CR2: 00000000f53af748 Reported-by: Sam Jansen <sam.jansen@starleaf.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Sam Jansen <sam.jansen@starleaf.com> Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Jani Nikula <jani.nikula@intel.com>
2014-06-17 16:56:24 +08:00
spin_unlock(&file->table_lock);
/*
* Although we have a valid reference on file->pid, that does
* not guarantee that the task_struct who called get_pid() is
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
print_file_stats(m, task ? task->comm : "<unknown>", stats);
rcu_read_unlock();
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
/*
 * seq_file show callback: describe the objects on the bound list, followed
 * by a totals line.
 *
 * The info entry's data pointer selects the view: when it equals
 * PINNED_LIST only pinned objects are shown, otherwise every bound object.
 *
 * Returns 0 on success or the error from mutex_lock_interruptible().
 */
static int i915_gem_gtt_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uintptr_t list = (uintptr_t) node->info_ent->data;
	u64 total_obj_size = 0, total_gtt_size = 0;
	struct drm_i915_gem_object *obj;
	int count = 0;
	int ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		/* The pinned view skips every object that is not pinned. */
		if (list != PINNED_LIST || i915_gem_obj_is_pinned(obj)) {
			seq_puts(m, "   ");
			describe_obj(m, obj);
			seq_putc(m, '\n');

			total_obj_size += obj->base.size;
			total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
			count++;
		}
	}

	mutex_unlock(&dev->struct_mutex);

	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
		   count, total_obj_size, total_gtt_size);

	return 0;
}
static int i915_gem_pageflip_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
drm/i915: Check for a stalled page flip after each vblank Long ago, back in the racy heydays of 915gm interrupt handling, page flips would occasionally go astray and leave the hardware stuck, and the display not updating. This annoyed people who relied on their systems being able to display continuously updating information 24/7, and so some code to detect when the driver missed the page flip completion signal was added. Until recently, it was presumed that the interrupt handling was now flawless, but once again Simon Farnsworth has found a system whose display will stall. Reinstate the pageflip stall detection, which works by checking to see if the hardware has been updated to the new framebuffer address following each vblank. If the hardware is scanning out from the new framebuffer, but we still think the flip is pending, then we kick our driver into submission. This is a continuation of the effort started with commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc Author: Simon Farnsworth <simon.farnsworth@onelan.co.uk> Date: Wed Sep 1 17:47:52 2010 +0100 drm/i915: Avoid pageflipping freeze when we miss the flip prepare interrupt This now includes a belt-and-braces approach to make sure the driver (or the hardware) doesn't miss an interrupt and cause us to stop updating the display should the unthinkable happen and the pageflip fail - i.e. that the user is able to continue submitting flips. v2: Cleanup, refactor, and rename v3: Only start counting vblanks after the flip command has been seen by the hardware. v4: Record the seqno after we touch the ring, or else there may be no seqno allocated yet. v5: Rebase on mmio-flip. v6: Rebase, rebase. 
Reported-by: Simon Farnsworth <simon@farnz.org.uk> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> [v4] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-09-05 14:13:24 +08:00
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *crtc;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
for_each_intel_crtc(dev, crtc) {
const char pipe = pipe_name(crtc->pipe);
const char plane = plane_name(crtc->plane);
struct intel_unpin_work *work;
spin_lock_irq(&dev->event_lock);
work = crtc->unpin_work;
if (work == NULL) {
seq_printf(m, "No flip due on pipe %c (plane %c)\n",
pipe, plane);
} else {
drm/i915: Check for a stalled page flip after each vblank Long ago, back in the racy haydays of 915gm interrupt handling, page flips would occasionally go astray and leave the hardware stuck, and the display not updating. This annoyed people who relied on their systems being able to display continuously updating information 24/7, and so some code to detect when the driver missed the page flip completion signal was added. Until recently, it was presumed that the interrupt handling was now flawless, but once again Simon Farnsworth has found a system whose display will stall. Reinstate the pageflip stall detection, which works by checking to see if the hardware has been updated to the new framebuffer address following each vblank. If the hardware is scanning out from the new framebuffer, but we still think the flip is pending, then we kick our driver into submision. This is a continuation of the effort started with commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc Author: Simon Farnsworth <simon.farnsworth@onelan.co.uk> Date: Wed Sep 1 17:47:52 2010 +0100 drm/i915: Avoid pageflipping freeze when we miss the flip prepare interrupt This now includes a belt-and-braces approach to make sure the driver (or the hardware) doesn't miss an interrupt and cause us to stop updating the display should the unthinkable happen and the pageflip fail - i.e. that the user is able to continue submitting flips. v2: Cleanup, refactor, and rename v3: Only start counting vblanks after the flip command has been seen by the hardware. v4: Record the seqno after we touch the ring, or else there may be no seqno allocated yet. v5: Rebase on mmio-flip. v6: Rebase, rebase. 
Reported-by: Simon Farnsworth <simon@farnz.org.uk> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> [v4] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-09-05 14:13:24 +08:00
u32 addr;
if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) {
seq_printf(m, "Flip queued on pipe %c (plane %c)\n",
pipe, plane);
} else {
seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
pipe, plane);
}
if (work->flip_queued_req) {
struct intel_engine_cs *engine = i915_gem_request_get_engine(work->flip_queued_req);
seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
engine->name,
i915_gem_request_get_seqno(work->flip_queued_req),
drm/i915: Check for a stalled page flip after each vblank Long ago, back in the racy haydays of 915gm interrupt handling, page flips would occasionally go astray and leave the hardware stuck, and the display not updating. This annoyed people who relied on their systems being able to display continuously updating information 24/7, and so some code to detect when the driver missed the page flip completion signal was added. Until recently, it was presumed that the interrupt handling was now flawless, but once again Simon Farnsworth has found a system whose display will stall. Reinstate the pageflip stall detection, which works by checking to see if the hardware has been updated to the new framebuffer address following each vblank. If the hardware is scanning out from the new framebuffer, but we still think the flip is pending, then we kick our driver into submision. This is a continuation of the effort started with commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc Author: Simon Farnsworth <simon.farnsworth@onelan.co.uk> Date: Wed Sep 1 17:47:52 2010 +0100 drm/i915: Avoid pageflipping freeze when we miss the flip prepare interrupt This now includes a belt-and-braces approach to make sure the driver (or the hardware) doesn't miss an interrupt and cause us to stop updating the display should the unthinkable happen and the pageflip fail - i.e. that the user is able to continue submitting flips. v2: Cleanup, refactor, and rename v3: Only start counting vblanks after the flip command has been seen by the hardware. v4: Record the seqno after we touch the ring, or else there may be no seqno allocated yet. v5: Rebase on mmio-flip. v6: Rebase, rebase. 
Reported-by: Simon Farnsworth <simon@farnz.org.uk> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> [v4] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-09-05 14:13:24 +08:00
dev_priv->next_seqno,
engine->get_seqno(engine),
i915_gem_request_completed(work->flip_queued_req, true));
drm/i915: Check for a stalled page flip after each vblank Long ago, back in the racy haydays of 915gm interrupt handling, page flips would occasionally go astray and leave the hardware stuck, and the display not updating. This annoyed people who relied on their systems being able to display continuously updating information 24/7, and so some code to detect when the driver missed the page flip completion signal was added. Until recently, it was presumed that the interrupt handling was now flawless, but once again Simon Farnsworth has found a system whose display will stall. Reinstate the pageflip stall detection, which works by checking to see if the hardware has been updated to the new framebuffer address following each vblank. If the hardware is scanning out from the new framebuffer, but we still think the flip is pending, then we kick our driver into submision. This is a continuation of the effort started with commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc Author: Simon Farnsworth <simon.farnsworth@onelan.co.uk> Date: Wed Sep 1 17:47:52 2010 +0100 drm/i915: Avoid pageflipping freeze when we miss the flip prepare interrupt This now includes a belt-and-braces approach to make sure the driver (or the hardware) doesn't miss an interrupt and cause us to stop updating the display should the unthinkable happen and the pageflip fail - i.e. that the user is able to continue submitting flips. v2: Cleanup, refactor, and rename v3: Only start counting vblanks after the flip command has been seen by the hardware. v4: Record the seqno after we touch the ring, or else there may be no seqno allocated yet. v5: Rebase on mmio-flip. v6: Rebase, rebase. 
Reported-by: Simon Farnsworth <simon@farnz.org.uk> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> [v4] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-09-05 14:13:24 +08:00
} else
seq_printf(m, "Flip not associated with any ring\n");
seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
work->flip_queued_vblank,
work->flip_ready_vblank,
drm_crtc_vblank_count(&crtc->base));
if (work->enable_stall_check)
seq_puts(m, "Stall check enabled, ");
else
seq_puts(m, "Stall check waiting for page flip ioctl, ");
seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
drm/i915: Check for a stalled page flip after each vblank Long ago, back in the racy haydays of 915gm interrupt handling, page flips would occasionally go astray and leave the hardware stuck, and the display not updating. This annoyed people who relied on their systems being able to display continuously updating information 24/7, and so some code to detect when the driver missed the page flip completion signal was added. Until recently, it was presumed that the interrupt handling was now flawless, but once again Simon Farnsworth has found a system whose display will stall. Reinstate the pageflip stall detection, which works by checking to see if the hardware has been updated to the new framebuffer address following each vblank. If the hardware is scanning out from the new framebuffer, but we still think the flip is pending, then we kick our driver into submision. This is a continuation of the effort started with commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc Author: Simon Farnsworth <simon.farnsworth@onelan.co.uk> Date: Wed Sep 1 17:47:52 2010 +0100 drm/i915: Avoid pageflipping freeze when we miss the flip prepare interrupt This now includes a belt-and-braces approach to make sure the driver (or the hardware) doesn't miss an interrupt and cause us to stop updating the display should the unthinkable happen and the pageflip fail - i.e. that the user is able to continue submitting flips. v2: Cleanup, refactor, and rename v3: Only start counting vblanks after the flip command has been seen by the hardware. v4: Record the seqno after we touch the ring, or else there may be no seqno allocated yet. v5: Rebase on mmio-flip. v6: Rebase, rebase. 
Reported-by: Simon Farnsworth <simon@farnz.org.uk> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> [v4] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-09-05 14:13:24 +08:00
if (INTEL_INFO(dev)->gen >= 4)
addr = I915_HI_DISPBASE(I915_READ(DSPSURF(crtc->plane)));
else
addr = I915_READ(DSPADDR(crtc->plane));
seq_printf(m, "Current scanout address 0x%08x\n", addr);
if (work->pending_flip_obj) {
drm/i915: Check for a stalled page flip after each vblank Long ago, back in the racy haydays of 915gm interrupt handling, page flips would occasionally go astray and leave the hardware stuck, and the display not updating. This annoyed people who relied on their systems being able to display continuously updating information 24/7, and so some code to detect when the driver missed the page flip completion signal was added. Until recently, it was presumed that the interrupt handling was now flawless, but once again Simon Farnsworth has found a system whose display will stall. Reinstate the pageflip stall detection, which works by checking to see if the hardware has been updated to the new framebuffer address following each vblank. If the hardware is scanning out from the new framebuffer, but we still think the flip is pending, then we kick our driver into submision. This is a continuation of the effort started with commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc Author: Simon Farnsworth <simon.farnsworth@onelan.co.uk> Date: Wed Sep 1 17:47:52 2010 +0100 drm/i915: Avoid pageflipping freeze when we miss the flip prepare interrupt This now includes a belt-and-braces approach to make sure the driver (or the hardware) doesn't miss an interrupt and cause us to stop updating the display should the unthinkable happen and the pageflip fail - i.e. that the user is able to continue submitting flips. v2: Cleanup, refactor, and rename v3: Only start counting vblanks after the flip command has been seen by the hardware. v4: Record the seqno after we touch the ring, or else there may be no seqno allocated yet. v5: Rebase on mmio-flip. v6: Rebase, rebase. 
Reported-by: Simon Farnsworth <simon@farnz.org.uk> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> [v4] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-09-05 14:13:24 +08:00
seq_printf(m, "New framebuffer address 0x%08lx\n", (long)work->gtt_offset);
seq_printf(m, "MMIO update completed? %d\n", addr == work->gtt_offset);
}
}
spin_unlock_irq(&dev->event_lock);
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj;
struct intel_engine_cs *engine;
int total = 0;
int ret, j;
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
for_each_engine(engine, dev_priv) {
for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
int count;
count = 0;
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link)
count++;
seq_printf(m, "%s cache[%d]: %d objects\n",
engine->name, j, count);
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link) {
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
}
total += count;
}
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
}
seq_printf(m, "total: %d\n", total);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_gem_request_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *engine;
struct drm_i915_gem_request *req;
int ret, any;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
any = 0;
for_each_engine(engine, dev_priv) {
int count;
count = 0;
list_for_each_entry(req, &engine->request_list, list)
count++;
if (count == 0)
continue;
seq_printf(m, "%s requests: %d\n", engine->name, count);
list_for_each_entry(req, &engine->request_list, list) {
struct task_struct *task;
rcu_read_lock();
task = NULL;
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, " %x @ %d: %s [%d]\n",
req->seqno,
(int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
task ? task->pid : -1);
rcu_read_unlock();
}
any++;
}
mutex_unlock(&dev->struct_mutex);
if (any == 0)
seq_puts(m, "No requests\n");
return 0;
}
/*
 * Print two lines for @engine: the value returned by its get_seqno() hook
 * and its user_interrupts counter (sampled with READ_ONCE()).
 */
static void i915_ring_seqno_info(struct seq_file *m,
				 struct intel_engine_cs *engine)
{
	const char *name = engine->name;

	seq_printf(m, "Current sequence (%s): %x\n",
		   name, engine->get_seqno(engine));
	seq_printf(m, "Current user interrupts (%s): %x\n",
		   name, READ_ONCE(engine->user_interrupts));
}
static int i915_gem_seqno_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *engine;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
for_each_engine(engine, dev_priv)
i915_ring_seqno_info(m, engine);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
/* Print IMR, IIR and IER for GEN8 GT interrupt banks 0..3. */
static void i915_irq_info_show_gen8_gt(struct seq_file *m,
				       struct drm_i915_private *dev_priv)
{
	int bank;

	for (bank = 0; bank < 4; bank++) {
		seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
			   bank, I915_READ(GEN8_GT_IMR(bank)));
		seq_printf(m, "GT Interrupt IIR %d:\t%08x\n",
			   bank, I915_READ(GEN8_GT_IIR(bank)));
		seq_printf(m, "GT Interrupt IER %d:\t%08x\n",
			   bank, I915_READ(GEN8_GT_IER(bank)));
	}
}

/* Print the GEN8 PCU interrupt mask, identity and enable registers. */
static void i915_irq_info_show_gen8_pcu(struct seq_file *m,
					struct drm_i915_private *dev_priv)
{
	seq_printf(m, "PCU interrupt mask:\t%08x\n",
		   I915_READ(GEN8_PCU_IMR));
	seq_printf(m, "PCU interrupt identity:\t%08x\n",
		   I915_READ(GEN8_PCU_IIR));
	seq_printf(m, "PCU interrupt enable:\t%08x\n",
		   I915_READ(GEN8_PCU_IER));
}

/* Print the VLV display IER/IIR/IIR_RW/IMR registers and every PIPESTAT. */
static void i915_irq_info_show_vlv_display(struct seq_file *m,
					   struct drm_i915_private *dev_priv)
{
	int pipe;

	seq_printf(m, "Display IER:\t%08x\n",
		   I915_READ(VLV_IER));
	seq_printf(m, "Display IIR:\t%08x\n",
		   I915_READ(VLV_IIR));
	seq_printf(m, "Display IIR_RW:\t%08x\n",
		   I915_READ(VLV_IIR_RW));
	seq_printf(m, "Display IMR:\t%08x\n",
		   I915_READ(VLV_IMR));
	for_each_pipe(dev_priv, pipe)
		seq_printf(m, "Pipe %c stat:\t%08x\n",
			   pipe_name(pipe),
			   I915_READ(PIPESTAT(pipe)));
}

/* Print PORT_HOTPLUG_EN, VLV_DPFLIPSTAT and DPINVGTT. */
static void i915_irq_info_show_vlv_hotplug(struct seq_file *m,
					   struct drm_i915_private *dev_priv)
{
	seq_printf(m, "Port hotplug:\t%08x\n",
		   I915_READ(PORT_HOTPLUG_EN));
	seq_printf(m, "DPFLIPSTAT:\t%08x\n",
		   I915_READ(VLV_DPFLIPSTAT));
	seq_printf(m, "DPINVGTT:\t%08x\n",
		   I915_READ(DPINVGTT));
}

/*
 * debugfs show callback: dump the interrupt registers.
 *
 * Which registers are printed depends on the platform, tested in this
 * order: Cherryview, gen >= 8, Valleyview, devices without a PCH split,
 * and finally everything else. Afterwards, for every engine, the
 * engine interrupt mask (gen >= 6 only) and the i915_ring_seqno_info()
 * summary are printed.
 *
 * All register reads happen with dev->struct_mutex held and a runtime-PM
 * reference taken. On gen >= 8 the per-pipe registers are only read when
 * intel_display_power_get_if_enabled() succeeds for that pipe's power
 * domain; otherwise a "power disabled" line is printed instead.
 *
 * Returns 0, or the error from mutex_lock_interruptible().
 */
static int i915_interrupt_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	int ret, pipe;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(dev_priv);

	if (IS_CHERRYVIEW(dev)) {
		seq_printf(m, "Master Interrupt Control:\t%08x\n",
			   I915_READ(GEN8_MASTER_IRQ));

		i915_irq_info_show_vlv_display(m, dev_priv);
		i915_irq_info_show_vlv_hotplug(m, dev_priv);
		i915_irq_info_show_gen8_gt(m, dev_priv);
		i915_irq_info_show_gen8_pcu(m, dev_priv);
	} else if (INTEL_INFO(dev)->gen >= 8) {
		seq_printf(m, "Master Interrupt Control:\t%08x\n",
			   I915_READ(GEN8_MASTER_IRQ));

		i915_irq_info_show_gen8_gt(m, dev_priv);

		for_each_pipe(dev_priv, pipe) {
			enum intel_display_power_domain domain =
				POWER_DOMAIN_PIPE(pipe);

			/* Only touch the pipe registers while the pipe is powered. */
			if (!intel_display_power_get_if_enabled(dev_priv,
								domain)) {
				seq_printf(m, "Pipe %c power disabled\n",
					   pipe_name(pipe));
				continue;
			}

			seq_printf(m, "Pipe %c IMR:\t%08x\n",
				   pipe_name(pipe),
				   I915_READ(GEN8_DE_PIPE_IMR(pipe)));
			seq_printf(m, "Pipe %c IIR:\t%08x\n",
				   pipe_name(pipe),
				   I915_READ(GEN8_DE_PIPE_IIR(pipe)));
			seq_printf(m, "Pipe %c IER:\t%08x\n",
				   pipe_name(pipe),
				   I915_READ(GEN8_DE_PIPE_IER(pipe)));

			intel_display_power_put(dev_priv, domain);
		}

		seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
			   I915_READ(GEN8_DE_PORT_IMR));
		seq_printf(m, "Display Engine port interrupt identity:\t%08x\n",
			   I915_READ(GEN8_DE_PORT_IIR));
		seq_printf(m, "Display Engine port interrupt enable:\t%08x\n",
			   I915_READ(GEN8_DE_PORT_IER));

		seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n",
			   I915_READ(GEN8_DE_MISC_IMR));
		seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n",
			   I915_READ(GEN8_DE_MISC_IIR));
		seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n",
			   I915_READ(GEN8_DE_MISC_IER));

		i915_irq_info_show_gen8_pcu(m, dev_priv);
	} else if (IS_VALLEYVIEW(dev)) {
		i915_irq_info_show_vlv_display(m, dev_priv);

		seq_printf(m, "Master IER:\t%08x\n",
			   I915_READ(VLV_MASTER_IER));

		seq_printf(m, "Render IER:\t%08x\n",
			   I915_READ(GTIER));
		seq_printf(m, "Render IIR:\t%08x\n",
			   I915_READ(GTIIR));
		seq_printf(m, "Render IMR:\t%08x\n",
			   I915_READ(GTIMR));

		seq_printf(m, "PM IER:\t\t%08x\n",
			   I915_READ(GEN6_PMIER));
		seq_printf(m, "PM IIR:\t\t%08x\n",
			   I915_READ(GEN6_PMIIR));
		seq_printf(m, "PM IMR:\t\t%08x\n",
			   I915_READ(GEN6_PMIMR));

		i915_irq_info_show_vlv_hotplug(m, dev_priv);
	} else if (!HAS_PCH_SPLIT(dev)) {
		seq_printf(m, "Interrupt enable: %08x\n",
			   I915_READ(IER));
		seq_printf(m, "Interrupt identity: %08x\n",
			   I915_READ(IIR));
		seq_printf(m, "Interrupt mask: %08x\n",
			   I915_READ(IMR));
		for_each_pipe(dev_priv, pipe)
			seq_printf(m, "Pipe %c stat: %08x\n",
				   pipe_name(pipe),
				   I915_READ(PIPESTAT(pipe)));
	} else {
		seq_printf(m, "North Display Interrupt enable: %08x\n",
			   I915_READ(DEIER));
		seq_printf(m, "North Display Interrupt identity: %08x\n",
			   I915_READ(DEIIR));
		seq_printf(m, "North Display Interrupt mask: %08x\n",
			   I915_READ(DEIMR));
		seq_printf(m, "South Display Interrupt enable: %08x\n",
			   I915_READ(SDEIER));
		seq_printf(m, "South Display Interrupt identity: %08x\n",
			   I915_READ(SDEIIR));
		seq_printf(m, "South Display Interrupt mask: %08x\n",
			   I915_READ(SDEIMR));
		seq_printf(m, "Graphics Interrupt enable: %08x\n",
			   I915_READ(GTIER));
		seq_printf(m, "Graphics Interrupt identity: %08x\n",
			   I915_READ(GTIIR));
		seq_printf(m, "Graphics Interrupt mask: %08x\n",
			   I915_READ(GTIMR));
	}

	for_each_engine(engine, dev_priv) {
		if (INTEL_INFO(dev)->gen >= 6)
			seq_printf(m,
				   "Graphics Interrupt mask (%s): %08x\n",
				   engine->name, I915_READ_IMR(engine));
		i915_ring_seqno_info(m, engine);
	}

	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}
static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
int i, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 17:40:46 +08:00
seq_printf(m, "Fence %d, pin count = %d, object = ",
i, dev_priv->fence_regs[i].pin_count);
if (obj == NULL)
seq_puts(m, "unused");
else
describe_obj(m, obj);
seq_putc(m, '\n');
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_hws_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *engine;
const u32 *hws;
int i;
engine = &dev_priv->engine[(uintptr_t)node->info_ent->data];
hws = engine->status_page.page_addr;
if (hws == NULL)
return 0;
for (i = 0; i < 4096 / sizeof(u32) / 4; i += 4) {
seq_printf(m, "0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
i * 4,
hws[i], hws[i + 1], hws[i + 2], hws[i + 3]);
}
return 0;
}
/*
 * write() handler of the error-state file: any write discards the stored
 * error state.
 *
 * The written data itself is never inspected; i915_destroy_error_state()
 * is called under dev->struct_mutex and the full byte count is reported
 * as consumed.
 *
 * Returns @cnt on success, or the error from mutex_lock_interruptible().
 */
static ssize_t
i915_error_state_write(struct file *filp,
		       const char __user *ubuf,
		       size_t cnt,
		       loff_t *ppos)
{
	struct i915_error_state_file_priv *priv = filp->private_data;
	struct drm_device *dev = priv->dev;
	int err;

	DRM_DEBUG_DRIVER("Resetting error state\n");

	err = mutex_lock_interruptible(&dev->struct_mutex);
	if (err)
		return err;

	i915_destroy_error_state(dev);

	mutex_unlock(&dev->struct_mutex);

	return cnt;
}
static int i915_error_state_open(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
struct i915_error_state_file_priv *error_priv;
error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
if (!error_priv)
return -ENOMEM;
error_priv->dev = dev;
i915_error_state_get(dev, error_priv);
file->private_data = error_priv;
return 0;
}
/*
 * release() handler of the error-state file: hands the per-open private
 * data to i915_error_state_put() and then frees it. Always returns 0.
 */
static int i915_error_state_release(struct inode *inode, struct file *file)
{
	struct i915_error_state_file_priv *priv = file->private_data;

	i915_error_state_put(priv);
	kfree(priv);

	return 0;
}
/*
 * read() handler of the error-state file.
 *
 * Sets up a temporary string buffer for the window described by @count
 * and *@pos, renders the error state into it and copies the rendered
 * bytes to @userbuf. On a successful copy *@pos is advanced to the
 * buffer's start offset plus the number of bytes copied. The temporary
 * buffer is released on every path after it has been initialised.
 *
 * Returns the number of bytes copied, or a negative error from buffer
 * initialisation, rendering, or the copy to user space.
 */
static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
				     size_t count, loff_t *pos)
{
	struct i915_error_state_file_priv *priv = file->private_data;
	struct drm_i915_error_state_buf str;
	/* Offset into the rendered buffer; the copy always starts at 0. */
	loff_t buf_pos = 0;
	ssize_t copied = 0;
	int err;

	err = i915_error_state_buf_init(&str, to_i915(priv->dev), count, *pos);
	if (err)
		return err;

	err = i915_error_state_to_str(&str, priv);
	if (!err) {
		copied = simple_read_from_buffer(userbuf, count, &buf_pos,
						 str.buf,
						 str.bytes);
		if (copied < 0)
			err = copied;
		else
			*pos = str.start + copied;
	}

	i915_error_state_buf_release(&str);

	if (err)
		return err;
	return copied;
}
/* File operations backing the error-state file. */
static const struct file_operations i915_error_state_fops = {
	.owner   = THIS_MODULE,
	/* lifetime of the per-open private data */
	.open    = i915_error_state_open,
	.release = i915_error_state_release,
	/* data access */
	.llseek  = default_llseek,
	.read    = i915_error_state_read,
	.write   = i915_error_state_write,
};
/*
 * Attribute getter: report dev_priv->next_seqno, sampled under struct_mutex.
 *
 * Returns 0 on success or the error from mutex_lock_interruptible().
 */
static int
i915_next_seqno_get(void *data, u64 *val)
{
	struct drm_device *dev = data;
	int err;

	err = mutex_lock_interruptible(&dev->struct_mutex);
	if (err)
		return err;

	*val = to_i915(dev)->next_seqno;

	mutex_unlock(&dev->struct_mutex);
	return 0;
}
/*
 * Attribute setter: forward @val to i915_gem_set_seqno() under struct_mutex.
 *
 * Returns the result of i915_gem_set_seqno(), or the error from
 * mutex_lock_interruptible() if the lock could not be taken.
 */
static int
i915_next_seqno_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	int err;

	err = mutex_lock_interruptible(&dev->struct_mutex);
	if (!err) {
		err = i915_gem_set_seqno(dev, val);
		mutex_unlock(&dev->struct_mutex);
	}

	return err;
}
/*
 * Declares i915_next_seqno_fops from i915_next_seqno_get() and
 * i915_next_seqno_set(); the value is formatted as hexadecimal ("0x%llx\n").
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
i915_next_seqno_get, i915_next_seqno_set,
"0x%llx\n");
static int i915_frequency_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
int ret = 0;
intel_runtime_pm_get(dev_priv);
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
if (IS_GEN5(dev)) {
u16 rgvswctl = I915_READ16(MEMSWCTL);
u16 rgvstat = I915_READ16(MEMSTAT_ILK);
seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
MEMSTAT_VID_SHIFT);
seq_printf(m, "Current P-state: %d\n",
(rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
u32 freq_sts;
mutex_lock(&dev_priv->rps.hw_lock);
freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
seq_printf(m, "actual GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
seq_printf(m, "current GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
seq_printf(m, "max GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m, "min GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
seq_printf(m, "idle GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
mutex_unlock(&dev_priv->rps.hw_lock);
} else if (INTEL_INFO(dev)->gen >= 6) {
u32 rp_state_limits;
u32 gt_perf_status;
u32 rp_state_cap;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
u32 rpupei, rpcurup, rpprevup;
u32 rpdownei, rpcurdown, rpprevdown;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
int max_freq;
rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS);
if (IS_BROXTON(dev)) {
rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
gt_perf_status = I915_READ(BXT_GT_PERF_STATUS);
} else {
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
}
/* RPSTAT1 is in the GT power well */
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
goto out;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
reqf = I915_READ(GEN6_RPNSWREQ);
if (IS_GEN9(dev))
reqf >>= 23;
else {
reqf &= ~GEN6_TURBO_DISABLE;
if (IS_HASWELL(dev) || IS_BROADWELL(dev))
reqf >>= 24;
else
reqf >>= 25;
}
2015-01-24 03:04:26 +08:00
reqf = intel_gpu_freq(dev_priv, reqf);
rpmodectl = I915_READ(GEN6_RP_CONTROL);
rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD);
rpdeclimit = I915_READ(GEN6_RP_DOWN_THRESHOLD);
rpstat = I915_READ(GEN6_RPSTAT1);
rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
rpcurup = I915_READ(GEN6_RP_CUR_UP);
rpprevup = I915_READ(GEN6_RP_PREV_UP);
rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI);
rpcurdown = I915_READ(GEN6_RP_CUR_DOWN);
rpprevdown = I915_READ(GEN6_RP_PREV_DOWN);
if (IS_GEN9(dev))
cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
else
cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
2015-01-24 03:04:26 +08:00
cagf = intel_gpu_freq(dev_priv, cagf);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev->struct_mutex);
if (IS_GEN6(dev) || IS_GEN7(dev)) {
pm_ier = I915_READ(GEN6_PMIER);
pm_imr = I915_READ(GEN6_PMIMR);
pm_isr = I915_READ(GEN6_PMISR);
pm_iir = I915_READ(GEN6_PMIIR);
pm_mask = I915_READ(GEN6_PMINTRMSK);
} else {
pm_ier = I915_READ(GEN8_GT_IER(2));
pm_imr = I915_READ(GEN8_GT_IMR(2));
pm_isr = I915_READ(GEN8_GT_ISR(2));
pm_iir = I915_READ(GEN8_GT_IIR(2));
pm_mask = I915_READ(GEN6_PMINTRMSK);
}
seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_isr, pm_iir, pm_mask);
seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
seq_printf(m, "Render p-state ratio: %d\n",
(gt_perf_status & (IS_GEN9(dev) ? 0x1ff00 : 0xff00)) >> 8);
seq_printf(m, "Render p-state VID: %d\n",
gt_perf_status & 0xff);
seq_printf(m, "Render p-state limit: %d\n",
rp_state_limits & 0xff);
seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf);
seq_printf(m, "RP CUR UP EI: %dus\n", rpupei &
GEN6_CURICONT_MASK);
seq_printf(m, "RP CUR UP: %dus\n", rpcurup &
GEN6_CURBSYTAVG_MASK);
seq_printf(m, "RP PREV UP: %dus\n", rpprevup &
GEN6_CURBSYTAVG_MASK);
seq_printf(m, "Up threshold: %d%%\n",
dev_priv->rps.up_threshold);
seq_printf(m, "RP CUR DOWN EI: %dus\n", rpdownei &
GEN6_CURIAVG_MASK);
seq_printf(m, "RP CUR DOWN: %dus\n", rpcurdown &
GEN6_CURBSYTAVG_MASK);
seq_printf(m, "RP PREV DOWN: %dus\n", rpprevdown &
GEN6_CURBSYTAVG_MASK);
seq_printf(m, "Down threshold: %d%%\n",
dev_priv->rps.down_threshold);
max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
GEN9_FREQ_SCALER : 1);
seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
2015-01-24 03:04:26 +08:00
intel_gpu_freq(dev_priv, max_freq));
max_freq = (rp_state_cap & 0xff00) >> 8;
max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
GEN9_FREQ_SCALER : 1);
seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
2015-01-24 03:04:26 +08:00
intel_gpu_freq(dev_priv, max_freq));
max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 16 :
rp_state_cap >> 0) & 0xff;
max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
GEN9_FREQ_SCALER : 1);
seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
2015-01-24 03:04:26 +08:00
intel_gpu_freq(dev_priv, max_freq));
seq_printf(m, "Max overclocked frequency: %dMHz\n",
2015-01-24 03:04:26 +08:00
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m, "Current freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
seq_printf(m, "Actual freq: %d MHz\n", cagf);
seq_printf(m, "Idle freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
seq_printf(m, "Min freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
seq_printf(m, "Max freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
} else {
seq_puts(m, "no P-state info available\n");
}
seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk_freq);
seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
out:
intel_runtime_pm_put(dev_priv);
return ret;
}
/*
 * Print the hangcheck state of every engine: the values recorded in
 * engine->hangcheck next to freshly sampled ones (seqno, ACTHD and, for the
 * render engine, the INSTDONE registers).
 *
 * Prints "Hangcheck disabled" and nothing else when the enable_hangcheck
 * module parameter is off. Always returns 0.
 */
static int i915_hangcheck_info(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	u64 cur_acthd[I915_NUM_ENGINES];
	u32 cur_seqno[I915_NUM_ENGINES];
	u32 cur_instdone[I915_NUM_INSTDONE_REG];
	enum intel_engine_id id;
	int reg;

	if (!i915.enable_hangcheck) {
		seq_puts(m, "Hangcheck disabled\n");
		return 0;
	}

	/* Sample the live values while holding a runtime PM reference. */
	intel_runtime_pm_get(dev_priv);

	for_each_engine_id(engine, dev_priv, id) {
		cur_acthd[id] = intel_ring_get_active_head(engine);
		cur_seqno[id] = engine->get_seqno(engine);
	}

	i915_get_extra_instdone(dev, cur_instdone);

	intel_runtime_pm_put(dev_priv);

	if (!delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
		seq_puts(m, "Hangcheck inactive\n");
	} else {
		seq_printf(m, "Hangcheck active, fires in %dms\n",
			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
					    jiffies));
	}

	for_each_engine_id(engine, dev_priv, id) {
		seq_printf(m, "%s:\n", engine->name);
		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
			   engine->hangcheck.seqno,
			   cur_seqno[id],
			   engine->last_submitted_seqno);
		seq_printf(m, "\tuser interrupts = %x [current %x]\n",
			   engine->hangcheck.user_interrupts,
			   READ_ONCE(engine->user_interrupts));
		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
			   (long long)engine->hangcheck.acthd,
			   (long long)cur_acthd[id]);
		seq_printf(m, "\tscore = %d\n", engine->hangcheck.score);
		seq_printf(m, "\taction = %d\n", engine->hangcheck.action);

		/* The INSTDONE dump is only printed for the render engine. */
		if (engine->id != RCS)
			continue;

		seq_puts(m, "\tinstdone read =");
		for (reg = 0; reg < I915_NUM_INSTDONE_REG; reg++)
			seq_printf(m, " 0x%08x", cur_instdone[reg]);

		seq_puts(m, "\n\tinstdone accu =");
		for (reg = 0; reg < I915_NUM_INSTDONE_REG; reg++)
			seq_printf(m, " 0x%08x",
				   engine->hangcheck.instdone[reg]);

		seq_puts(m, "\n");
	}

	return 0;
}
/*
 * Print the render standby / boost configuration decoded from MEMMODECTL,
 * RSTDBYCTL and CRSTANDVID. Called from i915_drpc_info() for platforms that
 * take neither the VLV/CHV nor the gen6+ path.
 *
 * The registers are sampled under struct_mutex with a runtime PM reference;
 * decoding and printing happen after both are dropped.
 *
 * Returns 0 on success or the error from mutex_lock_interruptible().
 */
static int ironlake_drpc_info(struct seq_file *m)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 rgvmodectl, rstdbyctl;
	u16 crstandvid;
	int ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;
	intel_runtime_pm_get(dev_priv);

	rgvmodectl = I915_READ(MEMMODECTL);
	rstdbyctl = I915_READ(RSTDBYCTL);
	crstandvid = I915_READ16(CRSTANDVID);

	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
	seq_printf(m, "Boost freq: %d\n",
		   (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
		   MEMMODE_BOOST_FREQ_SHIFT);
	seq_printf(m, "HW control enabled: %s\n",
		   yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
	seq_printf(m, "SW control enabled: %s\n",
		   yesno(rgvmodectl & MEMMODE_SWMODE_EN));
	seq_printf(m, "Gated voltage change: %s\n",
		   yesno(rgvmodectl & MEMMODE_RCLK_GATE));
	seq_printf(m, "Starting frequency: P%d\n",
		   (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
	seq_printf(m, "Max P-state: P%d\n",
		   (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
	seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
	seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f));
	seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
	/* Standby counts as enabled when RCX_SW_EXIT is clear. */
	seq_printf(m, "Render standby enabled: %s\n",
		   yesno(!(rstdbyctl & RCX_SW_EXIT)));
	seq_puts(m, "Current RS state: ");
	switch (rstdbyctl & RSX_STATUS_MASK) {
	case RSX_STATUS_ON:
		seq_puts(m, "on\n");
		break;
	case RSX_STATUS_RC1:
		seq_puts(m, "RC1\n");
		break;
	case RSX_STATUS_RC1E:
		seq_puts(m, "RC1E\n");
		break;
	case RSX_STATUS_RS1:
		seq_puts(m, "RS1\n");
		break;
	case RSX_STATUS_RS2:
		seq_puts(m, "RS2 (RC6)\n");
		break;
	case RSX_STATUS_RS3:
		/* Label follows the RSX_STATUS_RS3 name, like RS1/RS2 above. */
		seq_puts(m, "RS3 (RC6+)\n");
		break;
	default:
		seq_puts(m, "unknown\n");
		break;
	}

	return 0;
}
/*
 * Print the wake_count of every forcewake domain, one per line, while
 * holding the uncore spinlock. Always returns 0.
 */
static int i915_forcewake_domains(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
	struct intel_uncore_forcewake_domain *domain;

	spin_lock_irq(&dev_priv->uncore.lock);

	for_each_fw_domain(domain, dev_priv) {
		const char *name =
			intel_uncore_forcewake_domain_to_str(domain->id);

		seq_printf(m, "%s.wake_count = %u\n",
			   name, domain->wake_count);
	}

	spin_unlock_irq(&dev_priv->uncore.lock);

	return 0;
}
/*
 * Print the RC6 / turbo configuration and power well status for
 * Valleyview/Cherryview (selected by i915_drpc_info()), followed by the
 * forcewake domain counts.
 *
 * Every register, including the RC6 residency counters, is sampled while
 * the runtime PM reference is held; printing happens afterwards from the
 * sampled values.
 *
 * Returns the result of i915_forcewake_domains().
 */
static int vlv_drpc_info(struct seq_file *m)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 rpmodectl1, rcctl1, pw_status;
	u32 render_rc6, media_rc6;

	intel_runtime_pm_get(dev_priv);

	pw_status = I915_READ(VLV_GTLC_PW_STATUS);
	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
	rcctl1 = I915_READ(GEN6_RC_CONTROL);
	/* Read the residency counters before the PM reference is dropped. */
	render_rc6 = I915_READ(VLV_GT_RENDER_RC6);
	media_rc6 = I915_READ(VLV_GT_MEDIA_RC6);

	intel_runtime_pm_put(dev_priv);

	seq_printf(m, "Video Turbo Mode: %s\n",
		   yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
	seq_printf(m, "Turbo enabled: %s\n",
		   yesno(rpmodectl1 & GEN6_RP_ENABLE));
	seq_printf(m, "HW control enabled: %s\n",
		   yesno(rpmodectl1 & GEN6_RP_ENABLE));
	seq_printf(m, "SW control enabled: %s\n",
		   yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) ==
			  GEN6_RP_MEDIA_SW_MODE));
	seq_printf(m, "RC6 Enabled: %s\n",
		   yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
				   GEN6_RC_CTL_EI_MODE(1))));
	seq_printf(m, "Render Power Well: %s\n",
		   (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
	seq_printf(m, "Media Power Well: %s\n",
		   (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");

	seq_printf(m, "Render RC6 residency since boot: %u\n", render_rc6);
	seq_printf(m, "Media RC6 residency since boot: %u\n", media_rc6);

	return i915_forcewake_domains(m, NULL);
}
static int gen6_drpc_info(struct seq_file *m)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
unsigned forcewake_count;
int count = 0, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
spin_lock_irq(&dev_priv->uncore.lock);
forcewake_count = dev_priv->uncore.fw_domain[FW_DOMAIN_ID_RENDER].wake_count;
spin_unlock_irq(&dev_priv->uncore.lock);
if (forcewake_count) {
seq_puts(m, "RC information inaccurate because somebody "
"holds a forcewake reference \n");
} else {
/* NB: we cannot use forcewake, else we read the wrong values */
while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_ACK) & 1))
udelay(10);
seq_printf(m, "RC information accurate: %s\n", yesno(count < 51));
}
gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS);
trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
mutex_unlock(&dev->struct_mutex);
mutex_lock(&dev_priv->rps.hw_lock);
sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
mutex_unlock(&dev_priv->rps.hw_lock);
intel_runtime_pm_put(dev_priv);
seq_printf(m, "Video Turbo Mode: %s\n",
yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
seq_printf(m, "HW control enabled: %s\n",
yesno(rpmodectl1 & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "RC1e Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
seq_printf(m, "RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
seq_printf(m, "Deep RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
seq_printf(m, "Deepest RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
seq_puts(m, "Current RC state: ");
switch (gt_core_status & GEN6_RCn_MASK) {
case GEN6_RC0:
if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
seq_puts(m, "Core Power Down\n");
else
seq_puts(m, "on\n");
break;
case GEN6_RC3:
seq_puts(m, "RC3\n");
break;
case GEN6_RC6:
seq_puts(m, "RC6\n");
break;
case GEN6_RC7:
seq_puts(m, "RC7\n");
break;
default:
seq_puts(m, "Unknown\n");
break;
}
seq_printf(m, "Core Power Down: %s\n",
yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
/* Not exactly sure what this is */
seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6_LOCKED));
seq_printf(m, "RC6 residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6));
seq_printf(m, "RC6+ residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6p));
seq_printf(m, "RC6++ residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6pp));
seq_printf(m, "RC6 voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
seq_printf(m, "RC6+ voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
seq_printf(m, "RC6++ voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
return 0;
}
/*
 * Dispatch to the platform-specific DRPC dump: Valleyview/Cherryview first,
 * then gen6 and newer, and ironlake_drpc_info() for everything else.
 * Returns whatever the selected helper returns.
 */
static int i915_drpc_info(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;

	if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
		return vlv_drpc_info(m);

	if (INTEL_INFO(dev)->gen >= 6)
		return gen6_drpc_info(m);

	return ironlake_drpc_info(m);
}
/*
 * Print the frontbuffer tracking busy and flip bitmasks as stored in
 * dev_priv->fb_tracking. Always returns 0.
 */
static int i915_frontbuffer_tracking(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);

	seq_printf(m, "FB tracking busy bits: 0x%08x\n",
		   dev_priv->fb_tracking.busy_bits);
	seq_printf(m, "FB tracking flip bits: 0x%08x\n",
		   dev_priv->fb_tracking.flip_bits);

	return 0;
}
/*
 * Report whether FBC is active and, when it is not, the recorded reason.
 * On gen7+ the compression status from FBC_STATUS2 is printed as well.
 * The state is sampled under fbc.lock with a runtime PM reference held.
 * Always returns 0.
 */
static int i915_fbc_status(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!HAS_FBC(dev)) {
		seq_puts(m, "FBC unsupported on this chipset\n");
		return 0;
	}

	intel_runtime_pm_get(dev_priv);
	mutex_lock(&dev_priv->fbc.lock);

	if (!intel_fbc_is_active(dev_priv)) {
		seq_printf(m, "FBC disabled: %s\n",
			   dev_priv->fbc.no_fbc_reason);
	} else {
		seq_puts(m, "FBC enabled\n");
	}

	if (INTEL_INFO(dev_priv)->gen >= 7) {
		seq_printf(m, "Compressing: %s\n",
			   yesno(I915_READ(FBC_STATUS2) &
				 FBC_COMPRESSION_MASK));
	}

	mutex_unlock(&dev_priv->fbc.lock);
	intel_runtime_pm_put(dev_priv);

	return 0;
}
/*
 * Attribute getter for the FBC false-color setting.
 *
 * Returns 0 and stores dev_priv->fbc.false_color in *@val on gen7+ devices
 * with FBC; returns -ENODEV otherwise.
 */
static int i915_fbc_fc_get(void *data, u64 *val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen >= 7 && HAS_FBC(dev)) {
		*val = dev_priv->fbc.false_color;
		return 0;
	}

	return -ENODEV;
}
/*
 * Attribute setter for the FBC false-color setting: record @val in
 * dev_priv->fbc.false_color and set or clear FBC_CTL_FALSE_COLOR in
 * ILK_DPFC_CONTROL accordingly, all under fbc.lock.
 *
 * Returns 0 on success or -ENODEV on pre-gen7 devices or without FBC.
 */
static int i915_fbc_fc_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 ctl;

	if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev))
		return -ENODEV;

	mutex_lock(&dev_priv->fbc.lock);

	ctl = I915_READ(ILK_DPFC_CONTROL);
	dev_priv->fbc.false_color = val;

	/* Any non-zero value turns false color on. */
	if (val)
		ctl |= FBC_CTL_FALSE_COLOR;
	else
		ctl &= ~FBC_CTL_FALSE_COLOR;
	I915_WRITE(ILK_DPFC_CONTROL, ctl);

	mutex_unlock(&dev_priv->fbc.lock);

	return 0;
}
/*
 * Declares i915_fbc_fc_fops from i915_fbc_fc_get() and i915_fbc_fc_set();
 * the value is formatted as an unsigned decimal ("%llu\n").
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops,
i915_fbc_fc_get, i915_fbc_fc_set,
"%llu\n");
/*
 * Report the IPS state: the enable_ips module parameter and, on pre-gen8
 * hardware, whether IPS_ENABLE is set in IPS_CTL. On gen8+ the current
 * state is printed as "unknown". Always returns 0.
 */
static int i915_ips_status(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!HAS_IPS(dev)) {
		seq_puts(m, "not supported\n");
		return 0;
	}

	intel_runtime_pm_get(dev_priv);

	seq_printf(m, "Enabled by kernel parameter: %s\n",
		   yesno(i915.enable_ips));

	/* IPS_CTL is only consulted below gen8. */
	if (INTEL_INFO(dev)->gen >= 8)
		seq_puts(m, "Currently: unknown\n");
	else if (I915_READ(IPS_CTL) & IPS_ENABLE)
		seq_puts(m, "Currently: enabled\n");
	else
		seq_puts(m, "Currently: disabled\n");

	intel_runtime_pm_put(dev_priv);

	return 0;
}
/*
 * Report whether self-refresh is enabled by testing the platform-specific
 * enable bit. Platforms matching none of the checks below are reported as
 * disabled. Always returns 0.
 */
static int i915_sr_status(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const char *state;
	bool sr_enabled;

	intel_runtime_pm_get(dev_priv);

	if (HAS_PCH_SPLIT(dev)) {
		sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN;
	} else if (IS_CRESTLINE(dev) || IS_G4X(dev) ||
		   IS_I945G(dev) || IS_I945GM(dev)) {
		sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
	} else if (IS_I915GM(dev)) {
		sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
	} else if (IS_PINEVIEW(dev)) {
		sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN;
	} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
		sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
	} else {
		sr_enabled = false;
	}

	intel_runtime_pm_put(dev_priv);

	if (sr_enabled)
		state = "enabled";
	else
		state = "disabled";
	seq_printf(m, "self-refresh: %s\n", state);

	return 0;
}
/*
 * Print the gen5 energy-monitor values: GMCH temperature, chipset power,
 * graphics power and the sum of the two power figures. The values are
 * sampled under struct_mutex.
 *
 * Returns 0 on success, -ENODEV on anything but gen5, or the error from
 * mutex_lock_interruptible().
 */
static int i915_emon_status(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long mch_temp, chipset_power, gfx_power;
	int err;

	if (!IS_GEN5(dev))
		return -ENODEV;

	err = mutex_lock_interruptible(&dev->struct_mutex);
	if (err)
		return err;

	mch_temp = i915_mch_val(dev_priv);
	chipset_power = i915_chipset_val(dev_priv);
	gfx_power = i915_gfx_val(dev_priv);

	mutex_unlock(&dev->struct_mutex);

	seq_printf(m, "GMCH temp: %ld\n", mch_temp);
	seq_printf(m, "Chipset power: %ld\n", chipset_power);
	seq_printf(m, "GFX power: %ld\n", gfx_power);
	seq_printf(m, "Total power: %ld\n", chipset_power + gfx_power);

	return 0;
}
/*
 * Print a table with one row per GPU frequency step between the RPS soft
 * limits. Each row shows the GPU frequency and the two byte fields returned
 * by the GEN6_PCODE_READ_MIN_FREQ_TABLE pcode read for that step, printed as
 * the effective CPU and ring frequencies.
 *
 * Returns 0 on success (also when the chipset lacks the feature) or the
 * error from mutex_lock_interruptible() on rps.hw_lock.
 */
static int i915_ring_freq_table(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int max_gpu_freq, min_gpu_freq;
	int gpu_freq, ia_freq;
	int scaler;
	int ret;

	if (!HAS_CORE_RING_FREQ(dev)) {
		seq_puts(m, "unsupported on this chipset\n");
		return 0;
	}

	intel_runtime_pm_get(dev_priv);

	flush_delayed_work(&dev_priv->rps.delayed_resume_work);

	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
	if (ret) {
		intel_runtime_pm_put(dev_priv);
		return ret;
	}

	/*
	 * On Skylake/Kabylake the soft limits are divided by GEN9_FREQ_SCALER
	 * (convert GT frequency to 50 HZ units) for the table walk and
	 * multiplied back for display. Elsewhere the scaler is 1.
	 */
	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
		scaler = GEN9_FREQ_SCALER;
	else
		scaler = 1;

	min_gpu_freq = dev_priv->rps.min_freq_softlimit / scaler;
	max_gpu_freq = dev_priv->rps.max_freq_softlimit / scaler;

	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");

	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		/* ia_freq carries the request in and the result out. */
		ia_freq = gpu_freq;
		sandybridge_pcode_read(dev_priv,
				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
				       &ia_freq);

		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
			   intel_gpu_freq(dev_priv, gpu_freq * scaler),
			   ((ia_freq >> 0) & 0xff) * 100,
			   ((ia_freq >> 8) & 0xff) * 100);
	}

	mutex_unlock(&dev_priv->rps.hw_lock);
	intel_runtime_pm_put(dev_priv);

	return 0;
}
/*
 * Dump the raw OpRegion (OPREGION_SIZE bytes starting at opregion->header)
 * into the seq_file, under struct_mutex. Nothing is written when no header
 * is present.
 *
 * Returns 0 on success, or the error from mutex_lock_interruptible() so an
 * interrupted read is reported to the caller instead of appearing as an
 * empty file.
 */
static int i915_opregion(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_opregion *opregion = &dev_priv->opregion;
	int ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	if (opregion->header)
		seq_write(m, opregion->header, OPREGION_SIZE);

	mutex_unlock(&dev->struct_mutex);

	return 0;
}
/*
 * Dump the raw VBT (opregion->vbt_size bytes at opregion->vbt) into the
 * seq_file; writes nothing when no VBT pointer is set. Always returns 0.
 */
static int i915_vbt(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct intel_opregion *opregion = &to_i915(node->minor->dev)->opregion;

	if (!opregion->vbt)
		return 0;

	seq_write(m, opregion->vbt, opregion->vbt_size);

	return 0;
}
static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct intel_framebuffer *fbdev_fb = NULL;
struct drm_framebuffer *drm_fb;
drm/i915: Add struct_mutex locking for debugs/i915_gem_framebuffer Since describe_obj() looks at state guarded by the struct_mutex, we need to be holding it. [ 580.201054] drv_suspend: starting subtest debugfs-reader [ 580.239652] ------------[ cut here ]------------ [ 580.239696] WARNING: CPU: 0 PID: 920 at include/linux/list_check.h:25 describe_obj+0x419/0x440() [ 580.239725] CPU: 0 PID: 920 Comm: cat Not tainted 4.5.0-rc6+ #835 [ 580.239745] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 580.239767] 0000000000000000 ffff88027554fcf8 ffffffff812c1135 0000000000000000 [ 580.239815] ffffffff8193dc42 ffff88027554fd30 ffffffff8107419d ffff880071727c00 [ 580.239858] ffff8802757d8000 ffffffff818f693c ffffffff818f693c ffff8802757b9048 [ 580.239896] Call Trace: [ 580.239917] [<ffffffff812c1135>] dump_stack+0x67/0x92 [ 580.239939] [<ffffffff8107419d>] warn_slowpath_common+0x7d/0xb0 [ 580.239959] [<ffffffff810742ba>] warn_slowpath_null+0x1a/0x20 [ 580.239981] [<ffffffff813ce579>] describe_obj+0x419/0x440 [ 580.240006] [<ffffffff813ced22>] i915_gem_framebuffer_info+0xa2/0x100 [ 580.240033] [<ffffffff811a9286>] seq_read+0xe6/0x3b0 [ 580.240059] [<ffffffff81182288>] __vfs_read+0x28/0xd0 [ 580.240085] [<ffffffff81173378>] ? SyS_fadvise64+0x228/0x2c0 [ 580.240112] [<ffffffff811823b2>] vfs_read+0x82/0x110 [ 580.240137] [<ffffffff811827d9>] SyS_read+0x49/0xa0 [ 580.240162] [<ffffffff815bac57>] entry_SYSCALL_64_fastpath+0x12/0x6b [ 580.240187] ---[ end trace 3e2cbf34576c9878 ]--- [ 580.281900] ------------[ cut here ]------------ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1459689261-7920-1-git-send-email-chris@chris-wilson.co.uk
2016-04-03 21:14:20 +08:00
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
#ifdef CONFIG_DRM_FBDEV_EMULATION
if (to_i915(dev)->fbdev) {
fbdev_fb = to_intel_framebuffer(to_i915(dev)->fbdev->helper.fb);
seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
fbdev_fb->base.width,
fbdev_fb->base.height,
fbdev_fb->base.depth,
fbdev_fb->base.bits_per_pixel,
fbdev_fb->base.modifier[0],
atomic_read(&fbdev_fb->base.refcount.refcount));
describe_obj(m, fbdev_fb->obj);
seq_putc(m, '\n');
}
#endif
drm: revamp locking around fb creation/destruction Well, at least step 1. The goal here is that framebuffer objects can survive outside of the mode_config lock, with just a reference held as protection. The first step to get there is to introduce a special fb_lock which protects fb lookup, creation and destruction, to make them appear atomic. This new fb_lock can nest within the mode_config lock. But the idea is (once the reference counting part is completed) that we only quickly take that fb_lock to lookup a framebuffer and grab a reference, without any other locks involved. vmwgfx is the only driver which does framebuffer lookups itself, also wrap those calls to drm_mode_object_find with the new lock. Also protect the fb_list walking in i915 and omapdrm with the new lock. As a slight complication there's also the list of user-created fbs attached to the file private. The problem now is that at fclose() time we need to walk that list, eventually do a modeset call to remove the fb from active usage (and are required to be able to take the mode_config lock), but in the end we need to grab the new fb_lock to remove the fb from the list. The easiest solution is to add another mutex to protect this per-file list. Currently that new fbs_lock nests within the modeset locks and so appears redudant. But later patches will switch around this sequence so that taking the modeset locks in the fb destruction path is optional in the fastpath. Ultimately the goal is that addfb and rmfb do not require the mode_config lock, since otherwise they have the potential to introduce stalls in the pageflip sequence of a compositor (if the compositor e.g. switches to a fullscreen client or if it enables a plane). But that requires a few more steps and hoops to jump through. Note that framebuffer creation/destruction is now double-protected - once by the fb_lock and in parts by the idr_lock. The later would be unnecessariy if framebuffers would have their own idr allocator. 
But that's material for another patch (series). v2: Properly initialize the fb->filp_head list in _init, otherwise the newly added WARN to check whether the fb isn't on a fpriv list any more will fail for driver-private objects. v3: Fixup two error-case unlock bugs spotted by Richard Wilbur. Reviewed-by: Rob Clark <rob@ti.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-11 04:19:18 +08:00
mutex_lock(&dev->mode_config.fb_lock);
drm_for_each_fb(drm_fb, dev) {
struct intel_framebuffer *fb = to_intel_framebuffer(drm_fb);
if (fb == fbdev_fb)
continue;
seq_printf(m, "user size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
fb->base.width,
fb->base.height,
fb->base.depth,
fb->base.bits_per_pixel,
fb->base.modifier[0],
atomic_read(&fb->base.refcount.refcount));
describe_obj(m, fb->obj);
seq_putc(m, '\n');
}
drm: revamp locking around fb creation/destruction Well, at least step 1. The goal here is that framebuffer objects can survive outside of the mode_config lock, with just a reference held as protection. The first step to get there is to introduce a special fb_lock which protects fb lookup, creation and destruction, to make them appear atomic. This new fb_lock can nest within the mode_config lock. But the idea is (once the reference counting part is completed) that we only quickly take that fb_lock to lookup a framebuffer and grab a reference, without any other locks involved. vmwgfx is the only driver which does framebuffer lookups itself, also wrap those calls to drm_mode_object_find with the new lock. Also protect the fb_list walking in i915 and omapdrm with the new lock. As a slight complication there's also the list of user-created fbs attached to the file private. The problem now is that at fclose() time we need to walk that list, eventually do a modeset call to remove the fb from active usage (and are required to be able to take the mode_config lock), but in the end we need to grab the new fb_lock to remove the fb from the list. The easiest solution is to add another mutex to protect this per-file list. Currently that new fbs_lock nests within the modeset locks and so appears redudant. But later patches will switch around this sequence so that taking the modeset locks in the fb destruction path is optional in the fastpath. Ultimately the goal is that addfb and rmfb do not require the mode_config lock, since otherwise they have the potential to introduce stalls in the pageflip sequence of a compositor (if the compositor e.g. switches to a fullscreen client or if it enables a plane). But that requires a few more steps and hoops to jump through. Note that framebuffer creation/destruction is now double-protected - once by the fb_lock and in parts by the idr_lock. The later would be unnecessariy if framebuffers would have their own idr allocator. 
But that's material for another patch (series). v2: Properly initialize the fb->filp_head list in _init, otherwise the newly added WARN to check whether the fb isn't on a fpriv list any more will fail for driver-private objects. v3: Fixup two error-case unlock bugs spotted by Richard Wilbur. Reviewed-by: Rob Clark <rob@ti.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-11 04:19:18 +08:00
mutex_unlock(&dev->mode_config.fb_lock);
drm/i915: Add struct_mutex locking for debugs/i915_gem_framebuffer Since describe_obj() looks at state guarded by the struct_mutex, we need to be holding it. [ 580.201054] drv_suspend: starting subtest debugfs-reader [ 580.239652] ------------[ cut here ]------------ [ 580.239696] WARNING: CPU: 0 PID: 920 at include/linux/list_check.h:25 describe_obj+0x419/0x440() [ 580.239725] CPU: 0 PID: 920 Comm: cat Not tainted 4.5.0-rc6+ #835 [ 580.239745] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 580.239767] 0000000000000000 ffff88027554fcf8 ffffffff812c1135 0000000000000000 [ 580.239815] ffffffff8193dc42 ffff88027554fd30 ffffffff8107419d ffff880071727c00 [ 580.239858] ffff8802757d8000 ffffffff818f693c ffffffff818f693c ffff8802757b9048 [ 580.239896] Call Trace: [ 580.239917] [<ffffffff812c1135>] dump_stack+0x67/0x92 [ 580.239939] [<ffffffff8107419d>] warn_slowpath_common+0x7d/0xb0 [ 580.239959] [<ffffffff810742ba>] warn_slowpath_null+0x1a/0x20 [ 580.239981] [<ffffffff813ce579>] describe_obj+0x419/0x440 [ 580.240006] [<ffffffff813ced22>] i915_gem_framebuffer_info+0xa2/0x100 [ 580.240033] [<ffffffff811a9286>] seq_read+0xe6/0x3b0 [ 580.240059] [<ffffffff81182288>] __vfs_read+0x28/0xd0 [ 580.240085] [<ffffffff81173378>] ? SyS_fadvise64+0x228/0x2c0 [ 580.240112] [<ffffffff811823b2>] vfs_read+0x82/0x110 [ 580.240137] [<ffffffff811827d9>] SyS_read+0x49/0xa0 [ 580.240162] [<ffffffff815bac57>] entry_SYSCALL_64_fastpath+0x12/0x6b [ 580.240187] ---[ end trace 3e2cbf34576c9878 ]--- [ 580.281900] ------------[ cut here ]------------ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1459689261-7920-1-git-send-email-chris@chris-wilson.co.uk
2016-04-03 21:14:20 +08:00
mutex_unlock(&dev->struct_mutex);
return 0;
}
/*
 * Append a short, parenthesised summary of a context ringbuffer to the
 * seq_file: free space, head, tail and the last retired head.
 * No trailing newline is emitted; the caller terminates the line.
 */
static void describe_ctx_ringbuf(struct seq_file *m,
				 struct intel_ringbuffer *ringbuf)
{
	seq_printf(m,
		   " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
		   ringbuf->space,
		   ringbuf->head,
		   ringbuf->tail,
		   ringbuf->last_retired_head);
}
/*
 * debugfs show handler: describe every context on
 * dev_priv->context_list.
 *
 * With execlists enabled, each engine's context state object and
 * ringbuffer are described on their own line; otherwise only the legacy
 * rcs_state object is described.  The whole walk runs under
 * struct_mutex.
 *
 * Returns 0 on success, or the error from mutex_lock_interruptible().
 */
static int i915_context_status(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_context *ctx;
	int ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	list_for_each_entry(ctx, &dev_priv->context_list, link) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		/* Without execlists, skip contexts that have no rcs_state. */
		if (!i915.enable_execlists && !ctx->legacy_hw_ctx.rcs_state)
			continue;

		seq_puts(m, "HW context ");
		describe_ctx(m, ctx);
		if (ctx == dev_priv->kernel_context)
			seq_puts(m, "(kernel context) ");

		if (!i915.enable_execlists) {
			describe_obj(m, ctx->legacy_hw_ctx.rcs_state);
			seq_putc(m, '\n');
			continue;
		}

		/* Execlists: one line per engine. */
		seq_putc(m, '\n');
		for_each_engine_id(engine, dev_priv, id) {
			seq_printf(m, "%s: ", engine->name);
			if (ctx->engine[id].state)
				describe_obj(m, ctx->engine[id].state);
			if (ctx->engine[id].ringbuf)
				describe_ctx_ringbuf(m,
						     ctx->engine[id].ringbuf);
			seq_putc(m, '\n');
		}
		seq_putc(m, '\n');
	}

	mutex_unlock(&dev->struct_mutex);

	return 0;
}
static void i915_dump_lrc_obj(struct seq_file *m,
struct intel_context *ctx,
struct intel_engine_cs *engine)
{
struct page *page;
uint32_t *reg_state;
int j;
struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
unsigned long ggtt_offset = 0;
if (ctx_obj == NULL) {
seq_printf(m, "Context on %s with no gem object\n",
engine->name);
return;
}
seq_printf(m, "CONTEXT: %s %u\n", engine->name,
intel_execlists_ctx_id(ctx, engine));
if (!i915_gem_obj_ggtt_bound(ctx_obj))
seq_puts(m, "\tNot bound in GGTT\n");
else
ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
if (i915_gem_object_get_pages(ctx_obj)) {
seq_puts(m, "\tFailed to get pages for context object\n");
return;
}
drm/i915: Integrate GuC-based command submission GuC-based submission is mostly the same as execlist mode, up to intel_logical_ring_advance_and_submit(), where the context being dispatched would be added to the execlist queue; at this point we submit the context to the GuC backend instead. There are, however, a few other changes also required, notably: 1. Contexts must be pinned at GGTT addresses accessible by the GuC i.e. NOT in the range [0..WOPCM_SIZE), so we have to add the PIN_OFFSET_BIAS flag to the relevant GGTT-pinning calls. 2. The GuC's TLB must be invalidated after a context is pinned at a new GGTT address. 3. GuC firmware uses the one page before Ring Context as shared data. Therefore, whenever driver wants to get base address of LRC, we will offset one page for it. LRC_PPHWSP_PN is defined as the page number of LRCA. 4. In the work queue used to pass requests to the GuC, the GuC firmware requires the ring-tail-offset to be represented as an 11-bit value, expressed in QWords. Therefore, the ringbuffer size must be reduced to the representable range (4 pages). v2: Defer adding #defines until needed [Chris Wilson] Rationalise type declarations [Chris Wilson] v4: Squashed kerneldoc patch into here [Daniel Vetter] v5: Update request->tail in code common to both GuC and execlist modes. Add a private version of lr_context_update(), as sharing the execlist version leads to race conditions when the CPU and the GuC both update TAIL in the context image. Conversion of error-captured HWS page to string must account for offset from start of object to actual HWS (LRC_PPHWSP_PN). Issue: VIZ-4884 Signed-off-by: Alex Dai <yu.dai@intel.com> Signed-off-by: Dave Gordon <david.s.gordon@intel.com> Reviewed-by: Tom O'Rourke <Tom.O'Rourke@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-08-12 22:43:43 +08:00
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
if (!WARN_ON(page == NULL)) {
reg_state = kmap_atomic(page);
for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
ggtt_offset + 4096 + (j * 4),
reg_state[j], reg_state[j + 1],
reg_state[j + 2], reg_state[j + 3]);
}
kunmap_atomic(reg_state);
}
seq_putc(m, '\n');
}
static int i915_dump_lrc(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *engine;
struct intel_context *ctx;
int ret;
if (!i915.enable_execlists) {
seq_printf(m, "Logical Ring Contexts are disabled\n");
return 0;
}
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
list_for_each_entry(ctx, &dev_priv->context_list, link)
if (ctx != dev_priv->kernel_context)
for_each_engine(engine, dev_priv)
i915_dump_lrc_obj(m, ctx, engine);
mutex_unlock(&dev->struct_mutex);
return 0;
}
/*
 * debugfs show handler: per-engine execlists state.
 *
 * For each engine this prints the execlist status registers, the
 * context status buffer read/write pointers, every context status
 * buffer entry, the number of requests on the execlist queue and, if
 * the queue is not empty, the context id and tail of the head request.
 *
 * struct_mutex and a runtime-pm reference are held for the whole dump;
 * the queue itself is only walked under engine->execlist_lock.
 *
 * Prints a notice and returns 0 when execlists are disabled.  Otherwise
 * returns 0 on success or the error from mutex_lock_interruptible().
 */
static int i915_execlists(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	int ret;

	if (!i915.enable_execlists) {
		seq_puts(m, "Logical Ring Contexts are disabled\n");
		return 0;
	}

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(dev_priv);

	for_each_engine(engine, dev_priv) {
		struct drm_i915_gem_request *head_req;
		struct list_head *pos;
		u32 status_pointer;
		u32 status;
		u32 ctx_id;
		u8 read_pointer;
		u8 write_pointer;
		int queued = 0;
		int i;

		seq_printf(m, "%s\n", engine->name);

		status = I915_READ(RING_EXECLIST_STATUS_LO(engine));
		ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(engine));
		seq_printf(m, "\tExeclist status: 0x%08X, context: %u\n",
			   status, ctx_id);

		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);

		read_pointer = engine->next_context_status_buffer;
		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
		/* Bias the write pointer so it is never behind the read one. */
		if (read_pointer > write_pointer)
			write_pointer += GEN8_CSB_ENTRIES;
		seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n",
			   read_pointer, write_pointer);

		for (i = 0; i < GEN8_CSB_ENTRIES; i++) {
			status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, i));
			ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, i));

			seq_printf(m, "\tStatus buffer %d: 0x%08X, context: %u\n",
				   i, status, ctx_id);
		}

		/* Count the queue and grab its head under the queue lock. */
		spin_lock_bh(&engine->execlist_lock);
		list_for_each(pos, &engine->execlist_queue)
			queued++;
		head_req = list_first_entry_or_null(&engine->execlist_queue,
						    struct drm_i915_gem_request,
						    execlist_link);
		spin_unlock_bh(&engine->execlist_lock);

		seq_printf(m, "\t%d requests in queue\n", queued);
		if (head_req) {
			seq_printf(m, "\tHead request id: %u\n",
				   intel_execlists_ctx_id(head_req->ctx, engine));
			seq_printf(m, "\tHead request tail: %u\n",
				   head_req->tail);
		}

		seq_putc(m, '\n');
	}

	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}
/*
 * Map an I915_BIT_6_SWIZZLE_* value to a short human-readable name.
 * Values that match none of the known constants yield "bug".
 */
static const char *swizzle_string(unsigned swizzle)
{
	const char *name = "bug";

	switch (swizzle) {
	case I915_BIT_6_SWIZZLE_NONE:
		name = "none";
		break;
	case I915_BIT_6_SWIZZLE_9:
		name = "bit9";
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		name = "bit9/bit10";
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		name = "bit9/bit11";
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		name = "bit9/bit10/bit11";
		break;
	case I915_BIT_6_SWIZZLE_9_17:
		name = "bit9/bit17";
		break;
	case I915_BIT_6_SWIZZLE_9_10_17:
		name = "bit9/bit10/bit17";
		break;
	case I915_BIT_6_SWIZZLE_UNKNOWN:
		name = "unknown";
		break;
	}

	return name;
}
/*
 * debugfs show handler: report the detected bit-6 swizzling modes for
 * X- and Y-tiling together with the raw registers relevant to them.
 *
 * Gen3/gen4 dump DCC, DCC2, C0DRB3 and C1DRB3; gen6+ dump the MAD_DIMM
 * registers, TILECTL, the arbiter mode register (GAMTARBMODE on gen8+,
 * ARB_MODE before that) and DISP_ARB_CTL.  A final line is added when
 * the QUIRK_PIN_SWIZZLED_PAGES quirk is set.
 *
 * struct_mutex and a runtime-pm reference are held across the register
 * reads.
 *
 * Returns 0 on success, or the error from mutex_lock_interruptible().
 */
static int i915_swizzle_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;
	intel_runtime_pm_get(dev_priv);

	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
		   swizzle_string(dev_priv->mm.bit_6_swizzle_x));
	seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
		   swizzle_string(dev_priv->mm.bit_6_swizzle_y));

	if (IS_GEN3(dev) || IS_GEN4(dev)) {
		/*
		 * The labels name the registers that are actually read
		 * (DCC/DCC2); they were previously misspelled "DDC"/"DDC2".
		 */
		seq_printf(m, "DCC = 0x%08x\n",
			   I915_READ(DCC));
		seq_printf(m, "DCC2 = 0x%08x\n",
			   I915_READ(DCC2));
		seq_printf(m, "C0DRB3 = 0x%04x\n",
			   I915_READ16(C0DRB3));
		seq_printf(m, "C1DRB3 = 0x%04x\n",
			   I915_READ16(C1DRB3));
	} else if (INTEL_INFO(dev)->gen >= 6) {
		seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
			   I915_READ(MAD_DIMM_C0));
		seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n",
			   I915_READ(MAD_DIMM_C1));
		seq_printf(m, "MAD_DIMM_C2 = 0x%08x\n",
			   I915_READ(MAD_DIMM_C2));
		seq_printf(m, "TILECTL = 0x%08x\n",
			   I915_READ(TILECTL));
		if (INTEL_INFO(dev)->gen >= 8)
			seq_printf(m, "GAMTARBMODE = 0x%08x\n",
				   I915_READ(GAMTARBMODE));
		else
			seq_printf(m, "ARB_MODE = 0x%08x\n",
				   I915_READ(ARB_MODE));
		seq_printf(m, "DISP_ARB_CTL = 0x%08x\n",
			   I915_READ(DISP_ARB_CTL));
	}

	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
		seq_puts(m, "L-shaped memory detected\n");

	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}
static int per_file_ctx(int id, void *ptr, void *data)
{
struct intel_context *ctx = ptr;
struct seq_file *m = data;
struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
if (!ppgtt) {
seq_printf(m, " no ppgtt for context %d\n",
ctx->user_handle);
return 0;
}
if (i915_gem_context_is_default(ctx))
seq_puts(m, " default context:\n");
else
drm/i915: Emphasize that ctx->id is merely a user handle This is an Execlists preparatory patch, since they make context ID become an overloaded term: - In the software, it was used to distinguish which context userspace was trying to use. - In the BSpec, the term is used to describe the 20-bits long field the hardware uses to it to discriminate the contexts that are submitted to the ELSP and inform the driver about their current status (via Context Switch Interrupts and Context Status Buffers). Initially, I tried to make the different meanings converge, but it proved impossible: - The software ctx->id is per-filp, while the hardware one needs to be globally unique. - Also, we multiplex several backing states objects per intel_context, and all of them need unique HW IDs. - I tried adding a per-filp ID and then composing the HW context ID as: ctx->id + file_priv->id + ring->id, but the fact that the hardware only uses 20-bits means we have to artificially limit the number of filps or contexts the userspace can create. The ctx->user_handle renaming bits are done with this Cocci patch (plus manual frobbing of the struct declaration): @@ struct intel_context c; @@ - (c).id + c.user_handle @@ struct intel_context *c; @@ - (c)->id + c->user_handle Also, while we are at it, s/DEFAULT_CONTEXT_ID/DEFAULT_CONTEXT_HANDLE and change the type to unsigned 32 bits. v2: s/handle/user_handle and change the type to uint32_t as suggested by Chris Wilson. Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v1) Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-03 23:28:00 +08:00
seq_printf(m, " context %d:\n", ctx->user_handle);
ppgtt->debug_dump(ppgtt, m);
return 0;
}
/*
 * Print the four PDP registers of every engine as 64-bit values
 * (upper dword read first, then the lower dword).
 * Nothing is printed when there is no aliasing PPGTT.
 */
static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	int i;

	if (!dev_priv->mm.aliasing_ppgtt)
		return;

	for_each_engine(engine, dev_priv) {
		seq_printf(m, "%s\n", engine->name);

		for (i = 0; i < 4; i++) {
			u64 upper = I915_READ(GEN8_RING_PDP_UDW(engine, i));
			u64 lower = I915_READ(GEN8_RING_PDP_LDW(engine, i));

			seq_printf(m, "\tPDP%d 0x%016llx\n",
				   i, (upper << 32) | lower);
		}
	}
}
static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *engine;
if (INTEL_INFO(dev)->gen == 6)
seq_printf(m, "GFX_MODE: 0x%08x\n", I915_READ(GFX_MODE));
for_each_engine(engine, dev_priv) {
seq_printf(m, "%s\n", engine->name);
if (INTEL_INFO(dev)->gen == 7)
seq_printf(m, "GFX_MODE: 0x%08x\n",
I915_READ(RING_MODE_GEN7(engine)));
seq_printf(m, "PP_DIR_BASE: 0x%08x\n",
I915_READ(RING_PP_DIR_BASE(engine)));
seq_printf(m, "PP_DIR_BASE_READ: 0x%08x\n",
I915_READ(RING_PP_DIR_BASE_READ(engine)));
seq_printf(m, "PP_DIR_DCLV: 0x%08x\n",
I915_READ(RING_PP_DIR_DCLV(engine)));
}
if (dev_priv->mm.aliasing_ppgtt) {
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
seq_puts(m, "aliasing PPGTT:\n");
seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd.base.ggtt_offset);
ppgtt->debug_dump(ppgtt, m);
}
seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
}
/*
 * debugfs show handler: PPGTT state.
 *
 * Prints the generation-specific PPGTT registers (gen8+ or gen6+) and
 * then, for every open DRM file (walked in reverse list order), the
 * owning task's name followed by a PPGTT dump of each of its contexts.
 *
 * struct_mutex and a runtime-pm reference are held throughout.
 *
 * Returns 0 on success, the error from mutex_lock_interruptible(), or
 * -ESRCH as soon as a file's task can no longer be found (the dump
 * stops at that file).
 */
static int i915_ppgtt_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_file *file;
	int ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;
	intel_runtime_pm_get(dev_priv);

	if (INTEL_INFO(dev)->gen >= 8)
		gen8_ppgtt_info(m, dev);
	else if (INTEL_INFO(dev)->gen >= 6)
		gen6_ppgtt_info(m, dev);

	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
		struct drm_i915_file_private *file_priv = file->driver_priv;
		struct task_struct *task = get_pid_task(file->pid, PIDTYPE_PID);

		if (task == NULL) {
			ret = -ESRCH;
			break;
		}

		seq_printf(m, "\nproc: %s\n", task->comm);
		put_task_struct(task);

		idr_for_each(&file_priv->context_idr, per_file_ctx, m);
	}

	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
static int count_irq_waiters(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
int count = 0;
for_each_engine(engine, i915)
count += engine->irq_refcount;
return count;
}
/*
 * debugfs show handler: RPS boost statistics.
 *
 * Prints the global RPS state and frequency limits, then, under
 * rps.client_lock, one line per open DRM file (task name, pid, boost
 * count and whether its rps.link is currently on a list), followed by
 * the semaphore, MMIO-flip and kernel boost counters.
 *
 * Always returns 0.
 */
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_file *file;

	seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
	seq_printf(m, "GPU busy? %d\n", dev_priv->mm.busy);
	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
	seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n",
		   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
		   intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
		   intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
		   intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
		   intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));

	spin_lock(&dev_priv->rps.client_lock);

	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
		struct drm_i915_file_private *file_priv = file->driver_priv;
		struct task_struct *task;
		const char *comm = "<unknown>";
		int pid = -1;

		/* pid_task() takes no reference; stay inside the RCU section. */
		rcu_read_lock();
		task = pid_task(file->pid, PIDTYPE_PID);
		if (task) {
			comm = task->comm;
			pid = task->pid;
		}
		seq_printf(m, "%s [%d]: %d boosts%s\n",
			   comm, pid,
			   file_priv->rps.boosts,
			   list_empty(&file_priv->rps.link) ? "" : ", active");
		rcu_read_unlock();
	}

	seq_printf(m, "Semaphore boosts: %d%s\n",
		   dev_priv->rps.semaphores.boosts,
		   list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
	seq_printf(m, "MMIO flip boosts: %d%s\n",
		   dev_priv->rps.mmioflips.boosts,
		   list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);

	spin_unlock(&dev_priv->rps.client_lock);

	return 0;
}
/*
 * debugfs show handler: report whether the device has an LLC and the
 * size, in MB, returned by intel_uncore_edram_size().  The size line is
 * labelled "eDRAM" on gen9+ and "eLLC" on earlier generations.
 *
 * Always returns 0.
 */
static int i915_llc(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const char *cache_name;

	if (INTEL_GEN(dev_priv) > 8)
		cache_name = "eDRAM";
	else
		cache_name = "eLLC";

	seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev)));
	seq_printf(m, "%s: %lluMB\n", cache_name,
		   intel_uncore_edram_size(dev_priv)/1024/1024);

	return 0;
}
/*
 * debugfs show handler: GuC firmware load status.
 *
 * Prints the firmware bookkeeping kept in dev_priv->guc.guc_fw (path,
 * fetch/load status, wanted and found versions, section offsets and
 * sizes), the decoded GUC_STATUS register and the 16 soft scratch
 * registers.
 *
 * Prints nothing when the device has no GuC ucode.  Always returns 0.
 */
static int i915_guc_load_status_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_i915_private *dev_priv = node->minor->dev->dev_private;
	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
	u32 status;
	u32 i;

	if (!HAS_GUC_UCODE(dev_priv))
		return 0;

	/* Software view of the firmware image. */
	seq_puts(m, "GuC firmware status:\n");
	seq_printf(m, "\tpath: %s\n", guc_fw->guc_fw_path);
	seq_printf(m, "\tfetch: %s\n",
		   intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
	seq_printf(m, "\tload: %s\n",
		   intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
	seq_printf(m, "\tversion wanted: %d.%d\n",
		   guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
	seq_printf(m, "\tversion found: %d.%d\n",
		   guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found);
	seq_printf(m, "\theader: offset is %d; size = %d\n",
		   guc_fw->header_offset, guc_fw->header_size);
	seq_printf(m, "\tuCode: offset is %d; size = %d\n",
		   guc_fw->ucode_offset, guc_fw->ucode_size);
	seq_printf(m, "\tRSA: offset is %d; size = %d\n",
		   guc_fw->rsa_offset, guc_fw->rsa_size);

	/* Hardware view: status register split into its fields. */
	status = I915_READ(GUC_STATUS);
	seq_printf(m, "\nGuC status 0x%08x:\n", status);
	seq_printf(m, "\tBootrom status = 0x%x\n",
		   (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
	seq_printf(m, "\tuKernel status = 0x%x\n",
		   (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
	seq_printf(m, "\tMIA Core status = 0x%x\n",
		   (status & GS_MIA_MASK) >> GS_MIA_SHIFT);

	seq_puts(m, "\nScratch registers:\n");
	for (i = 0; i < 16; i++) {
		seq_printf(m, "\t%2d: \t0x%x\n", i,
			   I915_READ(SOFT_SCRATCH(i)));
	}

	return 0;
}
/*
 * Print the state of one GuC client: priority, context index, doorbell
 * and work-queue parameters, failure counters, the last submission
 * result, and the per-engine submission counts with their total.
 */
static void i915_guc_client_info(struct seq_file *m,
				 struct drm_i915_private *dev_priv,
				 struct i915_guc_client *client)
{
	struct intel_engine_cs *engine;
	uint64_t total = 0;

	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
		   client->priority,
		   client->ctx_index,
		   client->proc_desc_offset);
	seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
		   client->doorbell_id,
		   client->doorbell_offset,
		   client->cookie);
	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
		   client->wq_size,
		   client->wq_offset,
		   client->wq_tail);

	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
	seq_printf(m, "\tLast submission result: %d\n", client->retcode);

	for_each_engine(engine, dev_priv) {
		total += client->submissions[engine->guc_id];
		seq_printf(m, "\tSubmissions: %llu %s\n",
			   client->submissions[engine->guc_id],
			   engine->name);
	}
	seq_printf(m, "\tTotal: %llu\n", total);
}
/*
 * debugfs show handler: dump GuC action statistics, per-engine submission
 * counts and the state of the execbuf client.
 *
 * Returns 0 on success (including when the platform has no GuC scheduling),
 * or the error from mutex_lock_interruptible() if taking struct_mutex was
 * interrupted.
 */
static int i915_guc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_guc guc;
	struct i915_guc_client client = {};
	struct intel_engine_cs *engine;
	u64 total = 0;
	int ret;

	if (!HAS_GUC_SCHED(dev_priv))
		return 0;

	/*
	 * Report an interrupted lock to the caller instead of pretending the
	 * read succeeded with no output, matching the other handlers in this
	 * file that take struct_mutex.
	 */
	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	/* Take a local copy of the GuC data, so we can dump it at leisure */
	guc = dev_priv->guc;
	if (guc.execbuf_client)
		client = *guc.execbuf_client;

	mutex_unlock(&dev->struct_mutex);

	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
	seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);

	seq_printf(m, "\nGuC submissions:\n");
	for_each_engine(engine, dev_priv) {
		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
			   engine->name, guc.submissions[engine->guc_id],
			   guc.last_seqno[engine->guc_id]);
		total += guc.submissions[engine->guc_id];
	}
	seq_printf(m, "\t%s: %llu\n", "Total", total);

	/*
	 * When there is no execbuf client the zero-initialised local copy is
	 * printed, so all client fields read as 0.
	 */
	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
	i915_guc_client_info(m, dev_priv, &client);

	/* Add more as required ... */

	return 0;
}
/*
 * debugfs show handler: hex-dump the GuC log object, four 32-bit words per
 * output line, one page at a time. Prints nothing when there is no log
 * object. Always returns 0.
 */
static int i915_guc_log_dump(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
	int pg;

	if (!log_obj)
		return 0;

	for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
		/* Map just the current page for the duration of its dump */
		u32 *words = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
		int w;

		for (w = 0; w < PAGE_SIZE / sizeof(u32); w += 4)
			seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
				   words[w], words[w + 1],
				   words[w + 2], words[w + 3]);

		kunmap_atomic(words);
	}

	seq_putc(m, '\n');

	return 0;
}
/*
 * debugfs show handler: report the driver's PSR software state together
 * with the hardware PSR status. On DDI platforms the hardware state comes
 * from EDP_PSR_CTL; otherwise the per-pipe VLV_PSRSTAT registers are
 * sampled. Always returns 0.
 */
static int i915_edp_psr_status(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 stat[3];
	enum pipe pipe;
	bool enabled = false;

	if (!HAS_PSR(dev)) {
		seq_puts(m, "PSR not supported\n");
		return 0;
	}

	/* Registers are read below, so keep the device awake */
	intel_runtime_pm_get(dev_priv);

	mutex_lock(&dev_priv->psr.lock);

	/* Software tracking state */
	seq_printf(m, "Sink_Support: %s\n", yesno(dev_priv->psr.sink_support));
	seq_printf(m, "Source_OK: %s\n", yesno(dev_priv->psr.source_ok));
	seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled));
	seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active));
	seq_printf(m, "Busy frontbuffer bits: 0x%03x\n",
		   dev_priv->psr.busy_frontbuffer_bits);
	seq_printf(m, "Re-enable work scheduled: %s\n",
		   yesno(work_busy(&dev_priv->psr.work.work)));

	/* Hardware state */
	if (!HAS_DDI(dev)) {
		for_each_pipe(dev_priv, pipe) {
			u32 state = I915_READ(VLV_PSRSTAT(pipe)) &
				    VLV_EDP_PSR_CURR_STATE_MASK;

			/* Remembered so the per-pipe list below can reuse it */
			stat[pipe] = state;

			if (state == VLV_EDP_PSR_ACTIVE_NORFB_UP ||
			    state == VLV_EDP_PSR_ACTIVE_SF_UPDATE)
				enabled = true;
		}
	} else {
		enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
	}

	seq_printf(m, "Main link in standby mode: %s\n",
		   yesno(dev_priv->psr.link_standby));

	seq_printf(m, "HW Enabled & Active bit: %s", yesno(enabled));

	if (!HAS_DDI(dev)) {
		/* Name every pipe whose sampled state counted as active */
		for_each_pipe(dev_priv, pipe) {
			if (stat[pipe] == VLV_EDP_PSR_ACTIVE_NORFB_UP ||
			    stat[pipe] == VLV_EDP_PSR_ACTIVE_SF_UPDATE)
				seq_printf(m, " pipe %c", pipe_name(pipe));
		}
	}
	seq_puts(m, "\n");

	/*
	 * VLV/CHV PSR has no kind of performance counter
	 * SKL+ Perf counter is reset to 0 everytime DC state is entered
	 */
	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
		u32 perf_cnt = I915_READ(EDP_PSR_PERF_CNT) &
			       EDP_PSR_PERF_CNT_MASK;

		seq_printf(m, "Performance_Counter: %u\n", perf_cnt);
	}

	mutex_unlock(&dev_priv->psr.lock);

	intel_runtime_pm_put(dev_priv);

	return 0;
}
/*
 * debugfs show handler: print the 6-byte sink CRC of the first connector
 * that is DPMS-on, has an encoder attached, and whose encoder is eDP.
 *
 * Returns 0 after printing the CRC, the error from intel_dp_sink_crc() if
 * reading it failed, or -ENODEV when no connector qualifies.
 */
static int i915_sink_crc(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct intel_encoder *encoder;
	struct intel_connector *connector;
	struct intel_dp *intel_dp = NULL;
	int ret;
	u8 crc[6];

	drm_modeset_lock_all(dev);
	for_each_intel_connector(dev, connector) {
		if (connector->base.dpms != DRM_MODE_DPMS_ON)
			continue;

		/* A connector without an encoder must not be dereferenced */
		if (!connector->base.encoder)
			continue;

		encoder = to_intel_encoder(connector->base.encoder);
		if (encoder->type != INTEL_OUTPUT_EDP)
			continue;

		intel_dp = enc_to_intel_dp(&encoder->base);

		ret = intel_dp_sink_crc(intel_dp, crc);
		if (ret)
			goto out;

		seq_printf(m, "%02x%02x%02x%02x%02x%02x\n",
			   crc[0], crc[1], crc[2],
			   crc[3], crc[4], crc[5]);

		/* Only the first matching connector is reported */
		goto out;
	}
	ret = -ENODEV;
out:
	drm_modeset_unlock_all(dev);
	return ret;
}
/*
 * debugfs show handler: print the MCH_SECP_NRG_STTS reading scaled to
 * microjoules, using the unit exponent taken from bits 12:8 of
 * MSR_RAPL_POWER_UNIT. Returns -ENODEV before gen6, otherwise 0.
 */
static int i915_energy_uJ(struct seq_file *m, void *data)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 exponent;
	u64 energy;
	u32 units;

	if (INTEL_INFO(dev)->gen < 6)
		return -ENODEV;

	intel_runtime_pm_get(dev_priv);

	rdmsrl(MSR_RAPL_POWER_UNIT, exponent);
	exponent = (exponent & 0x1f00) >> 8;

	units = 1000000 / (1 << exponent); /* convert to uJ */

	energy = I915_READ(MCH_SECP_NRG_STTS);
	energy *= units;

	intel_runtime_pm_put(dev_priv);

	seq_printf(m, "%llu", (long long unsigned)energy);

	return 0;
}
static int i915_runtime_pm_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
if (!HAS_RUNTIME_PM(dev_priv))
seq_puts(m, "Runtime power management not supported\n");
seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->mm.busy));
seq_printf(m, "IRQs disabled: %s\n",
yesno(!intel_irqs_enabled(dev_priv)));
#ifdef CONFIG_PM
seq_printf(m, "Usage count: %d\n",
atomic_read(&dev->dev->power.usage_count));
#else
seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
#endif
seq_printf(m, "PCI device power state: %s [%d]\n",
pci_power_name(dev_priv->dev->pdev->current_state),
dev_priv->dev->pdev->current_state);
return 0;
}
static int i915_power_domain_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_power_domains *power_domains = &dev_priv->power_domains;
int i;
mutex_lock(&power_domains->lock);
seq_printf(m, "%-25s %s\n", "Power well/domain", "Use count");
for (i = 0; i < power_domains->power_well_count; i++) {
struct i915_power_well *power_well;
enum intel_display_power_domain power_domain;
power_well = &power_domains->power_wells[i];
seq_printf(m, "%-25s %d\n", power_well->name,
power_well->count);
for (power_domain = 0; power_domain < POWER_DOMAIN_NUM;
power_domain++) {
if (!(BIT(power_domain) & power_well->domains))
continue;
seq_printf(m, " %-23s %d\n",
intel_display_power_domain_str(power_domain),
power_domains->domain_use_count[power_domain]);
}
}
mutex_unlock(&power_domains->lock);
return 0;
}
/*
 * debugfs show handler: report whether the DMC (CSR) firmware payload is
 * present, its path and version, the DC state transition counters where
 * the platform/firmware version check passes, and the CSR program/SSP/HTP
 * registers. The three registers are printed whether or not a payload is
 * loaded. Always returns 0.
 */
static int i915_dmc_info(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_csr *csr = &dev_priv->csr;

	if (!HAS_CSR(dev)) {
		seq_puts(m, "not supported\n");
		return 0;
	}

	intel_runtime_pm_get(dev_priv);

	seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
	seq_printf(m, "path: %s\n", csr->fw_path);

	/* Version and counters are only printed with a payload present */
	if (csr->dmc_payload) {
		seq_printf(m, "version: %d.%d\n",
			   CSR_VERSION_MAJOR(csr->version),
			   CSR_VERSION_MINOR(csr->version));

		if (IS_SKYLAKE(dev) && csr->version >= CSR_VERSION(1, 6)) {
			seq_printf(m, "DC3 -> DC5 count: %d\n",
				   I915_READ(SKL_CSR_DC3_DC5_COUNT));
			seq_printf(m, "DC5 -> DC6 count: %d\n",
				   I915_READ(SKL_CSR_DC5_DC6_COUNT));
		} else if (IS_BROXTON(dev) &&
			   csr->version >= CSR_VERSION(1, 4)) {
			seq_printf(m, "DC3 -> DC5 count: %d\n",
				   I915_READ(BXT_CSR_DC3_DC5_COUNT));
		}
	}

	seq_printf(m, "program base: 0x%08x\n", I915_READ(CSR_PROGRAM(0)));
	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));

	intel_runtime_pm_put(dev_priv);

	return 0;
}
/*
 * Print one display mode on a single line, preceded by @tabs tab
 * characters: id, name, refresh, clock, the horizontal and vertical
 * timings, type and flags.
 */
static void intel_seq_print_mode(struct seq_file *m, int tabs,
				 struct drm_display_mode *mode)
{
	/* Indentation prefix; nothing is emitted when tabs <= 0 */
	while (tabs-- > 0)
		seq_putc(m, '\t');

	seq_printf(m, "id %d:\"%s\" freq %d clock %d hdisp %d hss %d hse %d htot %d vdisp %d vss %d vse %d vtot %d type 0x%x flags 0x%x\n",
		   mode->base.id,
		   mode->name,
		   mode->vrefresh,
		   mode->clock,
		   mode->hdisplay,
		   mode->hsync_start,
		   mode->hsync_end,
		   mode->htotal,
		   mode->vdisplay,
		   mode->vsync_start,
		   mode->vsync_end,
		   mode->vtotal,
		   mode->type,
		   mode->flags);
}
/*
 * Print one encoder and every connector currently attached to it. For a
 * connector whose status is "connected" the CRTC's mode is printed as
 * well.
 */
static void intel_encoder_info(struct seq_file *m,
			       struct intel_crtc *intel_crtc,
			       struct intel_encoder *intel_encoder)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_crtc *crtc = &intel_crtc->base;
	struct drm_encoder *encoder = &intel_encoder->base;
	struct intel_connector *intel_connector;

	seq_printf(m, "\tencoder %d: type: %s, connectors:\n",
		   encoder->base.id, encoder->name);

	for_each_connector_on_encoder(dev, encoder, intel_connector) {
		struct drm_connector *connector = &intel_connector->base;

		seq_printf(m, "\t\tconnector %d: type: %s, status: %s",
			   connector->base.id,
			   connector->name,
			   drm_get_connector_status_name(connector->status));

		if (connector->status != connector_status_connected) {
			/* Nothing more to say; just terminate the line */
			seq_putc(m, '\n');
			continue;
		}

		seq_puts(m, ", mode:\n");
		intel_seq_print_mode(m, 2, &crtc->mode);
	}
}
/*
 * Print the primary plane's framebuffer (id, source position and size),
 * or a note that the primary plane is disabled, followed by the details
 * of every encoder on this CRTC.
 */
static void intel_crtc_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_crtc *crtc = &intel_crtc->base;
	struct drm_plane_state *primary = crtc->primary->state;
	struct drm_framebuffer *fb = primary->fb;
	struct intel_encoder *intel_encoder;

	if (!fb)
		seq_puts(m, "\tprimary plane disabled\n");
	else
		/* src_x/src_y are shifted right by 16 before printing */
		seq_printf(m, "\tfb: %d, pos: %dx%d, size: %dx%d\n",
			   fb->base.id,
			   primary->src_x >> 16,
			   primary->src_y >> 16,
			   fb->width, fb->height);

	for_each_encoder_on_crtc(dev, crtc, intel_encoder)
		intel_encoder_info(m, intel_crtc, intel_encoder);
}
/* Print the panel's fixed mode under a "fixed mode:" heading. */
static void intel_panel_info(struct seq_file *m, struct intel_panel *panel)
{
	seq_puts(m, "\tfixed mode:\n");
	intel_seq_print_mode(m, 2, panel->fixed_mode);
}
/*
 * Print DisplayPort specifics for a connector: the DPCD revision byte and
 * audio support. For eDP encoders the panel's fixed mode is printed too.
 */
static void intel_dp_info(struct seq_file *m,
			  struct intel_connector *intel_connector)
{
	struct intel_encoder *encoder = intel_connector->encoder;
	struct intel_dp *dp = enc_to_intel_dp(&encoder->base);

	seq_printf(m, "\tDPCD rev: %x\n", dp->dpcd[DP_DPCD_REV]);
	seq_printf(m, "\taudio support: %s\n", yesno(dp->has_audio));

	if (encoder->type != INTEL_OUTPUT_EDP)
		return;

	intel_panel_info(m, &intel_connector->panel);
}
/*
 * Print audio support for a DP MST connector, as reported by
 * drm_dp_mst_port_has_audio() for the connector's port on the primary
 * digital port's MST manager.
 */
static void intel_dp_mst_info(struct seq_file *m,
			      struct intel_connector *intel_connector)
{
	struct intel_encoder *encoder = intel_connector->encoder;
	struct intel_dp_mst_encoder *mst = enc_to_mst(&encoder->base);
	struct intel_digital_port *dig_port = mst->primary;
	struct intel_dp *dp = &dig_port->dp;

	seq_printf(m, "\taudio support: %s\n",
		   yesno(drm_dp_mst_port_has_audio(&dp->mst_mgr,
						   intel_connector->port)));
}
/* Print whether the HDMI encoder behind this connector has audio. */
static void intel_hdmi_info(struct seq_file *m,
			    struct intel_connector *intel_connector)
{
	struct intel_hdmi *hdmi =
		enc_to_intel_hdmi(&intel_connector->encoder->base);

	seq_printf(m, "\taudio support: %s\n", yesno(hdmi->has_audio));
}
/* LVDS connectors only report their panel information. */
static void intel_lvds_info(struct seq_file *m,
			    struct intel_connector *intel_connector)
{
	struct intel_panel *panel = &intel_connector->panel;

	intel_panel_info(m, panel);
}
static void intel_connector_info(struct seq_file *m,
struct drm_connector *connector)
{
struct intel_connector *intel_connector = to_intel_connector(connector);
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct drm_display_mode *mode;
seq_printf(m, "connector %d: type %s, status: %s\n",
connector->base.id, connector->name,
drm_get_connector_status_name(connector->status));
if (connector->status == connector_status_connected) {
seq_printf(m, "\tname: %s\n", connector->display_info.name);
seq_printf(m, "\tphysical dimensions: %dx%dmm\n",
connector->display_info.width_mm,
connector->display_info.height_mm);
seq_printf(m, "\tsubpixel order: %s\n",
drm_get_subpixel_order_name(connector->display_info.subpixel_order));
seq_printf(m, "\tCEA rev: %d\n",
connector->display_info.cea_rev);
}
if (intel_encoder) {
if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
intel_encoder->type == INTEL_OUTPUT_EDP)
intel_dp_info(m, intel_connector);
else if (intel_encoder->type == INTEL_OUTPUT_HDMI)
intel_hdmi_info(m, intel_connector);
else if (intel_encoder->type == INTEL_OUTPUT_LVDS)
intel_lvds_info(m, intel_connector);
else if (intel_encoder->type == INTEL_OUTPUT_DP_MST)
intel_dp_mst_info(m, intel_connector);
}
seq_printf(m, "\tmodes:\n");
list_for_each_entry(mode, &connector->modes, head)
intel_seq_print_mode(m, 2, mode);
}
/*
 * Return true when the cursor control register has its enable/mode bits
 * set. On 845G/865G the register of PIPE_A is tested against
 * CURSOR_ENABLE regardless of @pipe; elsewhere the given pipe's register
 * is tested against CURSOR_MODE.
 */
static bool cursor_active(struct drm_device *dev, int pipe)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (IS_845G(dev) || IS_I865G(dev))
		return (I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE) != 0;

	return (I915_READ(CURCNTR(pipe)) & CURSOR_MODE) != 0;
}
/*
 * Decode the cursor position register of @pipe into signed coordinates
 * stored through @x and @y. Each coordinate is a masked magnitude that is
 * negated when its sign bit is set.
 *
 * Returns whether the cursor is active, as reported by cursor_active().
 */
static bool cursor_position(struct drm_device *dev, int pipe, int *x, int *y)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 pos = I915_READ(CURPOS(pipe));
	int cx = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK;
	int cy = (pos >> CURSOR_Y_SHIFT) & CURSOR_POS_MASK;

	*x = (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT)) ? -cx : cx;
	*y = (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT)) ? -cy : cy;

	return cursor_active(dev, pipe);
}
/*
 * Map a DRM plane type to a three-letter tag ("OVL", "PRI" or "CUR").
 * Any other value yields "unknown".
 */
static const char *plane_type(enum drm_plane_type type)
{
	const char *tag = "unknown";

	switch (type) {
	case DRM_PLANE_TYPE_OVERLAY:
		tag = "OVL";
		break;
	case DRM_PLANE_TYPE_PRIMARY:
		tag = "PRI";
		break;
	case DRM_PLANE_TYPE_CURSOR:
		tag = "CUR";
		break;
	/*
	 * Deliberately omitting default: to generate compiler warnings
	 * when a new drm_plane_type gets added.
	 */
	}

	return tag;
}
/*
 * Render a rotation bitmask as text: one token per set rotation/reflect
 * bit, followed by the raw value in hex.
 *
 * The result lives in a function-local static buffer, so each call
 * overwrites the string returned by the previous one.
 */
static const char *plane_rotation(unsigned int rotation)
{
	static char buf[48];
	const char *rot_0 = (rotation & BIT(DRM_ROTATE_0)) ? "0 " : "";
	const char *rot_90 = (rotation & BIT(DRM_ROTATE_90)) ? "90 " : "";
	const char *rot_180 = (rotation & BIT(DRM_ROTATE_180)) ? "180 " : "";
	const char *rot_270 = (rotation & BIT(DRM_ROTATE_270)) ? "270 " : "";
	const char *flip_x = (rotation & BIT(DRM_REFLECT_X)) ? "FLIPX " : "";
	const char *flip_y = (rotation & BIT(DRM_REFLECT_Y)) ? "FLIPY " : "";

	/*
	 * According to doc only one DRM_ROTATE_ is allowed but this
	 * will print them all to visualize if the values are misused
	 */
	snprintf(buf, sizeof(buf), "%s%s%s%s%s%s(0x%08x)",
		 rot_0, rot_90, rot_180, rot_270, flip_x, flip_y,
		 rotation);

	return buf;
}
/*
 * Print one line per plane on @intel_crtc: id, type, CRTC position and
 * size, source position and size, pixel format and rotation.
 *
 * The source coordinates are split into an integer part (value >> 16) and
 * a fractional part (value & 0xffff). The fraction is scaled with
 * "* 15625 >> 10", i.e. * 1000000 / 65536, which yields millionths in the
 * range 0..999984, so it has to be printed zero-padded to six digits.
 * Padding to only four digits misplaces small fractions (1/65536 would
 * show up as ".0015" instead of ".000015").
 */
static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
	struct drm_info_node *node = m->private;
	struct drm_device *dev = node->minor->dev;
	struct intel_plane *intel_plane;

	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
		struct drm_plane_state *state;
		struct drm_plane *plane = &intel_plane->base;

		if (!plane->state) {
			seq_puts(m, "plane->state is NULL!\n");
			continue;
		}

		state = plane->state;

		seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%06ux%d.%06u, src_size=%d.%06ux%d.%06u, format=%s, rotation=%s\n",
			   plane->base.id,
			   plane_type(intel_plane->base.type),
			   state->crtc_x, state->crtc_y,
			   state->crtc_w, state->crtc_h,
			   (state->src_x >> 16),
			   ((state->src_x & 0xffff) * 15625) >> 10,
			   (state->src_y >> 16),
			   ((state->src_y & 0xffff) * 15625) >> 10,
			   (state->src_w >> 16),
			   ((state->src_w & 0xffff) * 15625) >> 10,
			   (state->src_h >> 16),
			   ((state->src_h & 0xffff) * 15625) >> 10,
			   state->fb ? drm_get_format_name(state->fb->pixel_format) : "N/A",
			   plane_rotation(state->rotation));
	}
}
/*
 * Print the scaler state of @intel_crtc: the number of scalers, the
 * scaler_users mask, the scaler_id, and the in-use flag and mode of each
 * scaler the CRTC actually has. CRTCs without scalers get a one-line
 * note instead.
 */
static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
	struct intel_crtc_state *pipe_config;
	int num_scalers = intel_crtc->num_scalers;
	int i;

	pipe_config = to_intel_crtc_state(intel_crtc->base.state);

	/* Not all platforms have a scaler */
	if (num_scalers) {
		seq_printf(m, "\tnum_scalers=%d, scaler_users=%x scaler_id=%d",
			   num_scalers,
			   pipe_config->scaler_state.scaler_users,
			   pipe_config->scaler_state.scaler_id);

		/*
		 * Walk only the scalers this CRTC has, so the list agrees
		 * with the num_scalers value printed above rather than
		 * always showing SKL_NUM_SCALERS entries.
		 */
		for (i = 0; i < num_scalers; i++) {
			struct intel_scaler *sc =
					&pipe_config->scaler_state.scalers[i];

			seq_printf(m, ", scalers[%d]: use=%s, mode=%x",
				   i, yesno(sc->in_use), sc->mode);
		}
		seq_puts(m, "\n");
	} else {
		seq_puts(m, "\tNo scalers available on this platform\n");
	}
}
static int i915_display_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *crtc;
struct drm_connector *connector;
intel_runtime_pm_get(dev_priv);
drm_modeset_lock_all(dev);
seq_printf(m, "CRTC info\n");
seq_printf(m, "---------\n");
for_each_intel_crtc(dev, crtc) {
bool active;
struct intel_crtc_state *pipe_config;
int x, y;
pipe_config = to_intel_crtc_state(crtc->base.state);
seq_printf(m, "CRTC %d: pipe: %c, active=%s, (size=%dx%d), dither=%s, bpp=%d\n",
crtc->base.base.id, pipe_name(crtc->pipe),
yesno(pipe_config->base.active),
pipe_config->pipe_src_w, pipe_config->pipe_src_h,
yesno(pipe_config->dither), pipe_config->pipe_bpp);
if (pipe_config->base.active) {
intel_crtc_info(m, crtc);
active = cursor_position(dev, crtc->pipe, &x, &y);
seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n",
yesno(crtc->cursor_base),
drm/i915: Kill intel_crtc->cursor_{width, height} (v2) The cursor size fields in intel_crtc just duplicate the data from cursor->state.crtc_{w,h} so we don't need them any more. Worse, their use in the watermark code actually introduces a subtle bug since they don't get updated to mirror the state values until the plane commit stage, which is *after* we've already used them to calculate new watermark values. This happens because we had to move watermark updates slightly earlier (outside vblank evasion) in commit commit 32b7eeec4d1e861230b09d437e95d76c86ff4a68 Author: Matt Roper <matthew.d.roper@intel.com> Date: Wed Dec 24 07:59:06 2014 -0800 drm/i915: Refactor work that can sleep out of commit (v7) Dropping the intel_crtc fields and just using the state values (which are properly updated by the time watermark updates happen) should solve the problem. Aside from the actual removal of the struct fields (which are formatted in a way that I couldn't figure out how to match in Coccinelle), the rest of this patch was generated via the following semantic patch: // Drop assignment @@ struct intel_crtc *C; struct drm_plane_state S; @@ ( - C->cursor_width = S.crtc_w; | - C->cursor_height = S.crtc_h; ) // Replace usage @@ struct intel_crtc *C; expression E; @@ ( - C->cursor_width + C->base.cursor->state->crtc_w | - C->cursor_height + C->base.cursor->state->crtc_h | - to_intel_crtc(E)->cursor_width + E->cursor->state->crtc_w | - to_intel_crtc(E)->cursor_height + E->cursor->state->crtc_h ) v2: Rebase Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Joe Konno <joe.konno@linux.intel.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89346 Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-28 02:12:00 +08:00
x, y, crtc->base.cursor->state->crtc_w,
crtc->base.cursor->state->crtc_h,
crtc->cursor_addr, yesno(active));
intel_scaler_info(m, crtc);
intel_plane_info(m, crtc);
}
seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n",
yesno(!crtc->cpu_fifo_underrun_disabled),
yesno(!crtc->pch_fifo_underrun_disabled));
}
seq_printf(m, "\n");
seq_printf(m, "Connector info\n");
seq_printf(m, "--------------\n");
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
intel_connector_info(m, connector);
}
drm_modeset_unlock_all(dev);
intel_runtime_pm_put(dev_priv);
return 0;
}
/*
 * debugfs show handler: dump inter-engine semaphore state.
 *
 * On Broadwell the values are read from page 0 of the semaphore object,
 * viewed as an array of 64-bit slots: "Last signal" walks slots
 * id * I915_NUM_ENGINES + j and "Last wait" walks slots
 * id + j * I915_NUM_ENGINES. On other platforms the signal mailbox
 * registers of each engine are read instead. The software sync_seqno
 * values of every engine are printed last.
 *
 * Returns 0 on success (or when semaphores are disabled), or the error
 * from mutex_lock_interruptible().
 */
static int i915_semaphore_status(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	int num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	enum intel_engine_id id;
	int j, ret;

	if (!i915_semaphore_is_enabled(dev)) {
		seq_puts(m, "Semaphores are disabled\n");
		return 0;
	}

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(dev_priv);

	if (!IS_BROADWELL(dev)) {
		seq_puts(m, " Last signal:");
		for_each_engine(engine, dev_priv) {
			for (j = 0; j < num_rings; j++)
				seq_printf(m, "0x%08x\n",
					   I915_READ(engine->semaphore.mbox.signal[j]));
		}
		seq_putc(m, '\n');
	} else {
		struct page *page =
			i915_gem_object_get_page(dev_priv->semaphore_obj, 0);
		uint64_t *seqno = kmap_atomic(page);

		for_each_engine_id(engine, dev_priv, id) {
			uint64_t slot;

			seq_printf(m, "%s\n", engine->name);

			seq_puts(m, " Last signal:");
			for (j = 0; j < num_rings; j++) {
				slot = id * I915_NUM_ENGINES + j;
				/* Second value is the slot's byte offset */
				seq_printf(m, "0x%08llx (0x%02llx) ",
					   seqno[slot], slot * 8);
			}
			seq_putc(m, '\n');

			seq_puts(m, " Last wait: ");
			for (j = 0; j < num_rings; j++) {
				slot = id + (j * I915_NUM_ENGINES);
				seq_printf(m, "0x%08llx (0x%02llx) ",
					   seqno[slot], slot * 8);
			}
			seq_putc(m, '\n');
		}

		kunmap_atomic(seqno);
	}

	seq_puts(m, "\nSync seqno:\n");
	for_each_engine(engine, dev_priv) {
		for (j = 0; j < num_rings; j++)
			seq_printf(m, " 0x%08x ",
				   engine->semaphore.sync_seqno[j]);
		seq_putc(m, '\n');
	}
	seq_putc(m, '\n');

	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}
/*
 * debugfs show handler: for every shared DPLL print its name and id, the
 * CRTC mask, active mask and on state, and the tracked hardware state
 * words (dpll, dpll_md, fp0, fp1, wrpll). Runs with all modeset locks
 * held. Always returns 0.
 */
static int i915_shared_dplls_info(struct seq_file *m, void *unused)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int idx;

	drm_modeset_lock_all(dev);

	for (idx = 0; idx < dev_priv->num_shared_dpll; idx++) {
		struct intel_shared_dpll *pll = &dev_priv->shared_dplls[idx];

		seq_printf(m, "DPLL%i: %s, id: %i\n",
			   idx, pll->name, pll->id);
		seq_printf(m, " crtc_mask: 0x%08x, active: 0x%x, on: %s\n",
			   pll->config.crtc_mask,
			   pll->active_mask,
			   yesno(pll->on));

		seq_puts(m, " tracked hardware state:\n");
		seq_printf(m, " dpll: 0x%08x\n",
			   pll->config.hw_state.dpll);
		seq_printf(m, " dpll_md: 0x%08x\n",
			   pll->config.hw_state.dpll_md);
		seq_printf(m, " fp0: 0x%08x\n",
			   pll->config.hw_state.fp0);
		seq_printf(m, " fp1: 0x%08x\n",
			   pll->config.hw_state.fp1);
		seq_printf(m, " wrpll: 0x%08x\n",
			   pll->config.hw_state.wrpll);
	}

	drm_modeset_unlock_all(dev);

	return 0;
}
static int i915_wa_registers(struct seq_file *m, void *unused)
{
int i;
int ret;
struct intel_engine_cs *engine;
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. 
Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 05:43:47 +08:00
struct i915_workarounds *workarounds = &dev_priv->workarounds;
enum intel_engine_id id;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. 
Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 05:43:47 +08:00
seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
for_each_engine_id(engine, dev_priv, id)
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. 
Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 05:43:47 +08:00
seq_printf(m, "HW whitelist count for %s: %d\n",
engine->name, workarounds->hw_whitelist_count[id]);
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. 
Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 05:43:47 +08:00
for (i = 0; i < workarounds->count; ++i) {
drm/i915: Type safe register read/write Make I915_READ and I915_WRITE more type safe by wrapping the register offset in a struct. This should eliminate most of the fumbles we've had with misplaced parens. This only takes care of normal mmio registers. We could extend the idea to other register types and define each with its own struct. That way you wouldn't be able to accidentally pass the wrong thing to a specific register access function. The gpio_reg setup is probably the ugliest thing left. But I figure I'd just leave it for now, and wait for some divine inspiration to strike before making it nice. As for the generated code, it's actually a bit better sometimes. Eg. looking at i915_irq_handler(), we can see the following change: lea 0x70024(%rdx,%rax,1),%r9d mov $0x1,%edx - movslq %r9d,%r9 - mov %r9,%rsi - mov %r9,-0x58(%rbp) - callq *0xd8(%rbx) + mov %r9d,%esi + mov %r9d,-0x48(%rbp) callq *0xd8(%rbx) So previously gcc thought the register offset might be signed and decided to sign extend it, just in case. The rest appears to be mostly just minor shuffling of instructions. v2: i915_mmio_reg_{offset,equal,valid}() helpers added s/_REG/_MMIO/ in the register defines mo more switch statements left to worry about ring_emit stuff got sorted in a prep patch cmd parser, lrc context and w/a batch buildup also in prep patch vgpu stuff cleaned up and moved to a prep patch all other unrelated changes split out v3: Rebased due to BXT DSI/BLC, MOCS, etc. v4: Rebased due to churn, s/i915_mmio_reg_t/i915_reg_t/ Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1447853606-2751-1-git-send-email-ville.syrjala@linux.intel.com
2015-11-18 21:33:26 +08:00
i915_reg_t addr;
u32 mask, value, read;
bool ok;
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. 
Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 05:43:47 +08:00
addr = workarounds->reg[i].addr;
mask = workarounds->reg[i].mask;
value = workarounds->reg[i].value;
read = I915_READ(addr);
ok = (value & mask) == (read & mask);
seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
drm/i915: Type safe register read/write Make I915_READ and I915_WRITE more type safe by wrapping the register offset in a struct. This should eliminate most of the fumbles we've had with misplaced parens. This only takes care of normal mmio registers. We could extend the idea to other register types and define each with its own struct. That way you wouldn't be able to accidentally pass the wrong thing to a specific register access function. The gpio_reg setup is probably the ugliest thing left. But I figure I'd just leave it for now, and wait for some divine inspiration to strike before making it nice. As for the generated code, it's actually a bit better sometimes. Eg. looking at i915_irq_handler(), we can see the following change: lea 0x70024(%rdx,%rax,1),%r9d mov $0x1,%edx - movslq %r9d,%r9 - mov %r9,%rsi - mov %r9,-0x58(%rbp) - callq *0xd8(%rbx) + mov %r9d,%esi + mov %r9d,-0x48(%rbp) callq *0xd8(%rbx) So previously gcc thought the register offset might be signed and decided to sign extend it, just in case. The rest appears to be mostly just minor shuffling of instructions. v2: i915_mmio_reg_{offset,equal,valid}() helpers added s/_REG/_MMIO/ in the register defines mo more switch statements left to worry about ring_emit stuff got sorted in a prep patch cmd parser, lrc context and w/a batch buildup also in prep patch vgpu stuff cleaned up and moved to a prep patch all other unrelated changes split out v3: Rebased due to BXT DSI/BLC, MOCS, etc. v4: Rebased due to churn, s/i915_mmio_reg_t/i915_reg_t/ Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1447853606-2751-1-git-send-email-ville.syrjala@linux.intel.com
2015-11-18 21:33:26 +08:00
i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
}
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
/*
 * debugfs show callback: print the DDB allocation table stored in
 * dev_priv->wm.skl_hw.ddb.  For every pipe, one row is printed per plane
 * followed by a row for the cursor, each giving start, end and size.
 *
 * Nothing is printed on hardware with gen < 9.  Always returns 0.
 */
static int i915_ddb_info(struct seq_file *m, void *unused)
{
	struct drm_info_node *info_node = m->private;
	struct drm_device *dev = info_node->minor->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct skl_ddb_allocation *alloc;
	enum pipe pipe;
	int plane;

	if (INTEL_INFO(dev)->gen < 9)
		return 0;

	/* The table is walked with all modeset locks held. */
	drm_modeset_lock_all(dev);

	alloc = &dev_priv->wm.skl_hw.ddb;

	/* Column header; the first column is left blank. */
	seq_printf(m, "%-15s%8s%8s%8s\n", "", "Start", "End", "Size");

	for_each_pipe(dev_priv, pipe) {
		struct skl_ddb_entry *slot;

		seq_printf(m, "Pipe %c\n", pipe_name(pipe));

		for_each_plane(dev_priv, pipe, plane) {
			slot = &alloc->plane[pipe][plane];
			/* Planes are shown 1-based. */
			seq_printf(m, " Plane%-8d%8u%8u%8u\n", plane + 1,
				   slot->start, slot->end,
				   skl_ddb_entry_size(slot));
		}

		/* The cursor lives in its own PLANE_CURSOR slot. */
		slot = &alloc->plane[pipe][PLANE_CURSOR];
		seq_printf(m, " %-13s%8u%8u%8u\n", "Cursor", slot->start,
			   slot->end, skl_ddb_entry_size(slot));
	}

	drm_modeset_unlock_all(dev);

	return 0;
}
/*
 * Print the DRRS status of one CRTC into @m.
 *
 * Output, in order:
 *  - a label for every encoder on the CRTC (bails out on the first encoder
 *    type that is not eDP/DSI/HDMI/DP),
 *  - the DRRS type recorded in the VBT,
 *  - if the CRTC state has DRRS, the busy frontbuffer bits, the current
 *    refresh-rate state and the matching vrefresh, all read under
 *    drrs->mutex.
 */
static void drrs_status_per_crtc(struct seq_file *m,
		struct drm_device *dev, struct intel_crtc *intel_crtc)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_drrs *drrs = &dev_priv->drrs;
	struct intel_encoder *intel_encoder;
	struct intel_panel *panel;
	const char *vbt_line;
	int vrefresh = 0;

	/* Label each encoder connected on this CRTC. */
	for_each_encoder_on_crtc(dev, &intel_crtc->base, intel_encoder) {
		const char *label;

		switch (intel_encoder->type) {
		case INTEL_OUTPUT_EDP:
			label = "eDP:\n";
			break;
		case INTEL_OUTPUT_DSI:
			label = "DSI:\n";
			break;
		case INTEL_OUTPUT_HDMI:
			label = "HDMI:\n";
			break;
		case INTEL_OUTPUT_DISPLAYPORT:
			label = "DP:\n";
			break;
		default:
			/* Unknown encoder type: report it and stop here. */
			seq_printf(m, "Other encoder (id=%d).\n",
				   intel_encoder->type);
			return;
		}

		seq_puts(m, label);
	}

	/* DRRS type as advertised by the VBT. */
	if (dev_priv->vbt.drrs_type == STATIC_DRRS_SUPPORT)
		vbt_line = "\tVBT: DRRS_type: Static";
	else if (dev_priv->vbt.drrs_type == SEAMLESS_DRRS_SUPPORT)
		vbt_line = "\tVBT: DRRS_type: Seamless";
	else if (dev_priv->vbt.drrs_type == DRRS_NOT_SUPPORTED)
		vbt_line = "\tVBT: DRRS_type: None";
	else
		vbt_line = "\tVBT: DRRS_type: FIXME: Unrecognized Value";

	seq_puts(m, vbt_line);
	seq_puts(m, "\n\n");

	/* Without DRRS in the CRTC state there is nothing more to show. */
	if (!to_intel_crtc_state(intel_crtc->base.state)->has_drrs) {
		seq_puts(m, "\tDRRS Supported : No");
		seq_puts(m, "\n");
		return;
	}

	mutex_lock(&drrs->mutex);

	seq_puts(m, "\tDRRS Supported: Yes\n");

	/* disable_drrs() will make drrs->dp NULL */
	if (!drrs->dp) {
		seq_puts(m, "Idleness DRRS: Disabled");
		goto out_unlock;
	}

	panel = &drrs->dp->attached_connector->panel;

	seq_printf(m, "\t\tBusy_frontbuffer_bits: 0x%X",
		   drrs->busy_frontbuffer_bits);
	seq_puts(m, "\n\t\t");

	if (drrs->refresh_rate_type == DRRS_HIGH_RR) {
		seq_puts(m, "DRRS_State: DRRS_HIGH_RR\n");
		vrefresh = panel->fixed_mode->vrefresh;
	} else if (drrs->refresh_rate_type == DRRS_LOW_RR) {
		seq_puts(m, "DRRS_State: DRRS_LOW_RR\n");
		vrefresh = panel->downclock_mode->vrefresh;
	} else {
		seq_printf(m, "DRRS_State: Unknown(%d)\n",
			   drrs->refresh_rate_type);
		goto out_unlock;
	}

	seq_printf(m, "\t\tVrefresh: %d", vrefresh);
	seq_puts(m, "\n\t\t");

	mutex_unlock(&drrs->mutex);

	seq_puts(m, "\n");
	return;

out_unlock:
	/* Early exits above emit no trailing newline. */
	mutex_unlock(&drrs->mutex);
}
/*
 * debugfs show callback: print the DRRS status of every active CRTC.
 *
 * Each CRTC is inspected with its own modeset mutex held.  Active CRTCs are
 * numbered 1, 2, ... in iteration order.  If none is active, a single
 * notice line is printed instead.  Always returns 0.
 */
static int i915_drrs_status(struct seq_file *m, void *unused)
{
	struct drm_info_node *info_node = m->private;
	struct drm_device *dev = info_node->minor->dev;
	struct intel_crtc *intel_crtc;
	int n_active = 0;

	for_each_intel_crtc(dev, intel_crtc) {
		drm_modeset_lock(&intel_crtc->base.mutex, NULL);

		if (intel_crtc->base.state->active) {
			seq_printf(m, "\nCRTC %d: ", ++n_active);
			drrs_status_per_crtc(m, dev, intel_crtc);
		}

		drm_modeset_unlock(&intel_crtc->base.mutex);
	}

	if (n_active == 0)
		seq_puts(m, "No active crtc found\n");

	return 0;
}
/* Descriptor attached to each per-pipe CRC debugfs file. */
struct pipe_crc_info {
	/* debugfs file name */
	const char *name;
	/* owning DRM device; filled in when the debugfs file is created */
	struct drm_device *dev;
	/* pipe whose entry in dev_priv->pipe_crc[] this file exposes */
	enum pipe pipe;
};
/*
 * debugfs show callback: dump the DP MST topology of every DisplayPort
 * encoder whose digital port has can_mst set.  The encoder list is walked
 * with all modeset locks held.  Always returns 0.
 */
static int i915_dp_mst_info(struct seq_file *m, void *unused)
{
	struct drm_info_node *info_node = m->private;
	struct drm_device *dev = info_node->minor->dev;
	struct drm_encoder *encoder;

	drm_modeset_lock_all(dev);

	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
		struct intel_digital_port *dig_port;

		/* Only DisplayPort encoders are looked at. */
		if (to_intel_encoder(encoder)->type == INTEL_OUTPUT_DISPLAYPORT) {
			dig_port = enc_to_dig_port(encoder);
			if (dig_port->dp.can_mst)
				drm_dp_mst_dump_topology(m, &dig_port->dp.mst_mgr);
		}
	}

	drm_modeset_unlock_all(dev);

	return 0;
}
/*
 * open() for a per-pipe CRC debugfs file.
 *
 * Only one opener at a time is allowed per pipe.  On success the pipe's
 * "opened" flag is set and the pipe_crc_info is stashed in
 * filep->private_data.
 *
 * Returns 0 on success, -ENODEV if the pipe index is not below the device's
 * num_pipes, or -EBUSY if the file is already open.
 */
static int i915_pipe_crc_open(struct inode *inode, struct file *filep)
{
	struct pipe_crc_info *info = inode->i_private;
	struct drm_i915_private *dev_priv = info->dev->dev_private;
	struct intel_pipe_crc *crc;
	int err = 0;

	if (info->pipe >= INTEL_INFO(info->dev)->num_pipes)
		return -ENODEV;

	crc = &dev_priv->pipe_crc[info->pipe];

	spin_lock_irq(&crc->lock);

	if (crc->opened) {
		/* already open */
		err = -EBUSY;
	} else {
		crc->opened = true;
		filep->private_data = inode->i_private;
	}

	spin_unlock_irq(&crc->lock);

	return err;
}
/*
 * release() for a per-pipe CRC debugfs file: clear the pipe's "opened"
 * flag under the pipe CRC lock so the file can be opened again.
 * Always returns 0.
 */
static int i915_pipe_crc_release(struct inode *inode, struct file *filep)
{
	struct pipe_crc_info *crc_info = inode->i_private;
	struct drm_i915_private *i915 = crc_info->dev->dev_private;
	struct intel_pipe_crc *crc = &i915->pipe_crc[crc_info->pipe];

	spin_lock_irq(&crc->lock);
	crc->opened = false;
	spin_unlock_irq(&crc->lock);

	return 0;
}
/* (6 fields, 8 chars each, space separated (5) + '\n') */
#define PIPE_CRC_LINE_LEN (6 * 8 + 5 + 1)
/* account for the terminating '\0' */
#define PIPE_CRC_BUFFER_LEN (PIPE_CRC_LINE_LEN + 1)
/*
 * Number of CRC entries currently queued between tail and head of the
 * pipe's circular buffer.  The caller must hold pipe_crc->lock.
 */
static int pipe_crc_data_count(struct intel_pipe_crc *pipe_crc)
{
	int queued;

	assert_spin_locked(&pipe_crc->lock);

	queued = CIRC_CNT(pipe_crc->head, pipe_crc->tail,
			  INTEL_PIPE_CRC_ENTRIES_NR);

	return queued;
}
/*
 * read() for a per-pipe CRC debugfs file.
 *
 * Waits (unless O_NONBLOCK is set) until at least one CRC entry is queued,
 * then formats up to count / PIPE_CRC_LINE_LEN queued entries as text
 * lines ("frame crc0 crc1 crc2 crc3 crc4\n") and copies them to @user_buf.
 *
 * Returns the number of bytes produced, 0 if the pipe's CRC source is
 * INTEL_PIPE_CRC_SOURCE_NONE, -EINVAL if @count cannot hold one line,
 * -EAGAIN for a non-blocking read with nothing queued, the error from an
 * interrupted wait, or -EFAULT if copying to user space fails.
 */
static ssize_t
i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count,
		   loff_t *pos)
{
	struct pipe_crc_info *info = filep->private_data;
	struct drm_device *dev = info->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe];
	char buf[PIPE_CRC_BUFFER_LEN];
	int n_entries;
	ssize_t bytes_read;

	/*
	 * Don't allow user space to provide buffers not big enough to hold
	 * a line of data.
	 */
	if (count < PIPE_CRC_LINE_LEN)
		return -EINVAL;

	if (pipe_crc->source == INTEL_PIPE_CRC_SOURCE_NONE)
		return 0;

	/* nothing to read: fail non-blocking readers, otherwise wait */
	spin_lock_irq(&pipe_crc->lock);
	while (pipe_crc_data_count(pipe_crc) == 0) {
		int ret;

		if (filep->f_flags & O_NONBLOCK) {
			spin_unlock_irq(&pipe_crc->lock);
			return -EAGAIN;
		}

		ret = wait_event_interruptible_lock_irq(pipe_crc->wq,
				pipe_crc_data_count(pipe_crc), pipe_crc->lock);
		if (ret) {
			spin_unlock_irq(&pipe_crc->lock);
			return ret;
		}
	}

	/* We now have one or more entries to read */
	n_entries = count / PIPE_CRC_LINE_LEN;

	bytes_read = 0;
	while (n_entries > 0) {
		struct intel_pipe_crc_entry *entry =
			&pipe_crc->entries[pipe_crc->tail];

		if (CIRC_CNT(pipe_crc->head, pipe_crc->tail,
			     INTEL_PIPE_CRC_ENTRIES_NR) < 1)
			break;

		/* The mask below only wraps correctly for power-of-2 sizes. */
		BUILD_BUG_ON_NOT_POWER_OF_2(INTEL_PIPE_CRC_ENTRIES_NR);
		pipe_crc->tail = (pipe_crc->tail + 1) & (INTEL_PIPE_CRC_ENTRIES_NR - 1);

		/* Format while still holding the lock that guards the entry. */
		bytes_read += snprintf(buf, PIPE_CRC_BUFFER_LEN,
				       "%8u %8x %8x %8x %8x %8x\n",
				       entry->frame, entry->crc[0],
				       entry->crc[1], entry->crc[2],
				       entry->crc[3], entry->crc[4]);

		/* copy_to_user() may fault, so drop the spinlock around it. */
		spin_unlock_irq(&pipe_crc->lock);

		/*
		 * copy_to_user() returns the number of bytes it could NOT
		 * copy, so any non-zero result (not just a complete failure)
		 * means user space did not receive the whole line.
		 */
		if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN))
			return -EFAULT;

		user_buf += PIPE_CRC_LINE_LEN;
		n_entries--;

		spin_lock_irq(&pipe_crc->lock);
	}

	spin_unlock_irq(&pipe_crc->lock);

	return bytes_read;
}
/*
 * File operations for the per-pipe CRC debugfs files: single-opener
 * open/release plus a read handler that returns formatted CRC lines.
 */
static const struct file_operations i915_pipe_crc_fops = {
	.owner = THIS_MODULE,
	.open = i915_pipe_crc_open,
	.read = i915_pipe_crc_read,
	.release = i915_pipe_crc_release,
};
/*
 * Descriptors for the per-pipe CRC debugfs files, indexed by enum pipe in
 * i915_pipe_crc_create().  Only pipes A-C are given a name here; the .dev
 * field is filled in when the file is created.
 */
static struct pipe_crc_info i915_pipe_crc_data[I915_MAX_PIPES] = {
	{
		.name = "i915_pipe_A_crc",
		.pipe = PIPE_A,
	},
	{
		.name = "i915_pipe_B_crc",
		.pipe = PIPE_B,
	},
	{
		.name = "i915_pipe_C_crc",
		.pipe = PIPE_C,
	},
};
/*
 * Create the CRC debugfs file for @pipe under @root and register it with
 * @minor as a fake info node.
 *
 * Returns -ENOMEM if the debugfs file could not be created, otherwise the
 * result of drm_add_fake_info_node().
 */
static int i915_pipe_crc_create(struct dentry *root, struct drm_minor *minor,
				enum pipe pipe)
{
	struct pipe_crc_info *crc_info = &i915_pipe_crc_data[pipe];
	struct dentry *entry;

	/* Bind the static descriptor to this device before exposing it. */
	crc_info->dev = minor->dev;

	entry = debugfs_create_file(crc_info->name, S_IRUGO, root, crc_info,
				    &i915_pipe_crc_fops);

	return entry ? drm_add_fake_info_node(minor, entry, crc_info) : -ENOMEM;
}
/*
 * Names of the pipe CRC sources.  pipe_crc_source_name() indexes this table
 * with an enum intel_pipe_crc_source value and build-asserts that it has
 * exactly INTEL_PIPE_CRC_SOURCE_MAX entries.
 */
static const char * const pipe_crc_sources[] = {
	"none",
	"plane1",
	"plane2",
	"pf",
	"pipe",
	"TV",
	"DP-B",
	"DP-C",
	"DP-D",
	"auto",
};
/*
 * Map a CRC source to its name in pipe_crc_sources[].  The build fails if
 * the table size and INTEL_PIPE_CRC_SOURCE_MAX ever disagree.
 */
static const char *pipe_crc_source_name(enum intel_pipe_crc_source source)
{
	const char *name;

	BUILD_BUG_ON(ARRAY_SIZE(pipe_crc_sources) != INTEL_PIPE_CRC_SOURCE_MAX);

	name = pipe_crc_sources[source];

	return name;
}
/*
 * seq_file show callback: print one "<pipe letter> <source name>" line for
 * each of the I915_MAX_PIPES pipe CRC slots.  Always returns 0.
 */
static int display_crc_ctl_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = m->private;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int pipe = 0;

	while (pipe < I915_MAX_PIPES) {
		const char *src_name =
			pipe_crc_source_name(dev_priv->pipe_crc[pipe].source);

		seq_printf(m, "%c %s\n", pipe_name(pipe), src_name);
		pipe++;
	}

	return 0;
}
/*
 * open() for the CRC control file: set up a single-record seq_file that
 * renders via display_crc_ctl_show(), with the inode's private pointer
 * (the drm_device) as its data.
 */
static int display_crc_ctl_open(struct inode *inode, struct file *file)
{
	struct drm_device *drm = inode->i_private;
	int err;

	err = single_open(file, display_crc_ctl_show, drm);

	return err;
}
/*
 * Compute the CRC control register value for @source.
 *
 * INTEL_PIPE_CRC_SOURCE_AUTO is resolved to INTEL_PIPE_CRC_SOURCE_PIPE and
 * written back through @source.  Only the PIPE and NONE sources are
 * accepted; anything else yields -EINVAL and leaves @val untouched.
 *
 * Returns 0 on success with the register value stored in @val.
 */
static int i8xx_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source,
				 uint32_t *val)
{
	/* "auto" resolves to the pipe source in this variant. */
	if (*source == INTEL_PIPE_CRC_SOURCE_AUTO)
		*source = INTEL_PIPE_CRC_SOURCE_PIPE;

	if (*source == INTEL_PIPE_CRC_SOURCE_PIPE) {
		*val = PIPE_CRC_ENABLE | PIPE_CRC_INCLUDE_BORDER_I8XX;
		return 0;
	}

	if (*source == INTEL_PIPE_CRC_SOURCE_NONE) {
		*val = 0;
		return 0;
	}

	return -EINVAL;
}
static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe,
enum intel_pipe_crc_source *source)
{
struct intel_encoder *encoder;
struct intel_crtc *crtc;
struct intel_digital_port *dig_port;
int ret = 0;
*source = INTEL_PIPE_CRC_SOURCE_PIPE;
drm: Split connection_mutex out of mode_config.mutex (v3) After the split-out of crtc locks from the big mode_config.mutex there's still two major areas it protects: - Various connector probe states, like connector->status, EDID properties, probed mode lists and similar information. - The links from connector->encoder and encoder->crtc and other modeset-relevant connector state (e.g. properties which control the panel fitter). The later is used by modeset operations. But they don't really care about the former since it's allowed to e.g. enable a disconnected VGA output or with a mode not in the probed list. Thus far this hasn't been a problem, but for the atomic modeset conversion Rob Clark needs to convert all modeset relevant locks into w/w locks. This is required because the order of acquisition is determined by how userspace supplies the atomic modeset data. This has run into troubles in the detect path since the i915 load detect code needs _both_ protections offered by the mode_config.mutex: It updates probe state and it needs to change the modeset configuration to enable the temporary load detect pipe. The big deal here is that for the probe/detect users of this lock a plain mutex fits best, but for atomic modesets we really want a w/w mutex. To fix this lets split out a new connection_mutex lock for the modeset relevant parts. For simplicity I've decided to only add one additional lock for all connector/encoder links and modeset configuration states. We have piles of different modeset objects in addition to those (like bridges or panels), so adding per-object locks would be much more effort. Also, we're guaranteed (at least for now) to do a full modeset if we need to acquire this lock. Which means that fine-grained locking is fairly irrelevant compared to the amount of time the full modeset will take. I've done a full audit, and there's just a few things that justify special focus: - Locking in drm_sysfs.c is almost completely absent. 
We should sprinkle mode_config.connection_mutex over this file a bit, but since it already lacks mode_config.mutex this patch wont make the situation any worse. This is material for a follow-up patch. - omap has a omap_framebuffer_flush function which walks the connector->encoder->crtc links and is called from many contexts. Some look like they don't acquire mode_config.mutex, so this is already racy. Again fixing this is material for a separate patch. - The radeon hot_plug function to retrain DP links looks at connector->dpms. Currently this happens without any locking, so is already racy. I think radeon_hotplug_work_func should gain mutex_lock/unlock calls for the mode_config.connection_mutex. - Same applies to i915's intel_dp_hot_plug. But again, this is already racy. - i915 load_detect code needs to acquire this lock. Which means the w/w dance due to Rob's work will be nicely contained to _just_ this function. I've added fixme comments everywhere where it looks suspicious but in the sysfs code. After a quick irc discussion with Dave Airlie it sounds like the lack of locking in there is due to sysfs cleanup fun at module unload. v1: original (only compile tested) v2: missing mutex_init(), etc (from Rob Clark) v3: i915 needs more care in the conversion: - Protect the edp pp logic with the connection_mutex. - Use connection_mutex in the backlight code due to get_pipe_from_connector. - Use drm_modeset_lock_all in suspend/resume paths. - Update lock checks in the overlay code. Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Rob Clark <robdclark@gmail.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Rob Clark <robdclark@gmail.com>
2014-05-30 05:54:47 +08:00
drm_modeset_lock_all(dev);
for_each_intel_encoder(dev, encoder) {
if (!encoder->base.crtc)
continue;
crtc = to_intel_crtc(encoder->base.crtc);
if (crtc->pipe != pipe)
continue;
switch (encoder->type) {
case INTEL_OUTPUT_TVOUT:
*source = INTEL_PIPE_CRC_SOURCE_TV;
break;
case INTEL_OUTPUT_DISPLAYPORT:
case INTEL_OUTPUT_EDP:
dig_port = enc_to_dig_port(&encoder->base);
switch (dig_port->port) {
case PORT_B:
*source = INTEL_PIPE_CRC_SOURCE_DP_B;
break;
case PORT_C:
*source = INTEL_PIPE_CRC_SOURCE_DP_C;
break;
case PORT_D:
*source = INTEL_PIPE_CRC_SOURCE_DP_D;
break;
default:
WARN(1, "nonexisting DP port %c\n",
port_name(dig_port->port));
break;
}
break;
default:
break;
}
}
drm: Split connection_mutex out of mode_config.mutex (v3) After the split-out of crtc locks from the big mode_config.mutex there's still two major areas it protects: - Various connector probe states, like connector->status, EDID properties, probed mode lists and similar information. - The links from connector->encoder and encoder->crtc and other modeset-relevant connector state (e.g. properties which control the panel fitter). The later is used by modeset operations. But they don't really care about the former since it's allowed to e.g. enable a disconnected VGA output or with a mode not in the probed list. Thus far this hasn't been a problem, but for the atomic modeset conversion Rob Clark needs to convert all modeset relevant locks into w/w locks. This is required because the order of acquisition is determined by how userspace supplies the atomic modeset data. This has run into troubles in the detect path since the i915 load detect code needs _both_ protections offered by the mode_config.mutex: It updates probe state and it needs to change the modeset configuration to enable the temporary load detect pipe. The big deal here is that for the probe/detect users of this lock a plain mutex fits best, but for atomic modesets we really want a w/w mutex. To fix this lets split out a new connection_mutex lock for the modeset relevant parts. For simplicity I've decided to only add one additional lock for all connector/encoder links and modeset configuration states. We have piles of different modeset objects in addition to those (like bridges or panels), so adding per-object locks would be much more effort. Also, we're guaranteed (at least for now) to do a full modeset if we need to acquire this lock. Which means that fine-grained locking is fairly irrelevant compared to the amount of time the full modeset will take. I've done a full audit, and there's just a few things that justify special focus: - Locking in drm_sysfs.c is almost completely absent. 
We should sprinkle mode_config.connection_mutex over this file a bit, but since it already lacks mode_config.mutex this patch wont make the situation any worse. This is material for a follow-up patch. - omap has a omap_framebuffer_flush function which walks the connector->encoder->crtc links and is called from many contexts. Some look like they don't acquire mode_config.mutex, so this is already racy. Again fixing this is material for a separate patch. - The radeon hot_plug function to retrain DP links looks at connector->dpms. Currently this happens without any locking, so is already racy. I think radeon_hotplug_work_func should gain mutex_lock/unlock calls for the mode_config.connection_mutex. - Same applies to i915's intel_dp_hot_plug. But again, this is already racy. - i915 load_detect code needs to acquire this lock. Which means the w/w dance due to Rob's work will be nicely contained to _just_ this function. I've added fixme comments everywhere where it looks suspicious but in the sysfs code. After a quick irc discussion with Dave Airlie it sounds like the lack of locking in there is due to sysfs cleanup fun at module unload. v1: original (only compile tested) v2: missing mutex_init(), etc (from Rob Clark) v3: i915 needs more care in the conversion: - Protect the edp pp logic with the connection_mutex. - Use connection_mutex in the backlight code due to get_pipe_from_connector. - Use drm_modeset_lock_all in suspend/resume paths. - Update lock checks in the overlay code. Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Rob Clark <robdclark@gmail.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Rob Clark <robdclark@gmail.com>
2014-05-30 05:54:47 +08:00
drm_modeset_unlock_all(dev);
return ret;
}
/*
 * Compute the PIPE_CRC_CTL value for @source on Valleyview/Cherryview.
 *
 * An AUTO source is first resolved via i9xx_pipe_crc_auto_source(), which
 * updates *@source. On success *@val holds the control register value
 * (0 for the NONE source). For the DP port sources, PORT_DFT2_G4X is also
 * updated with the DC-balance reset bit and the scramble reset bit of @pipe.
 *
 * Returns 0 on success, -EINVAL for an unsupported source or pipe, or the
 * error reported by the auto-source lookup.
 */
static int vlv_pipe_crc_ctl_reg(struct drm_device *dev,
				enum pipe pipe,
				enum intel_pipe_crc_source *source,
				uint32_t *val)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool stable_symbols = false;
	uint32_t dft2;

	if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) {
		int err = i9xx_pipe_crc_auto_source(dev, pipe, source);

		if (err)
			return err;
	}

	switch (*source) {
	case INTEL_PIPE_CRC_SOURCE_NONE:
		*val = 0;
		break;
	case INTEL_PIPE_CRC_SOURCE_PIPE:
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PIPE_VLV;
		break;
	case INTEL_PIPE_CRC_SOURCE_DP_B:
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_B_VLV;
		stable_symbols = true;
		break;
	case INTEL_PIPE_CRC_SOURCE_DP_C:
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_C_VLV;
		stable_symbols = true;
		break;
	case INTEL_PIPE_CRC_SOURCE_DP_D:
		/* Port D is only accepted on Cherryview. */
		if (!IS_CHERRYVIEW(dev))
			return -EINVAL;
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_D_VLV;
		stable_symbols = true;
		break;
	default:
		return -EINVAL;
	}

	if (!stable_symbols)
		return 0;

	/*
	 * When the pipe CRC tap point is after the transcoders we need
	 * to tweak symbol-level features to produce a deterministic series of
	 * symbols for a given frame. We need to reset those features only once
	 * a frame (instead of every nth symbol):
	 *   - DC-balance: used to ensure a better clock recovery from the data
	 *     link (SDVO)
	 *   - DisplayPort scrambling: used for EMI reduction
	 */
	dft2 = I915_READ(PORT_DFT2_G4X);
	dft2 |= DC_BALANCE_RESET_VLV;

	switch (pipe) {
	case PIPE_A:
		dft2 |= PIPE_A_SCRAMBLE_RESET;
		break;
	case PIPE_B:
		dft2 |= PIPE_B_SCRAMBLE_RESET;
		break;
	case PIPE_C:
		dft2 |= PIPE_C_SCRAMBLE_RESET;
		break;
	default:
		return -EINVAL;
	}

	I915_WRITE(PORT_DFT2_G4X, dft2);

	return 0;
}
/*
 * Compute the PIPE_CRC_CTL value for @source on gen < 5 hardware.
 *
 * An AUTO source is first resolved via i9xx_pipe_crc_auto_source(), which
 * updates *@source. The TV source requires SUPPORTS_TV() and the DP port
 * sources require G4X. For the DP port sources the DC-balance reset in
 * PORT_DFT_I9XX and the scramble reset of @pipe in PORT_DFT2_G4X are set.
 *
 * Returns 0 on success with *@val filled in, -EINVAL for an unsupported
 * source, or the error reported by the auto-source lookup.
 */
static int i9xx_pipe_crc_ctl_reg(struct drm_device *dev,
				 enum pipe pipe,
				 enum intel_pipe_crc_source *source,
				 uint32_t *val)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool stable_symbols = false;
	uint32_t dft2;

	if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) {
		int err = i9xx_pipe_crc_auto_source(dev, pipe, source);

		if (err)
			return err;
	}

	switch (*source) {
	case INTEL_PIPE_CRC_SOURCE_NONE:
		*val = 0;
		break;
	case INTEL_PIPE_CRC_SOURCE_PIPE:
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PIPE_I9XX;
		break;
	case INTEL_PIPE_CRC_SOURCE_TV:
		if (!SUPPORTS_TV(dev))
			return -EINVAL;
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_TV_PRE;
		break;
	case INTEL_PIPE_CRC_SOURCE_DP_B:
		if (!IS_G4X(dev))
			return -EINVAL;
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_B_G4X;
		stable_symbols = true;
		break;
	case INTEL_PIPE_CRC_SOURCE_DP_C:
		if (!IS_G4X(dev))
			return -EINVAL;
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_C_G4X;
		stable_symbols = true;
		break;
	case INTEL_PIPE_CRC_SOURCE_DP_D:
		if (!IS_G4X(dev))
			return -EINVAL;
		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_D_G4X;
		stable_symbols = true;
		break;
	default:
		return -EINVAL;
	}

	if (!stable_symbols)
		return 0;

	/*
	 * When the pipe CRC tap point is after the transcoders we need
	 * to tweak symbol-level features to produce a deterministic series of
	 * symbols for a given frame. We need to reset those features only once
	 * a frame (instead of every nth symbol):
	 *   - DC-balance: used to ensure a better clock recovery from the data
	 *     link (SDVO)
	 *   - DisplayPort scrambling: used for EMI reduction
	 */
	dft2 = I915_READ(PORT_DFT2_G4X);

	WARN_ON(!IS_G4X(dev));

	I915_WRITE(PORT_DFT_I9XX,
		   I915_READ(PORT_DFT_I9XX) | DC_BALANCE_RESET);

	/* Any pipe other than A is treated as pipe B here. */
	dft2 |= (pipe == PIPE_A) ? PIPE_A_SCRAMBLE_RESET :
				   PIPE_B_SCRAMBLE_RESET;

	I915_WRITE(PORT_DFT2_G4X, dft2);

	return 0;
}
/*
 * Clear the scramble reset bit of @pipe in PORT_DFT2_G4X on VLV/CHV. Once no
 * pipe has its scramble reset bit set any more, the VLV DC-balance reset bit
 * is cleared as well. An unknown pipe leaves the register untouched.
 */
static void vlv_undo_pipe_scramble_reset(struct drm_device *dev,
					 enum pipe pipe)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dft2 = I915_READ(PORT_DFT2_G4X);
	uint32_t pipe_bit;

	switch (pipe) {
	case PIPE_A:
		pipe_bit = PIPE_A_SCRAMBLE_RESET;
		break;
	case PIPE_B:
		pipe_bit = PIPE_B_SCRAMBLE_RESET;
		break;
	case PIPE_C:
		pipe_bit = PIPE_C_SCRAMBLE_RESET;
		break;
	default:
		return;
	}

	dft2 &= ~pipe_bit;

	/* Last user gone: drop the DC-balance reset too. */
	if ((dft2 & PIPE_SCRAMBLE_RESET_MASK) == 0)
		dft2 &= ~DC_BALANCE_RESET_VLV;

	I915_WRITE(PORT_DFT2_G4X, dft2);
}
/*
 * Clear the scramble reset bit of @pipe in PORT_DFT2_G4X (any pipe other than
 * A is handled as pipe B). When no scramble reset bit remains set, the
 * DC-balance reset bit in PORT_DFT_I9XX is cleared too.
 */
static void g4x_undo_pipe_scramble_reset(struct drm_device *dev,
					 enum pipe pipe)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dft2 = I915_READ(PORT_DFT2_G4X);

	dft2 &= (pipe == PIPE_A) ? ~PIPE_A_SCRAMBLE_RESET :
				   ~PIPE_B_SCRAMBLE_RESET;
	I915_WRITE(PORT_DFT2_G4X, dft2);

	if (dft2 & PIPE_SCRAMBLE_RESET_MASK)
		return;

	I915_WRITE(PORT_DFT_I9XX,
		   I915_READ(PORT_DFT_I9XX) & ~DC_BALANCE_RESET);
}
/*
 * Compute the PIPE_CRC_CTL value for @source on the platforms that
 * pipe_crc_set_source() routes here (gen5/gen6).
 *
 * AUTO is rewritten to the PIPE source. Returns 0 with *@val filled in
 * (0 for NONE), or -EINVAL for a source this helper does not handle.
 */
static int ilk_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source,
				uint32_t *val)
{
	uint32_t select;

	if (*source == INTEL_PIPE_CRC_SOURCE_AUTO)
		*source = INTEL_PIPE_CRC_SOURCE_PIPE;

	switch (*source) {
	case INTEL_PIPE_CRC_SOURCE_NONE:
		*val = 0;
		return 0;
	case INTEL_PIPE_CRC_SOURCE_PLANE1:
		select = PIPE_CRC_SOURCE_PRIMARY_ILK;
		break;
	case INTEL_PIPE_CRC_SOURCE_PLANE2:
		select = PIPE_CRC_SOURCE_SPRITE_ILK;
		break;
	case INTEL_PIPE_CRC_SOURCE_PIPE:
		select = PIPE_CRC_SOURCE_PIPE_ILK;
		break;
	default:
		return -EINVAL;
	}

	*val = PIPE_CRC_ENABLE | select;

	return 0;
}
/*
 * Toggle the pipe A CRC workaround: set pch_pfit.force_thru to @enable in the
 * pipe A CRTC state and commit it atomically. If pipe A is driven by the eDP
 * transcoder and the pfit enable state differs from @enable, the commit is
 * flagged with connectors_changed.
 *
 * Failures are only reported through WARN(); on failure the atomic state is
 * freed here.
 */
static void hsw_trans_edp_pipe_A_crc_wa(struct drm_device *dev, bool enable)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_crtc *pipe_a_crtc =
		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_A]);
	struct intel_crtc_state *crtc_state;
	struct drm_atomic_state *state;
	int err = 0;

	drm_modeset_lock_all(dev);

	state = drm_atomic_state_alloc(dev);
	if (!state) {
		err = -ENOMEM;
		goto unlock;
	}

	state->acquire_ctx = drm_modeset_legacy_acquire_ctx(&pipe_a_crtc->base);

	crtc_state = intel_atomic_get_crtc_state(state, pipe_a_crtc);
	if (IS_ERR(crtc_state)) {
		err = PTR_ERR(crtc_state);
		goto unlock;
	}

	crtc_state->pch_pfit.force_thru = enable;
	if (crtc_state->cpu_transcoder == TRANSCODER_EDP &&
	    crtc_state->pch_pfit.enabled != enable)
		crtc_state->base.connectors_changed = true;

	err = drm_atomic_commit(state);

unlock:
	drm_modeset_unlock_all(dev);

	WARN(err, "Toggling workaround to %i returns %i\n", enable, err);
	if (err)
		drm_atomic_state_free(state);
}
/*
 * Compute the PIPE_CRC_CTL value for @source on the platforms that
 * pipe_crc_set_source() routes here (the final, non-gen5/6 branch).
 *
 * AUTO is rewritten to the PF source. Selecting PF on Haswell pipe A also
 * enables the pipe A CRC workaround as a side effect.
 *
 * Returns 0 with *@val filled in (0 for NONE), or -EINVAL for a source this
 * helper does not handle.
 */
static int ivb_pipe_crc_ctl_reg(struct drm_device *dev,
				enum pipe pipe,
				enum intel_pipe_crc_source *source,
				uint32_t *val)
{
	uint32_t select;

	if (*source == INTEL_PIPE_CRC_SOURCE_AUTO)
		*source = INTEL_PIPE_CRC_SOURCE_PF;

	switch (*source) {
	case INTEL_PIPE_CRC_SOURCE_NONE:
		*val = 0;
		return 0;
	case INTEL_PIPE_CRC_SOURCE_PLANE1:
		select = PIPE_CRC_SOURCE_PRIMARY_IVB;
		break;
	case INTEL_PIPE_CRC_SOURCE_PLANE2:
		select = PIPE_CRC_SOURCE_SPRITE_IVB;
		break;
	case INTEL_PIPE_CRC_SOURCE_PF:
		if (IS_HASWELL(dev) && pipe == PIPE_A)
			hsw_trans_edp_pipe_A_crc_wa(dev, true);
		select = PIPE_CRC_SOURCE_PF_IVB;
		break;
	default:
		return -EINVAL;
	}

	*val = PIPE_CRC_ENABLE | select;

	return 0;
}
/*
 * Switch the CRC source of @pipe to @source.
 *
 * Only none -> source and source -> none transitions are accepted; asking for
 * the source that is already active is a no-op. The pipe's display power
 * domain must already be enabled; a reference is held for the duration of the
 * call and dropped at "out".
 *
 * Returns 0 on success, -EINVAL for a forbidden transition or an unsupported
 * source, -EIO if the pipe's power domain is off, -ENOMEM if the entry buffer
 * cannot be allocated.
 */
static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
enum intel_pipe_crc_source source)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe];
struct intel_crtc *crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev,
pipe));
enum intel_display_power_domain power_domain;
u32 val = 0; /* shut up gcc */
int ret;
if (pipe_crc->source == source)
return 0;
/* forbid changing the source without going back to 'none' */
if (pipe_crc->source && source)
return -EINVAL;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
return -EIO;
}
/*
 * Let the platform helper resolve an "auto" source and compute the
 * PIPE_CRC_CTL value. Some helpers also program related registers or
 * apply a workaround as a side effect.
 */
if (IS_GEN2(dev))
ret = i8xx_pipe_crc_ctl_reg(&source, &val);
else if (INTEL_INFO(dev)->gen < 5)
ret = i9xx_pipe_crc_ctl_reg(dev, pipe, &source, &val);
else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
ret = vlv_pipe_crc_ctl_reg(dev, pipe, &source, &val);
else if (IS_GEN5(dev) || IS_GEN6(dev))
ret = ilk_pipe_crc_ctl_reg(&source, &val);
else
ret = ivb_pipe_crc_ctl_reg(dev, pipe, &source, &val);
if (ret != 0)
goto out;
/* none -> real source transition */
if (source) {
struct intel_pipe_crc_entry *entries;
DRM_DEBUG_DRIVER("collecting CRCs for pipe %c, %s\n",
pipe_name(pipe), pipe_crc_source_name(source));
/* Allocate outside the spinlock; GFP_KERNEL may sleep. */
entries = kcalloc(INTEL_PIPE_CRC_ENTRIES_NR,
sizeof(pipe_crc->entries[0]),
GFP_KERNEL);
if (!entries) {
ret = -ENOMEM;
goto out;
}
/*
* When IPS gets enabled, the pipe CRC changes. Since IPS gets
* enabled and disabled dynamically based on package C states,
* user space can't make reliable use of the CRCs, so let's just
* completely disable it.
*/
hsw_disable_ips(crtc);
/* Install the fresh, empty buffer under the lock. */
spin_lock_irq(&pipe_crc->lock);
kfree(pipe_crc->entries);
pipe_crc->entries = entries;
pipe_crc->head = 0;
pipe_crc->tail = 0;
spin_unlock_irq(&pipe_crc->lock);
}
pipe_crc->source = source;
I915_WRITE(PIPE_CRC_CTL(pipe), val);
POSTING_READ(PIPE_CRC_CTL(pipe));
/* real source -> none transition */
if (source == INTEL_PIPE_CRC_SOURCE_NONE) {
struct intel_pipe_crc_entry *entries;
/* NOTE(review): shadows the function-scope 'crtc' declared above. */
struct intel_crtc *crtc =
to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
DRM_DEBUG_DRIVER("stopping CRCs for pipe %c\n",
pipe_name(pipe));
/* Wait one vblank on an active pipe before releasing the buffer. */
drm_modeset_lock(&crtc->base.mutex, NULL);
if (crtc->base.state->active)
intel_wait_for_vblank(dev, pipe);
drm_modeset_unlock(&crtc->base.mutex);
/* Detach the buffer under the lock, free it afterwards. */
spin_lock_irq(&pipe_crc->lock);
entries = pipe_crc->entries;
pipe_crc->entries = NULL;
pipe_crc->head = 0;
pipe_crc->tail = 0;
spin_unlock_irq(&pipe_crc->lock);
kfree(entries);
/* Undo whatever the platform helper set up when CRCs were enabled. */
if (IS_G4X(dev))
g4x_undo_pipe_scramble_reset(dev, pipe);
else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
vlv_undo_pipe_scramble_reset(dev, pipe);
else if (IS_HASWELL(dev) && pipe == PIPE_A)
hsw_trans_edp_pipe_A_crc_wa(dev, false);
hsw_enable_ips(crtc);
}
ret = 0;
out:
/* Drop the reference taken by intel_display_power_get_if_enabled(). */
intel_display_power_put(dev_priv, power_domain);
return ret;
}
/*
 * Parse pipe CRC command strings:
 *   command: wsp* object wsp+ name wsp+ source wsp*
 *   object: 'pipe'
 *   name: (A | B | C)
 *   source: (none | plane1 | plane2 | pf)
 *   wsp: (#0x20 | #0x9 | #0xA)+
 *
 * eg.:
 *  "pipe A plane1"  ->  Start CRC computations on plane1 of pipe A
 *  "pipe A none"    ->  Stop CRC
 */

/*
 * Split @buf in place into whitespace-separated words.
 *
 * Each word is NUL-terminated inside @buf and its start is stored in @words.
 * Returns the number of words found, or -EINVAL if @buf holds more than
 * @max_words words.
 */
static int display_crc_ctl_tokenize(char *buf, char *words[], int max_words)
{
	char *cursor = skip_spaces(buf);
	int count = 0;

	while (*cursor) {
		char *stop = cursor;

		/* find end of word */
		while (*stop && !isspace(*stop))
			stop++;

		if (count == max_words) {
			DRM_DEBUG_DRIVER("too many words, allowed <= %d\n",
					 max_words);
			return -EINVAL;	/* ran out of words[] before bytes */
		}

		/* terminate the word unless it already ends the buffer */
		if (*stop)
			*stop++ = '\0';

		words[count++] = cursor;

		/* skip white space ahead of the next word */
		cursor = skip_spaces(stop);
	}

	return count;
}
/* Objects a CRC command can address; values index pipe_crc_objects[]. */
enum intel_pipe_crc_object {
	PIPE_CRC_OBJECT_PIPE = 0,
};
static const char * const pipe_crc_objects[] = {
"pipe",
};
/*
 * Look @buf up in pipe_crc_objects[] (exact match) and store the matching
 * index in *@o. Returns 0 on a match, -EINVAL otherwise.
 */
static int
display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o)
{
	int idx = 0;

	while (idx < ARRAY_SIZE(pipe_crc_objects)) {
		if (strcmp(buf, pipe_crc_objects[idx]) == 0) {
			*o = idx;
			return 0;
		}
		idx++;
	}

	return -EINVAL;
}
/*
 * Convert a pipe letter to its enum pipe value. Only the first character of
 * @buf is examined; it must lie in ['A', pipe_name(I915_MAX_PIPES)).
 * Returns 0 with *@pipe set, or -EINVAL for any other character.
 */
static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe)
{
	const char letter = buf[0];

	if (letter >= 'A' && letter < pipe_name(I915_MAX_PIPES)) {
		*pipe = letter - 'A';
		return 0;
	}

	return -EINVAL;
}
/*
 * Look @buf up in pipe_crc_sources[] (exact match) and store the matching
 * index in *@s. Returns 0 on a match, -EINVAL otherwise.
 */
static int
display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s)
{
	int idx = 0;

	while (idx < ARRAY_SIZE(pipe_crc_sources)) {
		if (strcmp(buf, pipe_crc_sources[idx]) == 0) {
			*s = idx;
			return 0;
		}
		idx++;
	}

	return -EINVAL;
}
/*
 * Parse one "<object> <pipe> <source>" command held in @buf (modified in
 * place by the tokenizer) and apply it via pipe_crc_set_source().
 *
 * Returns -EINVAL if the command does not consist of exactly N_WORDS words or
 * any word is not recognised; otherwise the result of pipe_crc_set_source().
 * @len is not used.
 */
static int display_crc_ctl_parse(struct drm_device *dev, char *buf, size_t len)
{
#define N_WORDS 3
	enum intel_pipe_crc_source source;
	enum intel_pipe_crc_object object;
	enum pipe pipe;
	char *argv[N_WORDS];
	int argc;

	argc = display_crc_ctl_tokenize(buf, argv, N_WORDS);
	if (argc != N_WORDS) {
		DRM_DEBUG_DRIVER("tokenize failed, a command is %d words\n",
				 N_WORDS);
		return -EINVAL;
	}

	if (display_crc_ctl_parse_object(argv[0], &object) < 0) {
		DRM_DEBUG_DRIVER("unknown object %s\n", argv[0]);
		return -EINVAL;
	}

	if (display_crc_ctl_parse_pipe(argv[1], &pipe) < 0) {
		DRM_DEBUG_DRIVER("unknown pipe %s\n", argv[1]);
		return -EINVAL;
	}

	if (display_crc_ctl_parse_source(argv[2], &source) < 0) {
		DRM_DEBUG_DRIVER("unknown source %s\n", argv[2]);
		return -EINVAL;
	}

	return pipe_crc_set_source(dev, pipe, source);
}
/*
 * write() handler for the pipe CRC control file.
 *
 * Copies up to PAGE_SIZE - 1 bytes from user space into a NUL-terminated
 * kernel buffer and hands it to display_crc_ctl_parse().
 *
 * Returns @len on success (0 for an empty write), -E2BIG if the write is too
 * large, -ENOMEM or -EFAULT on copy problems, or the parser's error.
 */
static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf,
				     size_t len, loff_t *offp)
{
	struct seq_file *m = file->private_data;
	struct drm_device *dev = m->private;
	char *cmd;
	int err;

	if (!len)
		return 0;

	if (len > PAGE_SIZE - 1) {
		DRM_DEBUG_DRIVER("expected <%lu bytes into pipe crc control\n",
				 PAGE_SIZE);
		return -E2BIG;
	}

	/* one extra byte for the terminating NUL */
	cmd = kmalloc(len + 1, GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;

	if (copy_from_user(cmd, ubuf, len)) {
		err = -EFAULT;
	} else {
		cmd[len] = '\0';
		err = display_crc_ctl_parse(dev, cmd, len);
	}

	kfree(cmd);

	if (err < 0)
		return err;

	*offp += len;
	return len;
}
/* File operations of the pipe CRC control file: seq_file reads, command writes. */
static const struct file_operations i915_display_crc_ctl_fops = {
	.owner		= THIS_MODULE,
	.open		= display_crc_ctl_open,
	.release	= single_release,
	.read		= seq_read,
	.write		= display_crc_ctl_write,
	.llseek		= seq_lseek,
};
/*
 * write() handler for the DisplayPort compliance "test active" file.
 *
 * The user buffer is parsed as a decimal integer for every connected
 * DisplayPort connector that has an encoder; compliance_test_active of that
 * port is set to 1 only for the exact value 1 and to 0 otherwise.
 *
 * Returns @len on success (0 for an empty write), -ENOMEM/-EFAULT on copy
 * problems, or the kstrtoint() error if parsing fails.
 */
static ssize_t i915_displayport_test_active_write(struct file *file,
						  const char __user *ubuf,
						  size_t len, loff_t *offp)
{
	struct seq_file *m = file->private_data;
	struct drm_device *dev = m->private;
	struct list_head *connectors = &dev->mode_config.connector_list;
	struct drm_connector *connector;
	struct intel_dp *intel_dp;
	char *text;
	int status = 0;
	int val = 0;

	if (!len)
		return 0;

	text = kmalloc(len + 1, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	if (copy_from_user(text, ubuf, len)) {
		status = -EFAULT;
		goto free_text;
	}

	text[len] = '\0';
	DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len);

	list_for_each_entry(connector, connectors, head) {
		if (connector->connector_type !=
		    DRM_MODE_CONNECTOR_DisplayPort)
			continue;

		if (connector->status != connector_status_connected ||
		    connector->encoder == NULL)
			continue;

		intel_dp = enc_to_intel_dp(connector->encoder);

		status = kstrtoint(text, 10, &val);
		if (status < 0)
			break;

		DRM_DEBUG_DRIVER("Got %d for test active\n", val);
		/* To prevent erroneous activation of the compliance
		 * testing code, only accept an actual value of 1 here
		 */
		intel_dp->compliance_test_active = (val == 1) ? 1 : 0;
	}

free_text:
	kfree(text);

	if (status < 0)
		return status;

	*offp += len;
	return len;
}
/*
 * Emit one character per DisplayPort connector: "1" if the connector is
 * connected, has an encoder and its compliance_test_active flag is set,
 * "0" otherwise. Always returns 0.
 */
static int i915_displayport_test_active_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = m->private;
	struct list_head *connectors = &dev->mode_config.connector_list;
	struct drm_connector *connector;

	list_for_each_entry(connector, connectors, head) {
		bool active = false;

		if (connector->connector_type !=
		    DRM_MODE_CONNECTOR_DisplayPort)
			continue;

		if (connector->status == connector_status_connected &&
		    connector->encoder != NULL) {
			struct intel_dp *intel_dp =
				enc_to_intel_dp(connector->encoder);

			if (intel_dp->compliance_test_active)
				active = true;
		}

		seq_puts(m, active ? "1" : "0");
	}

	return 0;
}
/* open() handler: bind the "test active" show routine to the inode's device. */
static int i915_displayport_test_active_open(struct inode *inode,
					     struct file *file)
{
	struct drm_device *drm = inode->i_private;

	return single_open(file, i915_displayport_test_active_show, drm);
}
/* File operations of the DP compliance "test active" file (read and write). */
static const struct file_operations i915_displayport_test_active_fops = {
	.owner		= THIS_MODULE,
	.open		= i915_displayport_test_active_open,
	.release	= single_release,
	.read		= seq_read,
	.write		= i915_displayport_test_active_write,
	.llseek		= seq_lseek,
};
/*
 * For every DisplayPort connector print compliance_test_data in hex when the
 * connector is connected and has an encoder, "0" otherwise. Always returns 0.
 */
static int i915_displayport_test_data_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = m->private;
	struct list_head *connectors = &dev->mode_config.connector_list;
	struct drm_connector *connector;
	struct intel_dp *intel_dp;

	list_for_each_entry(connector, connectors, head) {
		if (connector->connector_type !=
		    DRM_MODE_CONNECTOR_DisplayPort)
			continue;

		if (connector->status != connector_status_connected ||
		    connector->encoder == NULL) {
			seq_puts(m, "0");
			continue;
		}

		intel_dp = enc_to_intel_dp(connector->encoder);
		seq_printf(m, "%lx", intel_dp->compliance_test_data);
	}

	return 0;
}
/* open() handler: bind the "test data" show routine to the inode's device. */
static int i915_displayport_test_data_open(struct inode *inode,
					   struct file *file)
{
	struct drm_device *drm = inode->i_private;

	return single_open(file, i915_displayport_test_data_show, drm);
}
/* File operations of the DP compliance "test data" file (no write handler). */
static const struct file_operations i915_displayport_test_data_fops = {
	.owner		= THIS_MODULE,
	.open		= i915_displayport_test_data_open,
	.release	= single_release,
	.read		= seq_read,
	.llseek		= seq_lseek,
};
/*
 * For every DisplayPort connector print compliance_test_type as at least two
 * hex digits when the connector is connected and has an encoder, "0"
 * otherwise. Always returns 0.
 */
static int i915_displayport_test_type_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = m->private;
	struct list_head *connectors = &dev->mode_config.connector_list;
	struct drm_connector *connector;
	struct intel_dp *intel_dp;

	list_for_each_entry(connector, connectors, head) {
		if (connector->connector_type !=
		    DRM_MODE_CONNECTOR_DisplayPort)
			continue;

		if (connector->status != connector_status_connected ||
		    connector->encoder == NULL) {
			seq_puts(m, "0");
			continue;
		}

		intel_dp = enc_to_intel_dp(connector->encoder);
		seq_printf(m, "%02lx", intel_dp->compliance_test_type);
	}

	return 0;
}
/* open() handler: bind the "test type" show routine to the inode's device. */
static int i915_displayport_test_type_open(struct inode *inode,
					   struct file *file)
{
	struct drm_device *drm = inode->i_private;

	return single_open(file, i915_displayport_test_type_show, drm);
}
/* File operations of the DP compliance "test type" file (no write handler). */
static const struct file_operations i915_displayport_test_type_fops = {
	.owner		= THIS_MODULE,
	.open		= i915_displayport_test_type_open,
	.release	= single_release,
	.read		= seq_read,
	.llseek		= seq_lseek,
};
/*
 * Print the watermark latency table @wm, one "WM<level>" line per level, with
 * both the raw value and the latency in microseconds (one decimal place).
 *
 * The number of levels is 3 on CHV, 1 on VLV and ilk_wm_max_level() + 1
 * elsewhere. The table is read with all modeset locks held.
 */
static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
{
	struct drm_device *dev = m->private;
	int num_levels;
	int level;

	if (IS_CHERRYVIEW(dev))
		num_levels = 3;
	else if (IS_VALLEYVIEW(dev))
		num_levels = 1;
	else
		num_levels = ilk_wm_max_level(dev) + 1;

	drm_modeset_lock_all(dev);

	for (level = 0; level < num_levels; level++) {
		/* latency in tenths of a microsecond */
		unsigned int tenths = wm[level];
		unsigned int scale = 1;

		/*
		 * - WM1+ latency values in 0.5us units
		 * - latencies are in us on gen9/vlv/chv
		 */
		if (INTEL_INFO(dev)->gen >= 9 || IS_VALLEYVIEW(dev) ||
		    IS_CHERRYVIEW(dev))
			scale = 10;
		else if (level > 0)
			scale = 5;

		tenths *= scale;

		seq_printf(m, "WM%d %u (%u.%u usec)\n",
			   level, wm[level], tenths / 10, tenths % 10);
	}

	drm_modeset_unlock_all(dev);
}
/*
 * Show the primary plane watermark latencies: wm.skl_latency on gen9+,
 * wm.pri_latency otherwise. Always returns 0.
 */
static int pri_wm_latency_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = m->private;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const uint16_t *table = INTEL_INFO(dev)->gen >= 9 ?
				dev_priv->wm.skl_latency :
				to_i915(dev)->wm.pri_latency;

	wm_latency_show(m, table);

	return 0;
}
/*
 * Show the sprite plane watermark latencies: wm.skl_latency on gen9+,
 * wm.spr_latency otherwise. Always returns 0.
 */
static int spr_wm_latency_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = m->private;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const uint16_t *table = INTEL_INFO(dev)->gen >= 9 ?
				dev_priv->wm.skl_latency :
				to_i915(dev)->wm.spr_latency;

	wm_latency_show(m, table);

	return 0;
}
/*
 * Show the cursor watermark latencies: wm.skl_latency on gen9+,
 * wm.cur_latency otherwise. Always returns 0.
 */
static int cur_wm_latency_show(struct seq_file *m, void *data)
{
	struct drm_device *dev = m->private;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const uint16_t *table = INTEL_INFO(dev)->gen >= 9 ?
				dev_priv->wm.skl_latency :
				to_i915(dev)->wm.cur_latency;

	wm_latency_show(m, table);

	return 0;
}
/* open() handler for the primary WM latency file; -ENODEV before gen5. */
static int pri_wm_latency_open(struct inode *inode, struct file *file)
{
	struct drm_device *dev = inode->i_private;

	if (INTEL_INFO(dev)->gen >= 5)
		return single_open(file, pri_wm_latency_show, dev);

	return -ENODEV;
}
/* open() handler for the sprite WM latency file; -ENODEV on GMCH display. */
static int spr_wm_latency_open(struct inode *inode, struct file *file)
{
	struct drm_device *dev = inode->i_private;

	if (!HAS_GMCH_DISPLAY(dev))
		return single_open(file, spr_wm_latency_show, dev);

	return -ENODEV;
}
/* open() handler for the cursor WM latency file; -ENODEV on GMCH display. */
static int cur_wm_latency_open(struct inode *inode, struct file *file)
{
	struct drm_device *dev = inode->i_private;

	if (!HAS_GMCH_DISPLAY(dev))
		return single_open(file, cur_wm_latency_show, dev);

	return -ENODEV;
}
/*
 * Common write() helper for the watermark latency files.
 *
 * Parses up to eight unsigned values from the user buffer and, if exactly as
 * many were supplied as the platform has WM levels (3 on CHV, 1 on VLV,
 * ilk_wm_max_level() + 1 elsewhere), copies them into @wm with all modeset
 * locks held.
 *
 * Returns @len on success, -EINVAL if the input is 32 bytes or longer or the
 * number of values is wrong, -EFAULT if the user copy fails.
 */
static ssize_t wm_latency_write(struct file *file, const char __user *ubuf,
				size_t len, loff_t *offp, uint16_t wm[8])
{
	struct seq_file *m = file->private_data;
	struct drm_device *dev = m->private;
	uint16_t parsed[8] = { 0 };
	char text[32];
	int num_levels;
	int nparsed;
	int i;

	if (IS_CHERRYVIEW(dev))
		num_levels = 3;
	else if (IS_VALLEYVIEW(dev))
		num_levels = 1;
	else
		num_levels = ilk_wm_max_level(dev) + 1;

	/* leave room for the terminating NUL */
	if (len >= sizeof(text))
		return -EINVAL;

	if (copy_from_user(text, ubuf, len))
		return -EFAULT;

	text[len] = '\0';

	nparsed = sscanf(text, "%hu %hu %hu %hu %hu %hu %hu %hu",
			 &parsed[0], &parsed[1], &parsed[2], &parsed[3],
			 &parsed[4], &parsed[5], &parsed[6], &parsed[7]);
	if (nparsed != num_levels)
		return -EINVAL;

	drm_modeset_lock_all(dev);

	for (i = 0; i < num_levels; i++)
		wm[i] = parsed[i];

	drm_modeset_unlock_all(dev);

	return len;
}
/*
 * write() handler for the primary WM latency file: updates wm.skl_latency on
 * gen9+, wm.pri_latency otherwise, via wm_latency_write().
 */
static ssize_t pri_wm_latency_write(struct file *file, const char __user *ubuf,
				    size_t len, loff_t *offp)
{
	struct seq_file *m = file->private_data;
	struct drm_device *dev = m->private;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint16_t *table = INTEL_INFO(dev)->gen >= 9 ?
			  dev_priv->wm.skl_latency :
			  to_i915(dev)->wm.pri_latency;

	return wm_latency_write(file, ubuf, len, offp, table);
}
/*
 * write() handler for the sprite WM latency file: updates wm.skl_latency on
 * gen9+, wm.spr_latency otherwise, via wm_latency_write().
 */
static ssize_t spr_wm_latency_write(struct file *file, const char __user *ubuf,
				    size_t len, loff_t *offp)
{
	struct seq_file *m = file->private_data;
	struct drm_device *dev = m->private;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint16_t *table = INTEL_INFO(dev)->gen >= 9 ?
			  dev_priv->wm.skl_latency :
			  to_i915(dev)->wm.spr_latency;

	return wm_latency_write(file, ubuf, len, offp, table);
}
/*
 * write() handler for the cursor WM latency file: updates wm.skl_latency on
 * gen9+, wm.cur_latency otherwise, via wm_latency_write().
 */
static ssize_t cur_wm_latency_write(struct file *file, const char __user *ubuf,
				    size_t len, loff_t *offp)
{
	struct seq_file *m = file->private_data;
	struct drm_device *dev = m->private;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint16_t *table = INTEL_INFO(dev)->gen >= 9 ?
			  dev_priv->wm.skl_latency :
			  to_i915(dev)->wm.cur_latency;

	return wm_latency_write(file, ubuf, len, offp, table);
}
/* File operations of the primary plane WM latency file. */
static const struct file_operations i915_pri_wm_latency_fops = {
	.owner		= THIS_MODULE,
	.open		= pri_wm_latency_open,
	.release	= single_release,
	.read		= seq_read,
	.write		= pri_wm_latency_write,
	.llseek		= seq_lseek,
};
/* File operations of the sprite plane WM latency file. */
static const struct file_operations i915_spr_wm_latency_fops = {
	.owner		= THIS_MODULE,
	.open		= spr_wm_latency_open,
	.release	= single_release,
	.read		= seq_read,
	.write		= spr_wm_latency_write,
	.llseek		= seq_lseek,
};
/* File operations of the cursor WM latency file. */
static const struct file_operations i915_cur_wm_latency_fops = {
	.owner		= THIS_MODULE,
	.open		= cur_wm_latency_open,
	.release	= single_release,
	.read		= seq_read,
	.write		= cur_wm_latency_write,
	.llseek		= seq_lseek,
};
/* Store the result of i915_terminally_wedged() in *@val. Always returns 0. */
static int
i915_wedged_get(void *data, u64 *val)
{
	struct drm_device *drm = data;
	struct drm_i915_private *i915 = drm->dev_private;

	*val = i915_terminally_wedged(&i915->gpu_error);

	return 0;
}
/*
 * Manually trigger i915_handle_error() with @val, holding a runtime PM
 * reference around the call. Returns -EAGAIN if a reset is already in
 * progress, 0 otherwise.
 */
static int
i915_wedged_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *i915 = dev->dev_private;

	/*
	 * There is no safeguard against this debugfs entry colliding
	 * with the hangcheck calling same i915_handle_error() in
	 * parallel, causing an explosion. For now we assume that the
	 * test harness is responsible enough not to inject gpu hangs
	 * while it is writing to 'i915_wedged'
	 */

	if (i915_reset_in_progress(&i915->gpu_error))
		return -EAGAIN;

	intel_runtime_pm_get(i915);

	i915_handle_error(dev, val,
			  "Manually setting wedged to %llu", val);

	intel_runtime_pm_put(i915);

	return 0;
}
/*
 * Defines i915_wedged_fops with i915_wedged_get() as getter and
 * i915_wedged_set() as setter; values use the "%llu\n" format.
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
/* Report the current gpu_error.stop_rings mask in *@val. Always returns 0. */
static int
i915_ring_stop_get(void *data, u64 *val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;

	*val = dev_priv->gpu_error.stop_rings;

	return 0;
}
/*
 * Store @val in gpu_error.stop_rings while holding struct_mutex.
 *
 * Returns 0 on success, or the error from mutex_lock_interruptible() if the
 * lock could not be taken.
 */
static int
i915_ring_stop_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	DRM_DEBUG_DRIVER("Stopping rings 0x%08llx\n", val);

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	dev_priv->gpu_error.stop_rings = val;
	mutex_unlock(&dev->struct_mutex);

	return 0;
}
/*
 * Defines i915_ring_stop_fops with i915_ring_stop_get() as getter and
 * i915_ring_stop_set() as setter; values use the "0x%08llx\n" format.
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_ring_stop_fops,
i915_ring_stop_get, i915_ring_stop_set,
"0x%08llx\n");
/* Report the gpu_error.missed_irq_rings mask in *@val. Always returns 0. */
static int
i915_ring_missed_irq_get(void *data, u64 *val)
{
	struct drm_device *drm = data;
	struct drm_i915_private *i915 = drm->dev_private;

	*val = i915->gpu_error.missed_irq_rings;

	return 0;
}
/*
 * Store @val in gpu_error.missed_irq_rings while holding struct_mutex.
 * Returns 0, or the error from mutex_lock_interruptible().
 */
static int
i915_ring_missed_irq_set(void *data, u64 val)
{
	struct drm_device *drm = data;
	struct drm_i915_private *i915 = drm->dev_private;
	int err;

	/* Lock against concurrent debugfs callers */
	err = mutex_lock_interruptible(&drm->struct_mutex);
	if (err)
		return err;

	i915->gpu_error.missed_irq_rings = val;
	mutex_unlock(&drm->struct_mutex);

	return 0;
}
/*
 * Defines i915_ring_missed_irq_fops with i915_ring_missed_irq_get() as getter
 * and i915_ring_missed_irq_set() as setter; values use "0x%08llx\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops,
i915_ring_missed_irq_get, i915_ring_missed_irq_set,
"0x%08llx\n");
/* Report the gpu_error.test_irq_rings mask in *@val. Always returns 0. */
static int
i915_ring_test_irq_get(void *data, u64 *val)
{
	struct drm_device *drm = data;
	struct drm_i915_private *i915 = drm->dev_private;

	*val = i915->gpu_error.test_irq_rings;

	return 0;
}
/*
 * Store @val in gpu_error.test_irq_rings while holding struct_mutex.
 * Returns 0, or the error from mutex_lock_interruptible().
 */
static int
i915_ring_test_irq_set(void *data, u64 val)
{
	struct drm_device *drm = data;
	struct drm_i915_private *i915 = drm->dev_private;
	int err;

	DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val);

	/* Lock against concurrent debugfs callers */
	err = mutex_lock_interruptible(&drm->struct_mutex);
	if (err)
		return err;

	i915->gpu_error.test_irq_rings = val;
	mutex_unlock(&drm->struct_mutex);

	return 0;
}
/*
 * Defines i915_ring_test_irq_fops with i915_ring_test_irq_get() as getter and
 * i915_ring_test_irq_set() as setter; values use "0x%08llx\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
i915_ring_test_irq_get, i915_ring_test_irq_set,
"0x%08llx\n");
/* Flag bits accepted by i915_drop_caches_set(). */
/* Shrink with I915_SHRINK_UNBOUND. */
#define DROP_UNBOUND 0x1
/* Shrink with I915_SHRINK_BOUND. */
#define DROP_BOUND 0x2
/* Retire completed requests. */
#define DROP_RETIRE 0x4
/* Idle the GPU first, then retire requests. */
#define DROP_ACTIVE 0x8
/* All of the above; this is also what i915_drop_caches_get() reports. */
#define DROP_ALL (DROP_UNBOUND | \
DROP_BOUND | \
DROP_RETIRE | \
DROP_ACTIVE)
/* Report the full set of supported drop flags (DROP_ALL). Always returns 0. */
static int
i915_drop_caches_get(void *data, u64 *val)
{
	u64 supported = DROP_ALL;

	*val = supported;

	return 0;
}
/*
 * Drop GEM caches according to the DROP_* bits in @val, under struct_mutex:
 * DROP_ACTIVE idles the GPU, DROP_RETIRE or DROP_ACTIVE retires requests,
 * DROP_BOUND and DROP_UNBOUND shrink the respective object lists.
 *
 * Returns 0 on success, the error from mutex_lock_interruptible(), or the
 * error from i915_gpu_idle() (in which case nothing else is done).
 */
static int
i915_drop_caches_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *i915 = dev->dev_private;
	int err;

	DRM_DEBUG("Dropping caches: 0x%08llx\n", val);

	/* No need to check and wait for gpu resets, only libdrm auto-restarts
	 * on ioctls on -EAGAIN. */
	err = mutex_lock_interruptible(&dev->struct_mutex);
	if (err)
		return err;

	if (val & DROP_ACTIVE) {
		err = i915_gpu_idle(dev);
		if (err)
			goto out_unlock;
	}

	if (val & (DROP_RETIRE | DROP_ACTIVE))
		i915_gem_retire_requests(dev);

	if (val & DROP_BOUND)
		i915_gem_shrink(i915, LONG_MAX, I915_SHRINK_BOUND);

	if (val & DROP_UNBOUND)
		i915_gem_shrink(i915, LONG_MAX, I915_SHRINK_UNBOUND);

out_unlock:
	mutex_unlock(&dev->struct_mutex);

	return err;
}
/*
 * Defines i915_drop_caches_fops with i915_drop_caches_get() as getter and
 * i915_drop_caches_set() as setter; values use "0x%08llx\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
i915_drop_caches_get, i915_drop_caches_set,
"0x%08llx\n");
/*
 * Report rps.max_freq_softlimit, converted with intel_gpu_freq(), in *@val.
 *
 * Pending delayed RPS resume work is flushed first and the value is read
 * under rps.hw_lock. Returns 0 on success, -ENODEV before gen6, or the error
 * from mutex_lock_interruptible().
 */
static int
i915_max_freq_get(void *data, u64 *val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (INTEL_INFO(dev)->gen < 6)
		return -ENODEV;

	flush_delayed_work(&dev_priv->rps.delayed_resume_work);

	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
	if (ret)
		return ret;

	*val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit);
	mutex_unlock(&dev_priv->rps.hw_lock);

	return 0;
}
/*
 * Set the RPS maximum soft limit from @val, which is converted with
 * intel_freq_opcode() first.
 *
 * The converted value must lie within [rps.min_freq, rps.max_freq] and must
 * not be below rps.min_freq_softlimit. On success the soft limit is updated
 * and intel_set_rps() is called with the new value, all under rps.hw_lock.
 *
 * Returns 0 on success, -ENODEV before gen6, -EINVAL for an out-of-range
 * value, or the error from mutex_lock_interruptible().
 */
static int
i915_max_freq_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 ceiling, floor;
	int ret;

	if (INTEL_INFO(dev)->gen < 6)
		return -ENODEV;

	flush_delayed_work(&dev_priv->rps.delayed_resume_work);

	DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);

	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
	if (ret)
		return ret;

	/*
	 * Turbo will still be enabled, but won't go above the set value.
	 */
	val = intel_freq_opcode(dev_priv, val);

	ceiling = dev_priv->rps.max_freq;
	floor = dev_priv->rps.min_freq;

	if (val >= floor && val <= ceiling &&
	    val >= dev_priv->rps.min_freq_softlimit) {
		dev_priv->rps.max_freq_softlimit = val;
		intel_set_rps(dev, val);
	} else {
		ret = -EINVAL;
	}

	mutex_unlock(&dev_priv->rps.hw_lock);

	return ret;
}
/*
 * Defines i915_max_freq_fops with i915_max_freq_get() as getter and
 * i915_max_freq_set() as setter; values use the "%llu\n" format.
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
i915_max_freq_get, i915_max_freq_set,
"%llu\n");
/*
 * Report rps.min_freq_softlimit, converted with intel_gpu_freq(), in *@val.
 *
 * Pending delayed RPS resume work is flushed first and the value is read
 * under rps.hw_lock. Returns 0 on success, -ENODEV before gen6, or the error
 * from mutex_lock_interruptible().
 */
static int
i915_min_freq_get(void *data, u64 *val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (INTEL_INFO(dev)->gen < 6)
		return -ENODEV;

	flush_delayed_work(&dev_priv->rps.delayed_resume_work);

	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
	if (ret)
		return ret;

	*val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit);
	mutex_unlock(&dev_priv->rps.hw_lock);

	return 0;
}
/*
 * Set the RPS minimum soft limit from @val, which is converted with
 * intel_freq_opcode() first.
 *
 * The converted value must lie within [rps.min_freq, rps.max_freq] and must
 * not exceed rps.max_freq_softlimit. On success the soft limit is updated
 * and intel_set_rps() is called with the new value, all under rps.hw_lock.
 *
 * Returns 0 on success, -ENODEV before gen6, -EINVAL for an out-of-range
 * value, or the error from mutex_lock_interruptible().
 */
static int
i915_min_freq_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 ceiling, floor;
	int ret;

	if (INTEL_INFO(dev)->gen < 6)
		return -ENODEV;

	flush_delayed_work(&dev_priv->rps.delayed_resume_work);

	DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);

	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
	if (ret)
		return ret;

	/*
	 * Turbo will still be enabled, but won't go below the set value.
	 */
	val = intel_freq_opcode(dev_priv, val);

	ceiling = dev_priv->rps.max_freq;
	floor = dev_priv->rps.min_freq;

	if (val >= floor && val <= ceiling &&
	    val <= dev_priv->rps.max_freq_softlimit) {
		dev_priv->rps.min_freq_softlimit = val;
		intel_set_rps(dev, val);
	} else {
		ret = -EINVAL;
	}

	mutex_unlock(&dev_priv->rps.hw_lock);

	return ret;
}
/*
 * Defines i915_min_freq_fops with i915_min_freq_get() as getter and
 * i915_min_freq_set() as setter; values use the "%llu\n" format.
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
i915_min_freq_get, i915_min_freq_set,
"%llu\n");
/*
 * Read the cache sharing policy field from GEN6_MBCUNIT_SNPCR into *@val.
 *
 * The register is read under struct_mutex with a runtime PM reference held.
 * Returns 0 on success, -ENODEV unless the device is gen6 or gen7, or the
 * error from mutex_lock_interruptible().
 */
static int
i915_cache_sharing_get(void *data, u64 *val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 snpcr;
	int ret;

	if (!(IS_GEN6(dev) || IS_GEN7(dev)))
		return -ENODEV;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;
	intel_runtime_pm_get(dev_priv);

	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);

	intel_runtime_pm_put(dev_priv);
	/* Release through the same pointer the lock was taken with. */
	mutex_unlock(&dev->struct_mutex);

	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;

	return 0;
}
/*
 * Write @val (0..3) into the cache sharing policy field of
 * GEN6_MBCUNIT_SNPCR with a read-modify-write, holding a runtime PM
 * reference.
 *
 * Returns 0 on success, -ENODEV unless the device is gen6 or gen7, -EINVAL
 * if @val is greater than 3.
 */
static int
i915_cache_sharing_set(void *data, u64 val)
{
	struct drm_device *dev = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 policy;

	if (!IS_GEN6(dev) && !IS_GEN7(dev))
		return -ENODEV;

	if (val > 3)
		return -EINVAL;

	intel_runtime_pm_get(dev_priv);
	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);

	/* Update the cache sharing policy here as well */
	policy = I915_READ(GEN6_MBCUNIT_SNPCR);
	policy &= ~GEN6_MBC_SNPCR_MASK;
	policy |= (val << GEN6_MBC_SNPCR_SHIFT);
	I915_WRITE(GEN6_MBCUNIT_SNPCR, policy);

	intel_runtime_pm_put(dev_priv);

	return 0;
}
/*
 * Defines i915_cache_sharing_fops with i915_cache_sharing_get() as getter and
 * i915_cache_sharing_set() as setter; values use the "%llu\n" format.
 */
DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops,
i915_cache_sharing_get, i915_cache_sharing_set,
"%llu\n");
/*
 * Slice/subslice/EU counts filled in by the *_sseu_device_status() helpers.
 * The helpers only accumulate into these fields; NOTE(review): they appear
 * to expect the caller to zero the structure first, confirm at the call site.
 */
struct sseu_dev_status {
/* number of slices counted */
unsigned int slice_total;
/* number of subslices counted across all slices */
unsigned int subslice_total;
/* subslice count per slice (largest seen when slices differ) */
unsigned int subslice_per_slice;
/* number of EUs counted across all subslices */
unsigned int eu_total;
/* EU count per subslice (largest seen when subslices differ) */
unsigned int eu_per_subslice;
};
/*
 * Accumulate the Cherryview slice/subslice/EU status into @stat.
 *
 * The two CHV_POWER_SSx_SIG1/SIG2 register pairs are read, one pair per
 * subslice. A subslice with CHV_SS_PG_ENABLE set is skipped; for the others
 * each EU pair whose PG_ENABLE bit is clear contributes two EUs.
 *
 * The register snapshots live in fixed-size arrays rather than
 * variable-length ones, so the stack usage is known at compile time.
 */
static void cherryview_sseu_device_status(struct drm_device *dev,
					  struct sseu_dev_status *stat)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 sig1[2], sig2[2];
	const int ss_max = ARRAY_SIZE(sig1);
	int ss;

	sig1[0] = I915_READ(CHV_POWER_SS0_SIG1);
	sig1[1] = I915_READ(CHV_POWER_SS1_SIG1);
	sig2[0] = I915_READ(CHV_POWER_SS0_SIG2);
	sig2[1] = I915_READ(CHV_POWER_SS1_SIG2);

	for (ss = 0; ss < ss_max; ss++) {
		unsigned int eu_cnt;

		if (sig1[ss] & CHV_SS_PG_ENABLE)
			/* skip disabled subslice */
			continue;

		stat->slice_total = 1;
		stat->subslice_per_slice++;
		/* each PG_ENABLE bit covers a pair of EUs */
		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
			 ((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
		stat->eu_total += eu_cnt;
		stat->eu_per_subslice = max(stat->eu_per_subslice, eu_cnt);
	}

	/* there is a single slice, so the totals coincide */
	stat->subslice_total = stat->subslice_per_slice;
}
/*
 * Accumulate the gen9 slice/subslice/EU status into @stat.
 *
 * For each slice the slice ack register and the two EU ack registers
 * (subslices 0/1 and 2/3) are read. Slices without GEN9_PGCTL_SLICE_ACK are
 * skipped. On SKL/KBL the subslice count per slice comes from the device
 * info; on BXT subslices are counted from their individual ack bits. The EU
 * count of a subslice is twice the number of ack bits set for it.
 *
 * The register snapshots live in fixed-size arrays sized for the largest
 * configuration (3 slices) rather than in variable-length arrays, so the
 * stack usage is known at compile time and does not depend on s_max, which
 * is lowered for BXT after the arrays are declared.
 */
static void gen9_sseu_device_status(struct drm_device *dev,
				    struct sseu_dev_status *stat)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 s_reg[3], eu_reg[2 * 3], eu_mask[2];
	int s_max = ARRAY_SIZE(s_reg), ss_max = 4;
	int s, ss;

	/* BXT has a single slice and at most 3 subslices. */
	if (IS_BROXTON(dev)) {
		s_max = 1;
		ss_max = 3;
	}

	for (s = 0; s < s_max; s++) {
		s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
		eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s));
		eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s));
	}

	/* ack bits of the even (A) and odd (B) subslice within one register */
	eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
		     GEN9_PGCTL_SSA_EU19_ACK |
		     GEN9_PGCTL_SSA_EU210_ACK |
		     GEN9_PGCTL_SSA_EU311_ACK;
	eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
		     GEN9_PGCTL_SSB_EU19_ACK |
		     GEN9_PGCTL_SSB_EU210_ACK |
		     GEN9_PGCTL_SSB_EU311_ACK;

	for (s = 0; s < s_max; s++) {
		unsigned int ss_cnt = 0;

		if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
			/* skip disabled slice */
			continue;

		stat->slice_total++;

		if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
			ss_cnt = INTEL_INFO(dev)->subslice_per_slice;

		for (ss = 0; ss < ss_max; ss++) {
			unsigned int eu_cnt;

			if (IS_BROXTON(dev) &&
			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
				/* skip disabled subslice */
				continue;

			if (IS_BROXTON(dev))
				ss_cnt++;

			/* each ack bit covers a pair of EUs */
			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
					       eu_mask[ss%2]);
			stat->eu_total += eu_cnt;
			stat->eu_per_subslice = max(stat->eu_per_subslice,
						    eu_cnt);
		}

		stat->subslice_total += ss_cnt;
		stat->subslice_per_slice = max(stat->subslice_per_slice,
					       ss_cnt);
	}
}
/*
 * broadwell_sseu_device_status - report enabled slices/subslices/EUs on BDW
 * @dev:  DRM device to query
 * @stat: counters to fill in
 *
 * The number of enabled slices comes from GEN8_GT_SLICE_INFO; the
 * per-slice and per-subslice figures are taken from the static device
 * info. When no slice is enabled only slice_total is written.
 */
static void broadwell_sseu_device_status(struct drm_device *dev,
					 struct sseu_dev_status *stat)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO);
	unsigned int slice;

	stat->slice_total = hweight32(slice_info & GEN8_LSLICESTAT_MASK);
	if (!stat->slice_total)
		return;

	stat->subslice_per_slice = INTEL_INFO(dev)->subslice_per_slice;
	stat->subslice_total = stat->slice_total * stat->subslice_per_slice;
	stat->eu_per_subslice = INTEL_INFO(dev)->eu_per_subslice;
	stat->eu_total = stat->eu_per_subslice * stat->subslice_total;

	/* subtract fused off EU(s) from enabled slice(s) */
	for (slice = 0; slice < stat->slice_total; slice++)
		stat->eu_total -=
			hweight8(INTEL_INFO(dev)->subslice_7eu[slice]);
}
static int i915_sseu_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct sseu_dev_status stat;
if (INTEL_INFO(dev)->gen < 8)
return -ENODEV;
seq_puts(m, "SSEU Device Info\n");
seq_printf(m, " Available Slice Total: %u\n",
INTEL_INFO(dev)->slice_total);
seq_printf(m, " Available Subslice Total: %u\n",
INTEL_INFO(dev)->subslice_total);
seq_printf(m, " Available Subslice Per Slice: %u\n",
INTEL_INFO(dev)->subslice_per_slice);
seq_printf(m, " Available EU Total: %u\n",
INTEL_INFO(dev)->eu_total);
seq_printf(m, " Available EU Per Subslice: %u\n",
INTEL_INFO(dev)->eu_per_subslice);
seq_printf(m, " Has Slice Power Gating: %s\n",
yesno(INTEL_INFO(dev)->has_slice_pg));
seq_printf(m, " Has Subslice Power Gating: %s\n",
yesno(INTEL_INFO(dev)->has_subslice_pg));
seq_printf(m, " Has EU Power Gating: %s\n",
yesno(INTEL_INFO(dev)->has_eu_pg));
seq_puts(m, "SSEU Device Status\n");
memset(&stat, 0, sizeof(stat));
if (IS_CHERRYVIEW(dev)) {
cherryview_sseu_device_status(dev, &stat);
} else if (IS_BROADWELL(dev)) {
broadwell_sseu_device_status(dev, &stat);
} else if (INTEL_INFO(dev)->gen >= 9) {
gen9_sseu_device_status(dev, &stat);
}
seq_printf(m, " Enabled Slice Total: %u\n",
stat.slice_total);
seq_printf(m, " Enabled Subslice Total: %u\n",
stat.subslice_total);
seq_printf(m, " Enabled Subslice Per Slice: %u\n",
stat.subslice_per_slice);
seq_printf(m, " Enabled EU Total: %u\n",
stat.eu_total);
seq_printf(m, " Enabled EU Per Subslice: %u\n",
stat.eu_per_subslice);
return 0;
}
/*
 * Open handler for i915_forcewake_user: on gen6 and newer, takes a
 * runtime-PM reference and a forcewake reference on all domains. Both are
 * dropped again by i915_forcewake_release(). Always returns 0.
 */
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
	struct drm_device *dev = inode->i_private;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen >= 6) {
		intel_runtime_pm_get(dev_priv);
		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
	}

	return 0;
}
/*
 * Release handler for i915_forcewake_user: on gen6 and newer, drops the
 * forcewake and runtime-PM references taken by i915_forcewake_open(), in
 * the reverse order of acquisition. Always returns 0.
 */
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
	struct drm_device *dev = inode->i_private;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen >= 6) {
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
		intel_runtime_pm_put(dev_priv);
	}

	return 0;
}
/*
 * File operations for the i915_forcewake_user debugfs file. Only open and
 * release are provided: the references taken in i915_forcewake_open() are
 * held for as long as the file stays open.
 */
static const struct file_operations i915_forcewake_fops = {
.owner = THIS_MODULE,
.open = i915_forcewake_open,
.release = i915_forcewake_release,
};
/*
 * Create the owner-readable i915_forcewake_user debugfs file under @root
 * and register it with @minor as a fake info node keyed by
 * &i915_forcewake_fops.
 *
 * Returns -ENOMEM if the file could not be created, otherwise the result
 * of drm_add_fake_info_node().
 */
static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
{
	struct dentry *entry;

	entry = debugfs_create_file("i915_forcewake_user", S_IRUSR, root,
				    minor->dev, &i915_forcewake_fops);
	if (!entry)
		return -ENOMEM;

	return drm_add_fake_info_node(minor, entry, &i915_forcewake_fops);
}
/*
 * Create a world-readable, owner-writable debugfs file called @name under
 * @root, backed by @fops with the DRM device as private data, and register
 * it with @minor as a fake info node keyed by @fops.
 *
 * Returns -ENOMEM if the file could not be created, otherwise the result
 * of drm_add_fake_info_node().
 */
static int i915_debugfs_create(struct dentry *root,
			       struct drm_minor *minor,
			       const char *name,
			       const struct file_operations *fops)
{
	struct dentry *entry;

	entry = debugfs_create_file(name, S_IRUGO | S_IWUSR, root,
				    minor->dev, fops);
	if (!entry)
		return -ENOMEM;

	return drm_add_fake_info_node(minor, entry, fops);
}
static const struct drm_info_list i915_debugfs_list[] = {
{"i915_capabilities", i915_capabilities, 0},
{"i915_gem_objects", i915_gem_object_info, 0},
{"i915_gem_gtt", i915_gem_gtt_info, 0},
{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
{"i915_gem_stolen", i915_gem_stolen_list_info },
{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
{"i915_gem_request", i915_gem_request_info, 0},
{"i915_gem_seqno", i915_gem_seqno_info, 0},
{"i915_gem_fence_regs", i915_gem_fence_regs_info, 0},
{"i915_gem_interrupt", i915_interrupt_info, 0},
{"i915_gem_hws", i915_hws_info, 0, (void *)RCS},
{"i915_gem_hws_blt", i915_hws_info, 0, (void *)BCS},
{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 04:13:08 +08:00
{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_guc_info", i915_guc_info, 0},
{"i915_guc_load_status", i915_guc_load_status_info, 0},
{"i915_guc_log_dump", i915_guc_log_dump, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_hangcheck_info", i915_hangcheck_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
{"i915_emon_status", i915_emon_status, 0},
{"i915_ring_freq_table", i915_ring_freq_table, 0},
{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
{"i915_fbc_status", i915_fbc_status, 0},
{"i915_ips_status", i915_ips_status, 0},
{"i915_sr_status", i915_sr_status, 0},
{"i915_opregion", i915_opregion, 0},
{"i915_vbt", i915_vbt, 0},
{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
{"i915_context_status", i915_context_status, 0},
{"i915_dump_lrc", i915_dump_lrc, 0},
{"i915_execlists", i915_execlists, 0},
{"i915_forcewake_domains", i915_forcewake_domains, 0},
{"i915_swizzle_info", i915_swizzle_info, 0},
{"i915_ppgtt_info", i915_ppgtt_info, 0},
{"i915_llc", i915_llc, 0},
{"i915_edp_psr_status", i915_edp_psr_status, 0},
{"i915_sink_crc_eDP1", i915_sink_crc, 0},
{"i915_energy_uJ", i915_energy_uJ, 0},
{"i915_runtime_pm_status", i915_runtime_pm_status, 0},
{"i915_power_domain_info", i915_power_domain_info, 0},
{"i915_dmc_info", i915_dmc_info, 0},
{"i915_display_info", i915_display_info, 0},
{"i915_semaphore_status", i915_semaphore_status, 0},
{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
{"i915_dp_mst_info", i915_dp_mst_info, 0},
{"i915_wa_registers", i915_wa_registers, 0},
{"i915_ddb_info", i915_ddb_info, 0},
{"i915_sseu_status", i915_sseu_status, 0},
{"i915_drrs_status", i915_drrs_status, 0},
{"i915_rps_boost_info", i915_rps_boost_info, 0},
};
/* Number of entries in i915_debugfs_list, used when (un)registering it. */
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
/*
 * debugfs files that need their own file_operations. Each entry pairs the
 * file name with its fops; i915_debugfs_init() creates them through
 * i915_debugfs_create() and i915_debugfs_cleanup() removes them again
 * using the fops pointer as the lookup key.
 */
static const struct i915_debugfs_files {
/* name of the debugfs file */
const char *name;
/* file operations backing the file */
const struct file_operations *fops;
} i915_debugfs_files[] = {
{"i915_wedged", &i915_wedged_fops},
{"i915_max_freq", &i915_max_freq_fops},
{"i915_min_freq", &i915_min_freq_fops},
{"i915_cache_sharing", &i915_cache_sharing_fops},
{"i915_ring_stop", &i915_ring_stop_fops},
{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
{"i915_ring_test_irq", &i915_ring_test_irq_fops},
{"i915_gem_drop_caches", &i915_drop_caches_fops},
{"i915_error_state", &i915_error_state_fops},
{"i915_next_seqno", &i915_next_seqno_fops},
{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
{"i915_spr_wm_latency", &i915_spr_wm_latency_fops},
{"i915_cur_wm_latency", &i915_cur_wm_latency_fops},
{"i915_fbc_false_color", &i915_fbc_fc_fops},
{"i915_dp_test_data", &i915_displayport_test_data_fops},
{"i915_dp_test_type", &i915_displayport_test_type_fops},
{"i915_dp_test_active", &i915_displayport_test_active_fops}
};
/*
 * intel_display_crc_init - initialise the per-pipe CRC bookkeeping
 * @dev: DRM device
 *
 * For every pipe, marks the CRC state as not opened and initialises its
 * spinlock and wait queue.
 */
void intel_display_crc_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe) {
		struct intel_pipe_crc *crc = &dev_priv->pipe_crc[pipe];

		crc->opened = false;
		spin_lock_init(&crc->lock);
		init_waitqueue_head(&crc->wq);
	}
}
int i915_debugfs_init(struct drm_minor *minor)
{
int ret, i;
ret = i915_forcewake_create(minor->debugfs_root, minor);
if (ret)
return ret;
for (i = 0; i < ARRAY_SIZE(i915_pipe_crc_data); i++) {
ret = i915_pipe_crc_create(minor->debugfs_root, minor, i);
if (ret)
return ret;
}
for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) {
ret = i915_debugfs_create(minor->debugfs_root, minor,
i915_debugfs_files[i].name,
i915_debugfs_files[i].fops);
if (ret)
return ret;
}
return drm_debugfs_create_files(i915_debugfs_list,
I915_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
}
/*
 * i915_debugfs_cleanup - remove the files created by i915_debugfs_init()
 * @minor: DRM minor the files were registered with
 *
 * The forcewake file and the i915_debugfs_files entries were registered as
 * fake info nodes keyed by their fops pointer, so that pointer is cast to
 * a drm_info_list pointer purely to serve as the lookup key; the pipe CRC
 * files are looked up by the address of their i915_pipe_crc_data entry in
 * the same way.
 */
void i915_debugfs_cleanup(struct drm_minor *minor)
{
	int n;

	drm_debugfs_remove_files(i915_debugfs_list,
				 I915_DEBUGFS_ENTRIES, minor);

	drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
				 1, minor);

	for (n = 0; n < ARRAY_SIZE(i915_pipe_crc_data); n++)
		drm_debugfs_remove_files(
			(struct drm_info_list *)&i915_pipe_crc_data[n],
			1, minor);

	for (n = 0; n < ARRAY_SIZE(i915_debugfs_files); n++)
		drm_debugfs_remove_files(
			(struct drm_info_list *) i915_debugfs_files[n].fops,
			1, minor);
}
/*
 * One contiguous range of DPCD registers to dump in the i915_dpcd debugfs
 * file. The length is taken from .end if set, else from .size, else 1.
 */
struct dpcd_block {
/* DPCD dump start address. */
unsigned int offset;
/* DPCD dump end address, inclusive. If unset, .size will be used. */
unsigned int end;
/* DPCD dump size. Used if .end is unset. If unset, defaults to 1. */
size_t size;
/* Only valid for eDP. */
bool edp;
};
/*
 * DPCD ranges printed by i915_dpcd_show(), in output order. Every range
 * must fit in that function's 16-byte read buffer; larger ranges trigger a
 * WARN there and are skipped.
 */
static const struct dpcd_block i915_dpcd_debug[] = {
{ .offset = DP_DPCD_REV, .size = DP_RECEIVER_CAP_SIZE },
{ .offset = DP_PSR_SUPPORT, .end = DP_PSR_CAPS },
{ .offset = DP_DOWNSTREAM_PORT_0, .size = 16 },
{ .offset = DP_LINK_BW_SET, .end = DP_EDP_CONFIGURATION_SET },
{ .offset = DP_SINK_COUNT, .end = DP_ADJUST_REQUEST_LANE2_3 },
{ .offset = DP_SET_POWER },
{ .offset = DP_EDP_DPCD_REV },
{ .offset = DP_EDP_GENERAL_CAP_1, .end = DP_EDP_GENERAL_CAP_3 },
{ .offset = DP_EDP_DISPLAY_CONTROL_REGISTER, .end = DP_EDP_BACKLIGHT_FREQ_CAP_MAX_LSB },
{ .offset = DP_EDP_DBC_MINIMUM_BRIGHTNESS_SET, .end = DP_EDP_DBC_MAXIMUM_BRIGHTNESS_SET },
};
static int i915_dpcd_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
struct intel_dp *intel_dp =
enc_to_intel_dp(&intel_attached_encoder(connector)->base);
uint8_t buf[16];
ssize_t err;
int i;
if (connector->status != connector_status_connected)
return -ENODEV;
for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
const struct dpcd_block *b = &i915_dpcd_debug[i];
size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);
if (b->edp &&
connector->connector_type != DRM_MODE_CONNECTOR_eDP)
continue;
/* low tech for now */
if (WARN_ON(size > sizeof(buf)))
continue;
err = drm_dp_dpcd_read(&intel_dp->aux, b->offset, buf, size);
if (err <= 0) {
DRM_ERROR("dpcd read (%zu bytes at %u) failed (%zd)\n",
size, b->offset, err);
continue;
}
seq_printf(m, "%04x: %*ph\n", b->offset, (int) size, buf);
}
return 0;
}
/*
 * Open handler for i915_dpcd: binds i915_dpcd_show() to the file, handing
 * it the inode's private pointer as seq_file private data.
 */
static int i915_dpcd_open(struct inode *inode, struct file *file)
{
	void *priv = inode->i_private;

	return single_open(file, i915_dpcd_show, priv);
}
/*
 * File operations for the per-connector i915_dpcd debugfs file: a
 * single_open() seq_file whose contents come from i915_dpcd_show().
 */
static const struct file_operations i915_dpcd_fops = {
.owner = THIS_MODULE,
.open = i915_dpcd_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/**
 * i915_debugfs_connector_add - add i915 specific connector debugfs files
 * @connector: pointer to a registered drm_connector
 *
 * Adds a read-only i915_dpcd file to DisplayPort and eDP connectors; other
 * connector types get no extra files.
 *
 * Cleanup will be done by drm_connector_unregister() through a call to
 * drm_debugfs_connector_remove().
 *
 * Returns 0 on success, negative error codes on error.
 */
int i915_debugfs_connector_add(struct drm_connector *connector)
{
	struct dentry *root = connector->debugfs_entry;

	/* The connector must have been registered beforehand. */
	if (!root)
		return -ENODEV;

	switch (connector->connector_type) {
	case DRM_MODE_CONNECTOR_DisplayPort:
	case DRM_MODE_CONNECTOR_eDP:
		debugfs_create_file("i915_dpcd", S_IRUGO, root, connector,
				    &i915_dpcd_fops);
		break;
	default:
		break;
	}

	return 0;
}