2017-08-03 16:31:28 +08:00
# export-to-sqlite.py: export perf data to a sqlite3 database
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
2019-03-09 08:05:17 +08:00
from __future__ import print_function
2017-08-03 16:31:28 +08:00
import os
import sys
import struct
import datetime
# To use this script you will need to have installed package python-pyside which
# provides LGPL-licensed Python bindings for Qt. You will also need the package
# libqt4-sql-sqlite for Qt sqlite3 support.
#
# An example of using this script with Intel PT:
#
# $ perf record -e intel_pt//u ls
# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py pt_example branches calls
# 2017-07-31 14:26:07.326913 Creating database...
# 2017-07-31 14:26:07.538097 Writing records...
# 2017-07-31 14:26:09.889292 Adding indexes
# 2017-07-31 14:26:09.958746 Done
#
# To browse the database, sqlite3 can be used e.g.
#
# $ sqlite3 pt_example
# sqlite> .header on
# sqlite> select * from samples_view where id < 10;
# sqlite> .mode column
# sqlite> select * from samples_view where id < 10;
# sqlite> .tables
# sqlite> .schema samples_view
# sqlite> .quit
#
# An example of using the database is provided by the script
2018-10-01 14:28:46 +08:00
# exported-sql-viewer.py. Refer to that script for details.
2017-08-03 16:31:28 +08:00
#
# The database structure is practically the same as created by the script
# export-to-postgresql.py. Refer to that script for details. A notable
# difference is the 'transaction' column of the 'samples' table which is
# renamed 'transaction_' in sqlite because 'transaction' is a reserved word.
from PySide . QtSql import *
sys . path . append ( os . environ [ ' PERF_EXEC_PATH ' ] + \
' /scripts/python/Perf-Trace-Util/lib/Perf/Trace ' )
# These perf imports are not used at present
#from perf_trace_context import *
#from Core import *
perf_db_export_mode = True
perf_db_export_calls = False
perf_db_export_callchains = False
2019-03-09 08:05:17 +08:00
def printerr ( * args , * * keyword_args ) :
print ( * args , file = sys . stderr , * * keyword_args )
2019-03-09 08:05:18 +08:00
def printdate ( * args , * * kw_args ) :
print ( datetime . datetime . today ( ) , * args , sep = ' ' , * * kw_args )
2017-08-03 16:31:28 +08:00
def usage ( ) :
2019-03-09 08:05:17 +08:00
printerr ( " Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>] " ) ;
printerr ( " where: columns ' all ' or ' branches ' " ) ;
printerr ( " calls ' calls ' => create calls and call_paths table " ) ;
printerr ( " callchains ' callchains ' => create call_paths table " ) ;
2017-08-03 16:31:28 +08:00
raise Exception ( " Too few arguments " )
if ( len ( sys . argv ) < 2 ) :
usage ( )
dbname = sys . argv [ 1 ]
if ( len ( sys . argv ) > = 3 ) :
columns = sys . argv [ 2 ]
else :
columns = " all "
if columns not in ( " all " , " branches " ) :
usage ( )
branches = ( columns == " branches " )
for i in range ( 3 , len ( sys . argv ) ) :
if ( sys . argv [ i ] == " calls " ) :
perf_db_export_calls = True
elif ( sys . argv [ i ] == " callchains " ) :
perf_db_export_callchains = True
else :
usage ( )
def do_query ( q , s ) :
if ( q . exec_ ( s ) ) :
return
raise Exception ( " Query failed: " + q . lastError ( ) . text ( ) )
def do_query_ ( q ) :
if ( q . exec_ ( ) ) :
return
raise Exception ( " Query failed: " + q . lastError ( ) . text ( ) )
2019-03-09 08:05:18 +08:00
printdate ( " Creating database ... " )
2017-08-03 16:31:28 +08:00
db_exists = False
try :
f = open ( dbname )
f . close ( )
db_exists = True
except :
pass
if db_exists :
raise Exception ( dbname + " already exists " )
db = QSqlDatabase . addDatabase ( ' QSQLITE ' )
db . setDatabaseName ( dbname )
db . open ( )
query = QSqlQuery ( db )
do_query ( query , ' PRAGMA journal_mode = OFF ' )
do_query ( query , ' BEGIN TRANSACTION ' )
do_query ( query , ' CREATE TABLE selected_events ( '
' id integer NOT NULL PRIMARY KEY, '
' name varchar(80)) ' )
do_query ( query , ' CREATE TABLE machines ( '
' id integer NOT NULL PRIMARY KEY, '
' pid integer, '
' root_dir varchar(4096)) ' )
do_query ( query , ' CREATE TABLE threads ( '
' id integer NOT NULL PRIMARY KEY, '
' machine_id bigint, '
' process_id bigint, '
' pid integer, '
' tid integer) ' )
do_query ( query , ' CREATE TABLE comms ( '
' id integer NOT NULL PRIMARY KEY, '
' comm varchar(16)) ' )
do_query ( query , ' CREATE TABLE comm_threads ( '
' id integer NOT NULL PRIMARY KEY, '
' comm_id bigint, '
' thread_id bigint) ' )
do_query ( query , ' CREATE TABLE dsos ( '
' id integer NOT NULL PRIMARY KEY, '
' machine_id bigint, '
' short_name varchar(256), '
' long_name varchar(4096), '
' build_id varchar(64)) ' )
do_query ( query , ' CREATE TABLE symbols ( '
' id integer NOT NULL PRIMARY KEY, '
' dso_id bigint, '
' sym_start bigint, '
' sym_end bigint, '
' binding integer, '
' name varchar(2048)) ' )
do_query ( query , ' CREATE TABLE branch_types ( '
' id integer NOT NULL PRIMARY KEY, '
' name varchar(80)) ' )
if branches :
do_query ( query , ' CREATE TABLE samples ( '
' id integer NOT NULL PRIMARY KEY, '
' evsel_id bigint, '
' machine_id bigint, '
' thread_id bigint, '
' comm_id bigint, '
' dso_id bigint, '
' symbol_id bigint, '
' sym_offset bigint, '
' ip bigint, '
' time bigint, '
' cpu integer, '
' to_dso_id bigint, '
' to_symbol_id bigint, '
' to_sym_offset bigint, '
' to_ip bigint, '
' branch_type integer, '
' in_tx boolean, '
' call_path_id bigint) ' )
else :
do_query ( query , ' CREATE TABLE samples ( '
' id integer NOT NULL PRIMARY KEY, '
' evsel_id bigint, '
' machine_id bigint, '
' thread_id bigint, '
' comm_id bigint, '
' dso_id bigint, '
' symbol_id bigint, '
' sym_offset bigint, '
' ip bigint, '
' time bigint, '
' cpu integer, '
' to_dso_id bigint, '
' to_symbol_id bigint, '
' to_sym_offset bigint, '
' to_ip bigint, '
' period bigint, '
' weight bigint, '
' transaction_ bigint, '
' data_src bigint, '
' branch_type integer, '
' in_tx boolean, '
' call_path_id bigint) ' )
if perf_db_export_calls or perf_db_export_callchains :
do_query ( query , ' CREATE TABLE call_paths ( '
' id integer NOT NULL PRIMARY KEY, '
' parent_id bigint, '
' symbol_id bigint, '
' ip bigint) ' )
if perf_db_export_calls :
do_query ( query , ' CREATE TABLE calls ( '
' id integer NOT NULL PRIMARY KEY, '
' thread_id bigint, '
' comm_id bigint, '
' call_path_id bigint, '
' call_time bigint, '
' return_time bigint, '
' branch_count bigint, '
' call_id bigint, '
' return_id bigint, '
' parent_call_path_id bigint, '
2019-02-28 21:00:25 +08:00
' flags integer, '
' parent_id bigint) ' )
2017-08-03 16:31:28 +08:00
# printf was added to sqlite in version 3.8.3
sqlite_has_printf = False
try :
do_query ( query , ' SELECT printf( " " ) FROM machines ' )
sqlite_has_printf = True
except :
pass
def emit_to_hex ( x ) :
if sqlite_has_printf :
return ' printf( " %x " , ' + x + ' ) '
else :
return x
do_query ( query , ' CREATE VIEW machines_view AS '
' SELECT '
' id, '
' pid, '
' root_dir, '
' CASE WHEN id=0 THEN \' unknown \' WHEN pid=-1 THEN \' host \' ELSE \' guest \' END AS host_or_guest '
' FROM machines ' )
do_query ( query , ' CREATE VIEW dsos_view AS '
' SELECT '
' id, '
' machine_id, '
' (SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest, '
' short_name, '
' long_name, '
' build_id '
' FROM dsos ' )
do_query ( query , ' CREATE VIEW symbols_view AS '
' SELECT '
' id, '
' name, '
' (SELECT short_name FROM dsos WHERE id=dso_id) AS dso, '
' dso_id, '
' sym_start, '
' sym_end, '
' CASE WHEN binding=0 THEN \' local \' WHEN binding=1 THEN \' global \' ELSE \' weak \' END AS binding '
' FROM symbols ' )
do_query ( query , ' CREATE VIEW threads_view AS '
' SELECT '
' id, '
' machine_id, '
' (SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest, '
' process_id, '
' pid, '
' tid '
' FROM threads ' )
do_query ( query , ' CREATE VIEW comm_threads_view AS '
' SELECT '
' comm_id, '
' (SELECT comm FROM comms WHERE id = comm_id) AS command, '
' thread_id, '
' (SELECT pid FROM threads WHERE id = thread_id) AS pid, '
' (SELECT tid FROM threads WHERE id = thread_id) AS tid '
' FROM comm_threads ' )
if perf_db_export_calls or perf_db_export_callchains :
do_query ( query , ' CREATE VIEW call_paths_view AS '
' SELECT '
' c.id, '
+ emit_to_hex ( ' c.ip ' ) + ' AS ip, '
' c.symbol_id, '
' (SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol, '
' (SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id, '
' (SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name, '
' c.parent_id, '
+ emit_to_hex ( ' p.ip ' ) + ' AS parent_ip, '
' p.symbol_id AS parent_symbol_id, '
' (SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol, '
' (SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id, '
' (SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name '
' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id ' )
if perf_db_export_calls :
do_query ( query , ' CREATE VIEW calls_view AS '
' SELECT '
' calls.id, '
' thread_id, '
' (SELECT pid FROM threads WHERE id = thread_id) AS pid, '
' (SELECT tid FROM threads WHERE id = thread_id) AS tid, '
' (SELECT comm FROM comms WHERE id = comm_id) AS command, '
' call_path_id, '
+ emit_to_hex ( ' ip ' ) + ' AS ip, '
' symbol_id, '
' (SELECT name FROM symbols WHERE id = symbol_id) AS symbol, '
' call_time, '
' return_time, '
' return_time - call_time AS elapsed_time, '
' branch_count, '
' call_id, '
' return_id, '
perf thread-stack: Represent jmps to the start of a different symbol
The compiler might optimize a call/ret combination by making it a jmp.
However the thread-stack does not presently cater for that, so that such
control flow is not visible in the call graph. Make it visible by
recording on the stack a branch to the start of a different symbol.
Note, that means when a ret pops the stack, all jmps must be popped off
first.
Example:
$ cat jmp-to-fn.c
__attribute__((noinline)) int bar(void)
{
return -1;
}
__attribute__((noinline)) int foo(void)
{
return bar() + 1;
}
int main()
{
return foo();
}
$ gcc -ggdb3 -Wall -Wextra -O2 -o jmp-to-fn jmp-to-fn.c
$ objdump -d jmp-to-fn
<SNIP>
0000000000001040 <main>:
1040: 31 c0 xor %eax,%eax
1042: e9 09 01 00 00 jmpq 1150 <foo>
<SNIP>
0000000000001140 <bar>:
1140: b8 ff ff ff ff mov $0xffffffff,%eax
1145: c3 retq
<SNIP>
0000000000001150 <foo>:
1150: 31 c0 xor %eax,%eax
1152: e8 e9 ff ff ff callq 1140 <bar>
1157: 83 c0 01 add $0x1,%eax
115a: c3 retq
<SNIP>
$ perf record -o jmp-to-fn.perf.data -e intel_pt/cyc/u ./jmp-to-fn
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0,017 MB jmp-to-fn.perf.data ]
$ perf script -i jmp-to-fn.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py jmp-to-fn.db branches calls
2019-01-08 13:24:58.783069 Creating database...
2019-01-08 13:24:58.794650 Writing records...
2019-01-08 13:24:59.008050 Adding indexes
2019-01-08 13:24:59.015802 Done
$ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py jmp-to-fn.db
Before:
main
-> bar
After:
main
-> foo
-> bar
Committer testing:
Install the python2-pyside package, then select these menu options
on the GUI:
"Reports"
"Context sensitive callgraphs"
Then go on expanding the symbols, to get, full picture when doing this
on a fedora:29 with gcc version 8.2.1 20181215 (Red Hat 8.2.1-6) (GCC):
jmp-to-fn
PID:TID
_start (ld-2.28.so)
__libc_start_main
main
foo
bar
To verify that indeed, this fixes the problem.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20190109091835.5570-5-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-01-09 17:18:33 +08:00
' CASE WHEN flags=0 THEN \' \' WHEN flags=1 THEN \' no call \' WHEN flags=2 THEN \' no return \' WHEN flags=3 THEN \' no call/return \' WHEN flags=6 THEN \' jump \' ELSE flags END AS flags, '
2019-02-28 21:00:25 +08:00
' parent_call_path_id, '
' parent_id '
2017-08-03 16:31:28 +08:00
' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id ' )
do_query ( query , ' CREATE VIEW samples_view AS '
' SELECT '
' id, '
' time, '
' cpu, '
' (SELECT pid FROM threads WHERE id = thread_id) AS pid, '
' (SELECT tid FROM threads WHERE id = thread_id) AS tid, '
' (SELECT comm FROM comms WHERE id = comm_id) AS command, '
' (SELECT name FROM selected_events WHERE id = evsel_id) AS event, '
+ emit_to_hex ( ' ip ' ) + ' AS ip_hex, '
' (SELECT name FROM symbols WHERE id = symbol_id) AS symbol, '
' sym_offset, '
' (SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name, '
+ emit_to_hex ( ' to_ip ' ) + ' AS to_ip_hex, '
' (SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol, '
' to_sym_offset, '
' (SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name, '
' (SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name, '
' in_tx '
' FROM samples ' )
do_query ( query , ' END TRANSACTION ' )
evsel_query = QSqlQuery ( db )
evsel_query . prepare ( " INSERT INTO selected_events VALUES (?, ?) " )
machine_query = QSqlQuery ( db )
machine_query . prepare ( " INSERT INTO machines VALUES (?, ?, ?) " )
thread_query = QSqlQuery ( db )
thread_query . prepare ( " INSERT INTO threads VALUES (?, ?, ?, ?, ?) " )
comm_query = QSqlQuery ( db )
comm_query . prepare ( " INSERT INTO comms VALUES (?, ?) " )
comm_thread_query = QSqlQuery ( db )
comm_thread_query . prepare ( " INSERT INTO comm_threads VALUES (?, ?, ?) " )
dso_query = QSqlQuery ( db )
dso_query . prepare ( " INSERT INTO dsos VALUES (?, ?, ?, ?, ?) " )
symbol_query = QSqlQuery ( db )
symbol_query . prepare ( " INSERT INTO symbols VALUES (?, ?, ?, ?, ?, ?) " )
branch_type_query = QSqlQuery ( db )
branch_type_query . prepare ( " INSERT INTO branch_types VALUES (?, ?) " )
sample_query = QSqlQuery ( db )
if branches :
sample_query . prepare ( " INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) " )
else :
sample_query . prepare ( " INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) " )
if perf_db_export_calls or perf_db_export_callchains :
call_path_query = QSqlQuery ( db )
call_path_query . prepare ( " INSERT INTO call_paths VALUES (?, ?, ?, ?) " )
if perf_db_export_calls :
call_query = QSqlQuery ( db )
2019-02-28 21:00:25 +08:00
call_query . prepare ( " INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) " )
2017-08-03 16:31:28 +08:00
def trace_begin ( ) :
2019-03-09 08:05:18 +08:00
printdate ( " Writing records... " )
2017-08-03 16:31:28 +08:00
do_query ( query , ' BEGIN TRANSACTION ' )
# id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs
evsel_table ( 0 , " unknown " )
machine_table ( 0 , 0 , " unknown " )
thread_table ( 0 , 0 , 0 , - 1 , - 1 )
comm_table ( 0 , " unknown " )
dso_table ( 0 , 0 , " unknown " , " unknown " , " " )
symbol_table ( 0 , 0 , 0 , 0 , 0 , " unknown " )
sample_table ( 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 )
if perf_db_export_calls or perf_db_export_callchains :
call_path_table ( 0 , 0 , 0 , 0 )
2019-02-28 21:00:25 +08:00
call_return_table ( 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 )
2017-08-03 16:31:28 +08:00
unhandled_count = 0
def trace_end ( ) :
do_query ( query , ' END TRANSACTION ' )
2019-03-09 08:05:18 +08:00
printdate ( " Adding indexes " )
2017-08-03 16:31:28 +08:00
if perf_db_export_calls :
do_query ( query , ' CREATE INDEX pcpid_idx ON calls (parent_call_path_id) ' )
2019-02-28 21:00:25 +08:00
do_query ( query , ' CREATE INDEX pid_idx ON calls (parent_id) ' )
2017-08-03 16:31:28 +08:00
if ( unhandled_count ) :
2019-03-09 08:05:18 +08:00
printdate ( " Warning: " , unhandled_count , " unhandled events " )
printdate ( " Done " )
2017-08-03 16:31:28 +08:00
def trace_unhandled ( event_name , context , event_fields_dict ) :
global unhandled_count
unhandled_count + = 1
def sched__sched_switch ( * x ) :
pass
def bind_exec ( q , n , x ) :
for xx in x [ 0 : n ] :
q . addBindValue ( str ( xx ) )
do_query_ ( q )
def evsel_table ( * x ) :
bind_exec ( evsel_query , 2 , x )
def machine_table ( * x ) :
bind_exec ( machine_query , 3 , x )
def thread_table ( * x ) :
bind_exec ( thread_query , 5 , x )
def comm_table ( * x ) :
bind_exec ( comm_query , 2 , x )
def comm_thread_table ( * x ) :
bind_exec ( comm_thread_query , 3 , x )
def dso_table ( * x ) :
bind_exec ( dso_query , 5 , x )
def symbol_table ( * x ) :
bind_exec ( symbol_query , 6 , x )
def branch_type_table ( * x ) :
bind_exec ( branch_type_query , 2 , x )
def sample_table ( * x ) :
if branches :
2018-09-11 19:45:04 +08:00
for xx in x [ 0 : 15 ] :
sample_query . addBindValue ( str ( xx ) )
for xx in x [ 19 : 22 ] :
sample_query . addBindValue ( str ( xx ) )
do_query_ ( sample_query )
2017-08-03 16:31:28 +08:00
else :
bind_exec ( sample_query , 22 , x )
def call_path_table ( * x ) :
bind_exec ( call_path_query , 4 , x )
def call_return_table ( * x ) :
2019-02-28 21:00:25 +08:00
bind_exec ( call_query , 12 , x )